# Freedreno
/src/gallium/drivers/freedreno/ @robclark
+# Imagination
+/include/drm-uapi/pvr_drm.h @CreativeCylon @frankbinns @rajnesh-kanwal
+/src/imagination/ @CreativeCylon @frankbinns @rajnesh-kanwal
+/src/imagination/rogue/ @simon-perretta-img
+
# Intel
/include/drm-uapi/i915_drm.h @kwg @llandwerlin @jekstrand @idr
/include/pci_ids/i*_pci_ids.h @kwg @llandwerlin @jekstrand @idr
with_virtio_vk = _vulkan_drivers.contains('virtio-experimental')
with_freedreno_kgsl = get_option('freedreno-kgsl')
with_broadcom_vk = _vulkan_drivers.contains('broadcom')
+with_imagination_vk = _vulkan_drivers.contains('imagination-experimental')
+with_imagination_srv = get_option('imagination-srv')
with_any_vk = _vulkan_drivers.length() != 0
with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
'vulkan-drivers',
type : 'array',
value : ['auto'],
- choices : ['auto', 'amd', 'broadcom', 'freedreno', 'intel', 'panfrost', 'swrast', 'virtio-experimental'],
+ choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'panfrost', 'swrast', 'virtio-experimental'],
description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
)
option(
description : 'use kgsl backend for freedreno vulkan driver',
)
option(
+ 'imagination-srv',
+ type : 'boolean',
+ value : false,
+ description : 'Enable Services backend for Imagination Technologies vulkan driver',
+)
+option(
'shader-cache',
type : 'combo',
value : 'auto',
--- /dev/null
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+Language: Cpp
+Standard: c++11
+
+UseCRLF: false
+ColumnLimit: 80
+
+DeriveLineEnding: false
+DerivePointerAlignment: false
+ExperimentalAutoDetectBinPacking: false
+
+DisableFormat: false
+
+########
+# Tabs #
+########
+UseTab: Never
+TabWidth: 3
+
+ConstructorInitializerIndentWidth: 6
+ContinuationIndentWidth: 3
+
+IndentWidth: 3
+#IndentCaseBlocks: true # Requires clang-11
+IndentCaseLabels: false
+#IndentExternBlock: NoIndent # Requires clang-11
+IndentGotoLabels: false
+IndentPPDirectives: AfterHash
+IndentWrappedFunctionNames: false
+AccessModifierOffset: -3 # -IndentWidth
+
+NamespaceIndentation: None
+
+##########
+# Braces #
+##########
+AlignAfterOpenBracket: Align
+AllowAllArgumentsOnNextLine: false
+AllowAllConstructorInitializersOnNextLine: false
+AllowAllParametersOfDeclarationOnNextLine: false
+BinPackArguments: false
+BinPackParameters: false
+
+Cpp11BracedListStyle: false
+
+########################
+# Whitespace Alignment #
+########################
+AlignConsecutiveAssignments: false
+#AlignConsecutiveBitFields: false # Requires clang-11
+AlignConsecutiveDeclarations: false
+AlignConsecutiveMacros: false
+AlignTrailingComments: false
+
+AlignEscapedNewlines: Left
+
+#AlignOperands: Align # Requires clang-11
+#BitFieldColonSpacing: Both # Requires clang-12
+
+PointerAlignment: Right
+#SpaceAroundPointerQualifiers: Both # Requires clang-12
+
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+#SpaceBeforeParens: ControlStatementsExceptForEachMacros # Requires clang-11
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesInAngles: false
+SpacesInCStyleCastParentheses: false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpacesBeforeTrailingComments: 2
+
+############################
+# Multi-line constructions #
+############################
+AllowShortBlocksOnASingleLine: Empty
+AllowShortCaseLabelsOnASingleLine: false
+#AllowShortEnumsOnASingleLine: false # Requires clang-11
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: All
+AllowShortLoopsOnASingleLine: false
+
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: Yes
+
+BreakBeforeBraces: Custom
+BraceWrapping:
+ AfterCaseLabel: false
+ AfterClass: false
+ AfterControlStatement: Never
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: true
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ BeforeCatch: false
+ BeforeElse: false
+# BeforeLambdaBody: false # Requires clang-11
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyNamespace: true
+ SplitEmptyRecord: true
+
+BreakBeforeBinaryOperators: None
+BreakBeforeTernaryOperators: true
+
+BreakConstructorInitializers: AfterColon
+BreakInheritanceList: AfterColon
+
+BreakStringLiterals: false
+
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+
+#InsertTrailingCommas: Wrapped # Requires clang-11
+
+KeepEmptyLinesAtTheStartOfBlocks: false
+MaxEmptyLinesToKeep: 1
+
+SortUsingDeclarations: true
+
+############
+# Includes #
+############
+# TODO: Temporary config
+IncludeBlocks: Preserve
+SortIncludes: false
+# TODO: This requires additional work to clean up headers & includes first
+#IncludeBlocks: Regroup
+#SortIncludes: true
+#IncludeIsMainRegex: '(_test)?$'
+##IncludeIsMainSourceRegex: <default>
+#IncludeCategories:
+# - Regex: '^"'
+# Priority: 1
+
+############
+# Comments #
+############
+FixNamespaceComments: false
+
+#############
+# Penalties #
+#############
+# Taken from torvalds/kernel:.clang-format
+PenaltyBreakAssignment: 10
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+#######################
+# User-defined macros #
+#######################
+CommentPragmas: '^ IWYU pragma:'
+
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+
+#AttributeMacros: [] # Requires clang-12
+
+ForEachMacros: [
+ 'foreach_instr',
+ 'foreach_instr_safe',
+ 'hash_table_foreach',
+ 'LIST_FOR_EACH_ENTRY',
+ 'LIST_FOR_EACH_ENTRY_FROM',
+ 'LIST_FOR_EACH_ENTRY_FROM_REV',
+ 'LIST_FOR_EACH_ENTRY_SAFE',
+ 'LIST_FOR_EACH_ENTRY_SAFE_REV',
+ 'list_for_each_entry',
+ 'list_for_each_entry_from',
+ 'list_for_each_entry_from_rev',
+ 'list_for_each_entry_from_safe',
+ 'list_for_each_entry_rev',
+ 'list_for_each_entry_safe',
+ 'list_for_each_entry_safe_rev',
+ 'list_pair_for_each_entry',
+ 'pvr_csb_emit',
+ 'pvr_csb_emit_merge',
+ 'pvr_csb_pack',
+ 'nir_foreach_block',
+ 'nir_foreach_block_safe',
+ 'nir_foreach_function',
+ 'nir_foreach_instr',
+ 'nir_foreach_instr_safe',
+ 'nir_foreach_shader_in_variable',
+ 'nir_foreach_shader_out_variable',
+ 'nir_foreach_use',
+ 'nir_foreach_use_safe',
+ 'nir_foreach_variable_with_modes',
+ 'u_vector_foreach',
+ 'util_dynarray_foreach',
+ 'vk_foreach_struct',
+ 'vk_foreach_struct_const',
+# FIXME: vk_outarray_append doesn't fit here, remove
+# it when a better solution exists for it.
+ 'vk_outarray_append'
+]
+
+NamespaceMacros: [
+]
+
+StatementMacros: [
+]
+
+TypenameMacros: [
+]
+
+#WhitespaceSensitiveMacros: [] # Requires clang-11
--- /dev/null
+((nil . ((show-trailing-whitespace . t)))
+ (prog-mode
+ (indent-tabs-mode . nil)
+ (tab-width . 3)
+ (c-basic-offset . 3)
+ (c-file-style . "linux")
+ (fill-column . 80)
+ )
+ )
--- /dev/null
+[*.{c,h,cpp,hpp,cc,hh}]
+max_line_length = 80
--- /dev/null
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Device-independent PowerVR helper code (device info tables etc.) shared
+# by the Imagination Vulkan driver and related components.
+libpowervr_common = static_library(
+  'powervr_common',
+  [
+    'pvr_device_info.c',
+  ],
+  include_directories : [
+    inc_include,
+    inc_src,
+  ],
+  c_args : [no_override_init_args],
+  gnu_symbol_visibility : 'hidden',
+)
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* TODO: This file is currently hand-maintained. However, the intention is to
+ * auto-generate it in the future based on the hwdefs.
+ */
+
+#include "assert.h"
+#include "errno.h"
+#include "pvr_device_info.h"
+
+/* GX6250: "Rogue" series, BVNC 4.V.2.51. */
+const struct pvr_device_ident pvr_device_ident_4_V_2_51 = {
+   .device_id = 0x6250,
+   .series_name = "Rogue",
+   .public_name = "GX6250",
+};
+
+/* Feature flags and values for GX6250 (values only valid when the matching
+ * has_* flag is set).
+ */
+const struct pvr_device_features pvr_device_features_4_V_2_51 = {
+   .has_astc = true,
+   .has_cluster_grouping = true,
+   .has_common_store_size_in_dwords = true,
+   .has_compute = true,
+   .has_compute_morton_capable = true,
+   .has_compute_overlap = true,
+   .has_eight_output_registers = true,
+   .has_gs_rta_support = true,
+   .has_isp_max_tiles_in_flight = true,
+   .has_isp_samples_per_pixel = true,
+   .has_max_multisample = true,
+   .has_max_partitions = true,
+   .has_max_usc_tasks = true,
+   .has_num_clusters = true,
+   .has_num_raster_pipes = true,
+   .has_num_user_clip_planes = true,
+   .has_robust_buffer_access = true,
+   .has_slc_cache_line_size_bits = true,
+   .has_slc_mcu_cache_controls = true,
+   .has_tile_size_x = true,
+   .has_tile_size_y = true,
+   .has_tpu_array_textures = true,
+   .has_tpu_extended_integer_lookup = true,
+   .has_tpu_image_state_v2 = true,
+   .has_usc_f16sop_u8 = true,
+   .has_usc_min_output_registers_per_pix = true,
+   .has_uvs_banks = true,
+   .has_uvs_pba_entries = true,
+   .has_uvs_vtx_entries = true,
+   .has_vdm_cam_size = true,
+   .has_xt_top_infrastructure = true,
+   .has_zls_subtile = true,
+
+   .common_store_size_in_dwords = 1280U * 4U * 4U,
+   .isp_max_tiles_in_flight = 4U,
+   .isp_samples_per_pixel = 2U,
+   .max_multisample = 8U,
+   .max_partitions = 8U,
+   .max_usc_tasks = 56U,
+   .num_clusters = 2U,
+   .num_raster_pipes = 1U,
+   .num_user_clip_planes = 8U,
+   .slc_cache_line_size_bits = 512U,
+   .tile_size_x = 32U,
+   .tile_size_y = 32U,
+   .usc_min_output_registers_per_pix = 2U,
+   .uvs_banks = 8U,
+   .uvs_pba_entries = 320U,
+   .uvs_vtx_entries = 288U,
+   .vdm_cam_size = 256U,
+};
+
+/* Enhancements (ERNs) present in BVNC 4.40.2.51. */
+const struct pvr_device_enhancements pvr_device_enhancements_4_40_2_51 = {
+   .has_ern35421 = true,
+   .has_ern38020 = true,
+   .has_ern38748 = true,
+   .has_ern42307 = true,
+};
+
+/* Hardware errata (BRNs) affecting BVNC 4.40.2.51. */
+const struct pvr_device_quirks pvr_device_quirks_4_40_2_51 = {
+   .has_brn44079 = true,
+   .has_brn47727 = true,
+   .has_brn48492 = true,
+   .has_brn48545 = true,
+   .has_brn49032 = true,
+   .has_brn51210 = true,
+   .has_brn51764 = true,
+   .has_brn52354 = true,
+   .has_brn52942 = true,
+   .has_brn56279 = true,
+   .has_brn58839 = true,
+   .has_brn62269 = true,
+   .has_brn66011 = true,
+   .has_brn70165 = true,
+};
+
+/* AXE-1-16M: "A-Series" (RogueXE), BVNC 33.V.11.3. */
+const struct pvr_device_ident pvr_device_ident_33_V_11_3 = {
+   .device_id = 0x33011003,
+   .series_name = "A-Series",
+   .public_name = "AXE-1-16M",
+};
+
+/* Feature flags and values for AXE-1-16M (values only valid when the
+ * matching has_* flag is set).
+ */
+const struct pvr_device_features pvr_device_features_33_V_11_3 = {
+   .has_common_store_size_in_dwords = true,
+   .has_compute = true,
+   .has_isp_max_tiles_in_flight = true,
+   .has_isp_samples_per_pixel = true,
+   .has_max_multisample = true,
+   .has_max_partitions = true,
+   .has_max_usc_tasks = true,
+   .has_num_clusters = true,
+   .has_num_raster_pipes = true,
+   .has_num_user_clip_planes = true,
+   .has_roguexe = true,
+   .has_screen_size8K = true,
+   .has_simple_internal_parameter_format = true,
+   .has_simple_internal_parameter_format_v2 = true,
+   .has_simple_parameter_format_version = true,
+   .has_slc_cache_line_size_bits = true,
+   .has_tile_size_x = true,
+   .has_tile_size_y = true,
+   .has_tile_size_16x16 = true,
+   .has_tpu_extended_integer_lookup = true,
+   .has_tpu_image_state_v2 = true,
+   .has_usc_f16sop_u8 = true,
+   .has_usc_min_output_registers_per_pix = true,
+   .has_usc_pixel_partition_mask = true,
+   .has_uvs_banks = true,
+   .has_uvs_pba_entries = true,
+   .has_uvs_vtx_entries = true,
+   .has_vdm_cam_size = true,
+
+   .common_store_size_in_dwords = 512U * 4U * 4U,
+   .isp_max_tiles_in_flight = 1U,
+   .isp_samples_per_pixel = 1U,
+   .max_multisample = 4U,
+   .max_partitions = 4U,
+   .max_usc_tasks = 24U,
+   .num_clusters = 1U,
+   .num_raster_pipes = 1U,
+   .num_user_clip_planes = 8U,
+   .simple_parameter_format_version = 2U,
+   .slc_cache_line_size_bits = 512U,
+   .tile_size_x = 16U,
+   .tile_size_y = 16U,
+   .usc_min_output_registers_per_pix = 1U,
+   .uvs_banks = 2U,
+   .uvs_pba_entries = 320U,
+   .uvs_vtx_entries = 288U,
+   .vdm_cam_size = 32U,
+
+   .has_s8xe = true,
+};
+
+/* Enhancements (ERNs) present in BVNC 33.15.11.3. */
+const struct pvr_device_enhancements pvr_device_enhancements_33_15_11_3 = {
+   .has_ern35421 = true,
+   .has_ern38748 = true,
+   .has_ern42307 = true,
+   .has_ern45493 = true,
+};
+
+/* Hardware errata (BRNs) affecting BVNC 33.15.11.3. */
+const struct pvr_device_quirks pvr_device_quirks_33_15_11_3 = {
+   .has_brn70165 = true,
+};
+
+/* BXS-4-64: "B-Series" (RogueXE), BVNC 36.V.104.796. */
+const struct pvr_device_ident pvr_device_ident_36_V_104_796 = {
+   .device_id = 0x36104796,
+   .series_name = "B-Series",
+   .public_name = "BXS-4-64",
+};
+
+/* Feature flags and values for BXS-4-64 (values only valid when the
+ * matching has_* flag is set).
+ */
+const struct pvr_device_features pvr_device_features_36_V_104_796 = {
+   .has_astc = true,
+   .has_common_store_size_in_dwords = true,
+   .has_compute = true,
+   .has_compute_overlap = true,
+   .has_gpu_multicore_support = true,
+   .has_gs_rta_support = true,
+   .has_isp_max_tiles_in_flight = true,
+   .has_isp_samples_per_pixel = true,
+   .has_max_multisample = true,
+   .has_max_partitions = true,
+   .has_max_usc_tasks = true,
+   .has_num_clusters = true,
+   .has_num_raster_pipes = true,
+   .has_num_user_clip_planes = true,
+   .has_paired_tiles = true,
+   .has_pds_ddmadt = true,
+   .has_roguexe = true,
+   .has_screen_size8K = true,
+   .has_simple_internal_parameter_format = true,
+   .has_simple_internal_parameter_format_v2 = true,
+   .has_simple_parameter_format_version = true,
+   .has_slc_cache_line_size_bits = true,
+   .has_tile_size_x = true,
+   .has_tile_size_y = true,
+   .has_tile_size_16x16 = true,
+   .has_tpu_extended_integer_lookup = true,
+   .has_tpu_image_state_v2 = true,
+   .has_usc_f16sop_u8 = true,
+   .has_usc_min_output_registers_per_pix = true,
+   .has_usc_pixel_partition_mask = true,
+   .has_uvs_banks = true,
+   .has_uvs_pba_entries = true,
+   .has_uvs_vtx_entries = true,
+   .has_vdm_cam_size = true,
+   .has_xpu_max_slaves = true,
+
+   .common_store_size_in_dwords = 1344U * 4U * 4U,
+   .isp_max_tiles_in_flight = 6U,
+   .isp_samples_per_pixel = 4U,
+   .max_multisample = 4U,
+   .max_partitions = 16U,
+   .max_usc_tasks = 156U,
+   .num_clusters = 1U,
+   .num_raster_pipes = 1U,
+   .num_user_clip_planes = 8U,
+   .simple_parameter_format_version = 2U,
+   .slc_cache_line_size_bits = 512U,
+   .tile_size_x = 16U,
+   .tile_size_y = 16U,
+   .usc_min_output_registers_per_pix = 2U,
+   .uvs_banks = 8U,
+   .uvs_pba_entries = 160U,
+   .uvs_vtx_entries = 144U,
+   .vdm_cam_size = 64U,
+   .xpu_max_slaves = 3U,
+
+   .has_s8xe = true,
+};
+
+/* Enhancements (ERNs) present in BVNC 36.53.104.796. */
+const struct pvr_device_enhancements pvr_device_enhancements_36_53_104_796 = {
+   .has_ern35421 = true,
+   .has_ern38748 = true,
+   .has_ern42307 = true,
+   .has_ern45493 = true,
+};
+
+/* Hardware errata (BRNs) affecting BVNC 36.53.104.796. */
+const struct pvr_device_quirks pvr_device_quirks_36_53_104_796 = {
+   .has_brn44079 = true,
+   .has_brn70165 = true,
+};
+
+/**
+ * Initialize PowerVR device information.
+ *
+ * \param info Device info structure to initialize.
+ * \param bvnc Packed BVNC.
+ * \return
+ *  * 0 on success, or
+ *  * -%ENODEV if the device is not supported.
+ */
+int pvr_device_info_init(struct pvr_device_info *info, uint64_t bvnc)
+{
+/* The per-device ident tables don't carry the B/V/N/C numbers themselves, so
+ * fill them in here from the macro arguments.
+ */
+#define CASE_PACKED_BVNC_DEVICE_INFO(_b, _v, _n, _c) \
+   case PVR_BVNC_PACK(_b, _v, _n, _c): \
+      info->ident = pvr_device_ident_##_b##_V_##_n##_##_c; \
+      info->ident.b = _b; \
+      info->ident.n = _n; \
+      info->ident.v = _v; \
+      info->ident.c = _c; \
+      info->features = pvr_device_features_##_b##_V_##_n##_##_c; \
+      info->enhancements = pvr_device_enhancements_##_b##_##_v##_##_n##_##_c; \
+      info->quirks = pvr_device_quirks_##_b##_##_v##_##_n##_##_c; \
+      return 0
+
+   switch (bvnc) {
+      CASE_PACKED_BVNC_DEVICE_INFO(4, 40, 2, 51);
+      /* Register the other devices whose tables are defined above; previously
+       * these fell through to -ENODEV despite being fully described.
+       */
+      CASE_PACKED_BVNC_DEVICE_INFO(33, 15, 11, 3);
+      CASE_PACKED_BVNC_DEVICE_INFO(36, 53, 104, 796);
+   }
+
+#undef CASE_PACKED_BVNC_DEVICE_INFO
+
+   assert(!"Unsupported Device");
+
+   return -ENODEV;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DEVICE_INFO_H
+#define PVR_DEVICE_INFO_H
+
+/* TODO: This file is currently hand-maintained. However, the intention is to
+ * auto-generate it in the future based on the hwdefs.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "util/log.h"
+#include "util/macros.h"
+
+/* Bit positions and masks for the packed BVNC layout used by
+ * PVR_BVNC_PACK(): B, V, N and C each occupy 16 bits of the 64-bit value,
+ * from the most significant field downwards.
+ */
+#define PVR_BVNC_PACK_SHIFT_B 48
+#define PVR_BVNC_PACK_SHIFT_V 32
+#define PVR_BVNC_PACK_SHIFT_N 16
+#define PVR_BVNC_PACK_SHIFT_C 0
+
+#define PVR_BVNC_PACK_MASK_B UINT64_C(0xFFFF000000000000)
+#define PVR_BVNC_PACK_MASK_V UINT64_C(0x0000FFFF00000000)
+#define PVR_BVNC_PACK_MASK_N UINT64_C(0x00000000FFFF0000)
+#define PVR_BVNC_PACK_MASK_C UINT64_C(0x000000000000FFFF)
+
+/**
+ * Packs B, V, N and C values into a 64-bit unsigned integer.
+ *
+ * The packed layout is as follows:
+ *
+ * \verbatim
+ * +--------+--------+--------+-------+
+ * | 63..48 | 47..32 | 31..16 | 15..0 |
+ * +========+========+========+=======+
+ * | B | V | N | C |
+ * +--------+--------+--------+-------+
+ * \endverbatim
+ *
+ * #pvr_get_packed_bvnc() should be used instead of this macro when a
+ * #pvr_device_information is available in order to ensure proper type checking.
+ *
+ * \param b Branch ID.
+ * \param v Version ID.
+ * \param n Number of scalable units.
+ * \param c Config ID.
+ * \return Packed BVNC.
+ *
+ * \sa #pvr_get_packed_bvnc(), #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(),
+ * #PVR_BVNC_UNPACK_N() and #PVR_BVNC_UNPACK_C()
+ */
+#define PVR_BVNC_PACK(b, v, n, c) \
+ ((((uint64_t)(b) << PVR_BVNC_PACK_SHIFT_B) & PVR_BVNC_PACK_MASK_B) | \
+ (((uint64_t)(v) << PVR_BVNC_PACK_SHIFT_V) & PVR_BVNC_PACK_MASK_V) | \
+ (((uint64_t)(n) << PVR_BVNC_PACK_SHIFT_N) & PVR_BVNC_PACK_MASK_N) | \
+ (((uint64_t)(c) << PVR_BVNC_PACK_SHIFT_C) & PVR_BVNC_PACK_MASK_C))
+
+/**
+ * Unpacks B value (branch ID) from packed BVNC.
+ *
+ * \param bvnc Packed BVNC.
+ * \return Branch ID.
+ *
+ * \sa #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_N(), #PVR_BVNC_UNPACK_C(),
+ * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK()
+ */
+#define PVR_BVNC_UNPACK_B(bvnc) \
+ ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_B) >> PVR_BVNC_PACK_SHIFT_B))
+
+/**
+ * Unpacks V value (version ID) from packed BVNC.
+ *
+ * \param bvnc Packed BVNC.
+ * \return Version ID.
+ *
+ * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_N(), #PVR_BVNC_UNPACK_C(),
+ * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK()
+ */
+#define PVR_BVNC_UNPACK_V(bvnc) \
+ ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_V) >> PVR_BVNC_PACK_SHIFT_V))
+
+/**
+ * Unpacks N value (number of scalable units) from packed BVNC.
+ *
+ * \param bvnc Packed BVNC.
+ * \return Number of scalable units.
+ *
+ * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_C(),
+ * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK()
+ */
+#define PVR_BVNC_UNPACK_N(bvnc) \
+ ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_N) >> PVR_BVNC_PACK_SHIFT_N))
+
+/**
+ * Unpacks C value (config ID) from packed BVNC.
+ *
+ * \param bvnc Packed BVNC.
+ * \return Config ID.
+ *
+ * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_N(),
+ * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK()
+ */
+#define PVR_BVNC_UNPACK_C(bvnc) \
+ ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_C) >> PVR_BVNC_PACK_SHIFT_C))
+
+/**
+ * Tests whether a physical device has a given feature.
+ *
+ * Feature names are derived from those found in #pvr_device_features by
+ * dropping the 'has_' prefix, which is applied by this macro.
+ *
+ * \param dev_info #pvr_device_info object associated with the target physical
+ * device.
+ * \param feature Device feature name.
+ *
+ * \return
+ * * true if the named feature is present in the hardware.
+ * * false if the named feature is not present in the hardware.
+ *
+ * \sa #PVR_FEATURE_VALUE() and #PVR_GET_FEATURE_VALUE()
+ */
+#define PVR_HAS_FEATURE(dev_info, feature) ((dev_info)->features.has_##feature)
+
+/**
+ * Gets a physical device feature value if feature is supported.
+ *
+ * Feature names are derived from those found in #pvr_device_features by
+ * dropping the 'has_' prefix.
+ *
+ * This macro should be used in preference to #PVR_GET_FEATURE_VALUE() as it has
+ * proper error handling.
+ *
+ * \param dev_info #pvr_device_info object associated with the target physical
+ * device.
+ * \param feature Feature name.
+ * \param value_out Feature value.
+ *
+ * \return
+ * * 0 on success, or
+ * * -%EINVAL if the named feature is not present in the hardware.
+ *
+ * \sa #PVR_HAS_FEATURE() and #PVR_GET_FEATURE_VALUE()
+ */
+#define PVR_FEATURE_VALUE(dev_info, feature, value_out) \
+ ({ \
+ const struct pvr_device_info *__dev_info = dev_info; \
+ int __ret = -EINVAL; \
+ if (__dev_info->features.has_##feature) { \
+ *(value_out) = __dev_info->features.feature; \
+ __ret = 0; \
+ } \
+ __ret; \
+ })
+
+/**
+ * Gets a physical device feature value if supported, but otherwise returns a
+ * default value.
+ *
+ * Feature names are derived from those found in #pvr_device_features by
+ * dropping the 'has_' prefix.
+ *
+ * #PVR_FEATURE_VALUE() should be used in preference to this macro when errors
+ * can be returned by the caller. This macro is intended for cases where errors
+ * can't be returned.
+ *
+ * \param dev_info #pvr_device_info object associated with the target
+ * physical device.
+ * \param feature Feature name.
+ * \param default_value Default feature value.
+ *
+ * \return Feature value.
+ *
+ * \sa #PVR_HAS_FEATURE() and #PVR_FEATURE_VALUE()
+ */
+#define PVR_GET_FEATURE_VALUE(dev_info, feature, default_value) \
+ ({ \
+ const struct pvr_device_info *__dev_info = dev_info; \
+ __typeof__(default_value) __ret = default_value; \
+ if (__dev_info->features.has_##feature) { \
+ __ret = __dev_info->features.feature; \
+ } else { \
+ mesa_logw("Missing " #feature \
+ " feature (defaulting to: " #default_value ")"); \
+ assert(0); \
+ } \
+ __ret; \
+ })
+
+/**
+ * Tests whether a physical device has a given enhancement.
+ *
+ * Enhancement numbers are derived from those found in #pvr_device_enhancements
+ * by dropping the 'has_ern' prefix, which is applied by this macro.
+ *
+ * \param dev_info #pvr_device_info object associated with the target physical
+ * device.
+ * \param number Enhancement number.
+ *
+ * \return
+ * * true if the enhancement is present in the hardware.
+ * * false if the enhancement is not present in the hardware.
+ */
+#define PVR_HAS_ERN(dev_info, number) ((dev_info)->enhancements.has_ern##number)
+
+/**
+ * Tests whether a physical device has a given quirk.
+ *
+ * Quirk numbers are derived from those found in #pvr_device_quirks by
+ * dropping the 'has_brn' prefix, which is applied by this macro.
+ *
+ * \param dev_info #pvr_device_info object associated with the target physical
+ * device.
+ * \param number Quirk number.
+ *
+ * \return
+ * * true if the quirk is present in the hardware.
+ * * false if the quirk is not present in the hardware.
+ */
+#define PVR_HAS_QUIRK(dev_info, number) ((dev_info)->quirks.has_brn##number)
+
+/* Identity of a supported GPU: its BVNC plus marketing names. */
+struct pvr_device_ident {
+   uint16_t b, v, n, c;  /* Unpacked BVNC (see PVR_BVNC_PACK()). */
+   uint32_t device_id;
+   const char *series_name;
+   const char *public_name;
+};
+
+/* Hardware feature set.
+ *
+ * Each has_* bit records whether the feature is present. For features with an
+ * associated value, the uint32_t field below is only meaningful when the
+ * corresponding has_* bit is set (see PVR_FEATURE_VALUE() and
+ * PVR_GET_FEATURE_VALUE()).
+ */
+struct pvr_device_features {
+   bool has_astc : 1;
+   bool has_cluster_grouping : 1;
+   bool has_common_store_size_in_dwords : 1;
+   bool has_compute : 1;
+   bool has_compute_morton_capable : 1;
+   bool has_compute_overlap : 1;
+   bool has_eight_output_registers : 1;
+   bool has_gpu_multicore_support : 1;
+   bool has_gs_rta_support : 1;
+   bool has_isp_max_tiles_in_flight : 1;
+   bool has_isp_samples_per_pixel : 1;
+   bool has_max_multisample : 1;
+   bool has_max_partitions : 1;
+   bool has_max_usc_tasks : 1;
+   bool has_num_clusters : 1;
+   bool has_num_raster_pipes : 1;
+   bool has_num_user_clip_planes : 1;
+   bool has_paired_tiles : 1;
+   bool has_pds_ddmadt : 1;
+   bool has_robust_buffer_access : 1;
+   bool has_roguexe : 1;
+   bool has_screen_size8K : 1;
+   bool has_simple_internal_parameter_format : 1;
+   bool has_simple_internal_parameter_format_v2 : 1;
+   bool has_simple_parameter_format_version : 1;
+   bool has_slc_cache_line_size_bits : 1;
+   bool has_slc_mcu_cache_controls : 1;
+   bool has_tile_size_x : 1;
+   bool has_tile_size_y : 1;
+   bool has_tile_size_16x16 : 1;
+   bool has_tpu_array_textures : 1;
+   bool has_tpu_extended_integer_lookup : 1;
+   bool has_tpu_image_state_v2 : 1;
+   bool has_usc_f16sop_u8 : 1;
+   bool has_usc_min_output_registers_per_pix : 1;
+   bool has_usc_pixel_partition_mask : 1;
+   bool has_uvs_banks : 1;
+   bool has_uvs_pba_entries : 1;
+   bool has_uvs_vtx_entries : 1;
+   bool has_vdm_cam_size : 1;
+   bool has_xpu_max_slaves : 1;
+   bool has_xt_top_infrastructure : 1;
+   bool has_zls_subtile : 1;
+
+   uint32_t common_store_size_in_dwords;
+   uint32_t isp_max_tiles_in_flight;
+   uint32_t isp_samples_per_pixel;
+   uint32_t max_multisample;
+   uint32_t max_partitions;
+   uint32_t max_usc_tasks;
+   uint32_t num_clusters;
+   uint32_t num_raster_pipes;
+   uint32_t num_user_clip_planes;
+   uint32_t simple_parameter_format_version;
+   uint32_t slc_cache_line_size_bits;
+   uint32_t tile_size_x;
+   uint32_t tile_size_y;
+   uint32_t usc_min_output_registers_per_pix;
+   uint32_t uvs_banks;
+   uint32_t uvs_pba_entries;
+   uint32_t uvs_vtx_entries;
+   uint32_t vdm_cam_size;
+   uint32_t xpu_max_slaves;
+
+   /* Derived features. */
+   bool has_s8xe : 1;
+};
+
+/* Hardware enhancements (ERNs); query with PVR_HAS_ERN(). */
+struct pvr_device_enhancements {
+   bool has_ern35421 : 1;
+   bool has_ern38020 : 1;
+   bool has_ern38748 : 1;
+   bool has_ern42307 : 1;
+   bool has_ern45493 : 1;
+};
+
+/* Hardware errata requiring workarounds (BRNs); query with PVR_HAS_QUIRK(). */
+struct pvr_device_quirks {
+   bool has_brn44079 : 1;
+   bool has_brn47727 : 1;
+   bool has_brn48492 : 1;
+   bool has_brn48545 : 1;
+   bool has_brn49032 : 1;
+   bool has_brn51210 : 1;
+   bool has_brn51764 : 1;
+   bool has_brn52354 : 1;
+   bool has_brn52942 : 1;
+   bool has_brn56279 : 1;
+   bool has_brn58839 : 1;
+   bool has_brn62269 : 1;
+   bool has_brn66011 : 1;
+   bool has_brn70165 : 1;
+};
+
+/* Aggregate device description, filled in by pvr_device_info_init(). */
+struct pvr_device_info {
+   struct pvr_device_ident ident;
+   struct pvr_device_features features;
+   struct pvr_device_enhancements enhancements;
+   struct pvr_device_quirks quirks;
+};
+
+/**
+ * Packs B, V, N and C values into a 64-bit unsigned integer.
+ *
+ * The packed layout is as follows:
+ *
+ * \verbatim
+ * +--------+--------+--------+-------+
+ * | 63..48 | 47..32 | 31..16 | 15..0 |
+ * +========+========+========+=======+
+ * | B | V | N | C |
+ * +--------+--------+--------+-------+
+ * \endverbatim
+ *
+ * This should be used in preference to #PVR_BVNC_PACK() when a
+ * #pvr_device_info is available in order to ensure proper type checking.
+ *
+ * \param dev_info Device information.
+ * \return Packed BVNC.
+ */
+static ALWAYS_INLINE uint64_t
+pvr_get_packed_bvnc(const struct pvr_device_info *dev_info)
+{
+   /* Pack the four identity fields into the canonical 64-bit BVNC form. */
+   const struct pvr_device_ident *ident = &dev_info->ident;
+
+   return PVR_BVNC_PACK(ident->b, ident->v, ident->n, ident->c);
+}
+
+int pvr_device_info_init(struct pvr_device_info *info, uint64_t bvnc);
+
+#endif /* PVR_DEVICE_INFO_H */
--- /dev/null
+# encoding=utf-8
+
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# based on anv driver gen_pack_header.py which is:
+# Copyright © 2016 Intel Corporation
+
+# based on v3dv driver gen_pack_header.py which is:
+# Copyright (C) 2016 Broadcom
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import argparse
+import ast
+import xml.parsers.expat
+import re
+import sys
+import copy
+import os
+import textwrap
+
+# NOTE(review): this module-level name shadows the `license` helper that
+# site.py installs into builtins; harmless in this standalone script.
+license = """/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */"""
+
+# Template for the top of every generated header; filled in by Csbgen.emit().
+pack_header = """%(license)s
+
+/* Enums, structures and pack functions for %(platform)s.
+ *
+ * This file has been generated, do not hand edit.
+ */
+
+#ifndef %(guard)s
+#define %(guard)s
+
+#include "csbgen/pvr_packet_helpers.h"
+
+"""
+
+def safe_name(name):
+ if not name[0].isalpha():
+ name = '_' + name
+
+ return name
+
+def num_from_str(num_str):
+ if num_str.lower().startswith('0x'):
+ return int(num_str, base=16)
+
+ if num_str.startswith('0') and len(num_str) > 1:
+ raise ValueError('Octal numbers not allowed')
+
+ return int(num_str)
+
+class Node:
+    """Base class for all csbgen XML elements.
+
+    Tracks the parent element and a C-identifier-safe name; full_name
+    prepends the parent chain's prefix.
+    """
+    def __init__(self, parent, name, name_is_safe = False):
+        self.parent = parent
+        if name_is_safe:
+            self.name = name
+        else:
+            self.name = safe_name(name)
+
+    @property
+    def full_name(self):
+        # Names starting with '_' were already prefixed by safe_name();
+        # avoid inserting a double separator in that case.
+        if self.name[0] == '_':
+            return self.parent.prefix + self.name.upper()
+
+        return self.parent.prefix + "_" + self.name.upper()
+
+    @property
+    def prefix(self):
+        return self.parent.prefix
+
+class Csbgen(Node):
+    """Root element of one XML file: owns all enums, structs and defines
+    and emits the complete generated header to stdout.
+    """
+    def __init__(self, name, prefix, filename):
+        super().__init__(None, name.upper())
+        self.prefix_field = safe_name(prefix.upper())
+        self.filename = filename
+
+        self._defines = []
+        self._enums = {}
+        self._structs = {}
+
+    @property
+    def full_name(self):
+        return self.name + "_" + self.prefix_field
+
+    @property
+    def prefix(self):
+        return self.full_name
+
+    def add(self, element):
+        # Only Enum, Struct and Define may appear directly under <csbgen>;
+        # redefinitions are rejected.
+        if isinstance(element, Enum):
+            if element.name in self._enums:
+                raise RuntimeError('Enum redefined. Enum: %s' % element.name)
+
+            self._enums[element.name] = element
+        elif isinstance(element, Struct):
+            if element.name in self._structs:
+                raise RuntimeError('Struct redefined. Struct: %s' % element.name)
+
+            self._structs[element.name] = element
+        elif isinstance(element, Define):
+            define_names = map(lambda d: d.full_name, self._defines)
+            if element.full_name in define_names:
+                raise RuntimeError('Define redefined. Define: %s' % element.full_name)
+
+            self._defines.append(element)
+        else:
+            raise RuntimeError('Element "%s" cannot be nested in csbgen.' %
+                               type(element).__name__)
+
+    def _gen_guard(self):
+        # Include-guard macro derived from the XML file name, e.g.
+        # rogue_cdm.xml -> ROGUE_CDM_H.
+        return os.path.basename(self.filename).replace('.xml', '_h').upper()
+
+    def emit(self):
+        # Writes the whole header to stdout; meson captures it into a file.
+        print(pack_header % {'license': license,
+                             'platform': self.name,
+                             'guard': self._gen_guard()})
+
+        for define in self._defines:
+            define.emit(self)
+
+        print()
+
+        for enum in self._enums.values():
+            enum.emit(self)
+
+        for struct in self._structs.values():
+            struct.emit(self)
+
+        print('#endif /* %s */' % self._gen_guard())
+
+    def is_known_struct(self, struct_name):
+        return struct_name in self._structs.keys()
+
+    def is_known_enum(self, enum_name):
+        return enum_name in self._enums.keys()
+
+    def get_enum(self, enum_name):
+        return self._enums[enum_name]
+
+class Enum(Node):
+    """An <enum> element; emits a C enum with its nested <value> children."""
+    def __init__(self, parent, name):
+        super().__init__(parent, name)
+
+        self._values = {}
+
+        self.parent.add(self)
+
+    # We override prefix so that the values will contain the enum's name too.
+    @property
+    def prefix(self):
+        return self.full_name
+
+    def get_value(self, value_name):
+        return self._values[value_name]
+
+    def add(self, element):
+        if not isinstance(element, Value):
+            raise RuntimeError('Element cannot be nested in enum. ' +
+                               'Element Type: %s, Enum: %s' %
+                               (type(element).__name__, self.full_name))
+
+        if element.name in self._values:
+            raise RuntimeError('Value is being redefined. Value: "%s"' % element.name)
+
+        self._values[element.name] = element
+
+    def emit(self, root):
+        # This check is invalid if tags other than Value can be nested within an enum.
+        if not self._values.values():
+            raise RuntimeError('Enum definition is empty. Enum: "%s"' % self.full_name)
+
+        print('enum %s {' % self.full_name)
+        for value in self._values.values():
+            value.emit()
+        print('};\n')
+
+class Value(Node):
+    """A single named constant belonging to an Enum."""
+    def __init__(self, parent, name, value):
+        super().__init__(parent, name)
+
+        self.value = int(value)
+
+        self.parent.add(self)
+
+    def emit(self):
+        # Column-aligned enumerator: NAME = value,
+        print('   %-36s = %6d,' % (self.full_name, self.value))
+
+class Struct(Node):
+    """A <struct> element; emits the C struct, its default-value header
+    macro, per-field helper defines and the *_pack() function.
+    """
+    def __init__(self, parent, name, length):
+        super().__init__(parent, name)
+
+        # 'length' is in 32-bit dwords; 'size' is the same span in bits.
+        self.length = int(length)
+        self.size = self.length * 32
+
+        if self.length <= 0:
+            raise ValueError('Struct length must be greater than 0. ' +
+                             'Struct: "%s".' % self.full_name)
+
+        self._children = {}
+
+        self.parent.add(self)
+
+    @property
+    def fields(self):
+        # TODO: Should we cache? See TODO in equivalent Condition getter.
+
+        # Flattens nested Condition children into a single field list.
+        fields = []
+        for child in self._children.values():
+            if isinstance(child, Condition):
+                fields += child.fields
+            else:
+                fields.append(child)
+
+        return fields
+
+    @property
+    def prefix(self):
+        return self.full_name
+
+    def add(self, element):
+        # We don't support conditions and field having the same name.
+        if isinstance(element, Field):
+            if element.name in self._children.keys():
+                raise ValueError('Field is being redefined. ' +
+                                 'Field: "%s", Struct: "%s"' %
+                                 (element.name, self.full_name))
+
+            self._children[element.name] = element
+
+        elif isinstance(element, Condition):
+            # We only save ifs, and ignore the rest. The rest will be linked to
+            # the if condition so we just need to call emit() on the if and the
+            # rest will also be emitted.
+            if element.type == 'if':
+                self._children[element.name] = element
+            else:
+                if element.name not in self._children.keys():
+                    raise RuntimeError('Unknown condition: "%s"' % element.name)
+
+        else:
+            raise RuntimeError('Element cannot be nested in struct. ' +
+                               'Element Type: %s, Struct: %s' %
+                               (type(element).__name__, self.full_name))
+
+    def _emit_header(self, root):
+        # Emits the '<NAME>_header' macro holding every field's default value.
+        fields = filter(lambda f: hasattr(f, 'default'), self.fields)
+
+        default_fields = []
+        for field in fields:
+            if field.is_builtin_type:
+                default_fields.append("   .%-35s = %6d" %
+                                      (field.name, field.default))
+            else:
+                if not root.is_known_enum(field.type):
+                    # Default values should not apply to structures
+                    raise RuntimeError('Unknown type. Field: "%s" Type: "%s"' %
+                                       (field.name, field.type))
+
+                enum = root.get_enum(field.type)
+
+                try:
+                    value = enum.get_value(field.default)
+                except KeyError:
+                    raise ValueError('Unknown enum value. ' +
+                                     'Value: "%s", Enum: "%s", Field: "%s"' %
+                                     (field.default, enum.full_name, field.name))
+
+                default_fields.append("   .%-35s = %s" %
+                                      (field.name, value.full_name))
+
+        print('#define %-40s\\' % (self.full_name + '_header'))
+        print(",  \\\n".join(default_fields))
+        print('')
+
+    def _emit_helper_macros(self, root):
+        # Emits the defines nested inside fields (e.g. address ALIGNMENT).
+        fields_with_defines = filter(lambda f: f.defines, self.fields)
+
+        for field in fields_with_defines:
+            print("/* Helper macros for %s */" % (field.name))
+
+            for define in field.defines:
+                define.emit(root)
+
+            print()
+
+    def _emit_pack_function(self, root):
+        print(textwrap.dedent("""\
+            static inline __attribute__((always_inline)) void
+            %s_pack(__attribute__((unused)) void * restrict dst,
+            %s__attribute__((unused)) const struct %s * restrict values)
+            {""") % (self.full_name, ' ' * len(self.full_name), self.full_name))
+
+        group = Group(0, 1, self.size, self.fields)
+        (dwords, length) = group.collect_dwords_and_length()
+        if length:
+            # Cast dst to make header C++ friendly
+            print("   uint32_t * restrict dw = (uint32_t * restrict) dst;")
+
+            group.emit_pack_function(root, dwords, length)
+
+        print("}\n")
+
+
+    def emit(self, root):
+        print('#define %-33s %6d' % (self.full_name + "_length", self.length))
+
+        self._emit_header(root)
+
+        self._emit_helper_macros(root)
+
+        print("struct %s {" % self.full_name)
+        for child in self._children.values():
+            child.emit(root)
+        print("};\n")
+
+        self._emit_pack_function(root)
+
+class Field(Node):
+ def __init__(self, parent, name, start, end, type, default=None, shift=None):
+ super().__init__(parent, name)
+
+ self.start = int(start)
+ self.end = int(end)
+ self.type = type
+
+ self._defines = {}
+
+ self.parent.add(self)
+
+ if self.start > self.end:
+ raise ValueError('Start cannot be after end. ' +
+ 'Start: %d, End: %d, Field: "%s"' %
+ (self.start, self.end, self.name))
+
+ if self.type == 'bool' and self.end != self.start:
+ raise ValueError('Bool field can only be 1 bit long. ' +
+ 'Field "%s"' % self.name)
+
+ if default is not None:
+ if not self.is_builtin_type:
+ # Assuming it's an enum type.
+ self.default = safe_name(default)
+ else:
+ self.default = num_from_str(default)
+
+ if shift is not None:
+ if self.type != 'address':
+ raise RuntimeError('Only address fields can have a shift ' +
+ 'attribute. Field: "%s"' % self.name)
+
+ self.shift = int(shift)
+
+ Define(self, "ALIGNMENT", 2 ** self.shift)
+ else:
+ if self.type == 'address':
+ raise RuntimeError('Field of address type ' +
+ 'requires a shift attribute. Field "%s"' %
+ self.name)
+
+ @property
+ def defines(self):
+ return self._defines.values()
+
+ # We override prefix so that the defines will contain the field's name too.
+ @property
+ def prefix(self):
+ return self.full_name
+
+ @property
+ def is_builtin_type(self):
+ builtins = {'address', 'bool', 'float', 'mbo', 'offset', 'int', 'uint'}
+ return self.type in builtins
+
+ def _get_c_type(self, root):
+ if self.type == 'address':
+ return '__pvr_address_type'
+ elif self.type == 'bool':
+ return 'bool'
+ elif self.type == 'float':
+ return 'float'
+ elif self.type == 'offset':
+ return 'uint64_t'
+ elif self.type == 'int':
+ return 'int32_t'
+ elif self.type == 'uint':
+ if self.end - self.start < 32:
+ return 'uint32_t'
+ elif self.end - self.self < 64:
+ return 'uint64_t'
+
+ raise RuntimeError('No known C type found to hold %d bit sized value. ' +
+ 'Field: "%s"' %
+ (self.end - self.start, self.name))
+ elif root.is_known_struct(self.type):
+ return 'struct ' + self.type
+ elif root.is_known_enum(self.type):
+ return 'enum ' + root.get_enum(self.type).full_name
+ raise RuntimeError('Unknown type. Type: "%s", Field: "%s"' %
+ (self.type, self.name))
+
+ def add(self, element):
+ if self.type == 'mbo':
+ raise RuntimeError('No element can be nested in an mbo field. ' +
+ 'Element Type: %s, Field: %s' %
+ (type(element).__name__, self.name))
+
+ if isinstance(element, Define):
+ if element.name in self._defines:
+ raise RuntimeError('Duplicate define. Define: "%s"' %
+ element.name)
+
+ self._defines[element.name] = element
+ else:
+ raise RuntimeError('Element cannot be nested in a field. ' +
+ 'Element Type: %s, Field: %s' %
+ (type(element).__name__, self.name))
+
+ def emit(self, root):
+ if self.type == 'mbo':
+ return
+
+ print(" %-36s %s;" % (self._get_c_type(root), self.name))
+
+class Define(Node):
+    """A literal '#define' attached to a field (e.g. ALIGNMENT)."""
+    def __init__(self, parent, name, value):
+        super().__init__(parent, name)
+
+        self.value = value
+
+        self.parent.add(self)
+
+    def emit(self, root):
+        # '%d' assumes integer values, which all current defines are.
+        print("#define %-40s %d" % (self.full_name, self.value))
+
+class Condition(Node):
+    """An if/elif/else/endif chain guarding a group of fields.
+
+    Only 'if' nodes are stored by the parent; each later branch is linked
+    from its predecessor via _child_branch and emitted as C comments.
+    """
+    def __init__(self, parent, name, type):
+        super().__init__(parent, name, name_is_safe = True)
+
+        self.type = type
+        if not Condition._is_valid_type(self.type):
+            raise RuntimeError('Unknown type: "%s"' % self.name)
+
+        self._children = {}
+
+        # This is the link to the next branch for the if statement so either
+        # elif, else, or endif. They themselves will also have a link to the
+        # next branch up until endif which terminates the chain.
+        self._child_branch = None
+
+        self.parent.add(self)
+
+    @property
+    def fields(self):
+        # TODO: Should we use some kind of state to indicate the all of the
+        # child nodes have been added and then cache the fields in here on the
+        # first call so that we don't have to traverse them again per each call?
+        # The state could be changed wither when we reach the endif and pop from
+        # the context, or when we start emitting.
+
+        fields = []
+
+        for child in self._children.values():
+            if isinstance(child, Condition):
+                fields += child.fields
+            else:
+                fields.append(child)
+
+        if self._child_branch is not None:
+            fields += self._child_branch.fields
+
+        return fields
+
+    # NOTE(review): implicitly static (no 'self'); works because it is only
+    # called via the class, but an @staticmethod decorator would be clearer.
+    def _is_valid_type(type):
+        types = {'if', 'elif', 'else', 'endif'}
+        return type in types
+
+    def _is_compatible_child_branch(self, branch):
+        # A branch may only be followed by a later branch kind, except that
+        # elif may chain to another elif.
+        types = ['if', 'elif', 'else', 'endif']
+        idx = types.index(self.type)
+        return (branch.type in types[idx + 1:] or
+                self.type == 'elif' and branch.type == 'elif')
+
+    def _add_branch(self, branch):
+        if branch.type == 'elif' and branch.name == self.name:
+            raise RuntimeError('Elif branch cannot have same check as previous branch. ' +
+                               'Check: "%s"' % (branch.name))
+
+        if not self._is_compatible_child_branch(branch):
+            raise RuntimeError('Invalid branch. Check: "%s", Type: "%s"' %
+                               (branch.name, branch.type))
+
+        self._child_branch = branch
+
+    # Returns the name of the if condition. This is used for elif branches since
+    # they have a different name than the if condition thus we have to traverse
+    # the chain of branches.
+    # This is used to discriminate nested if conditions from branches since
+    # branches like 'endif' and 'else' will have the same name as the 'if' (the
+    # elif is an exception) while nested conditions will have different names.
+    #
+    # TODO: Redo this to improve speed? Would caching this be helpful? We could
+    # just save the name of the if instead of having to walk towards it whenever
+    # a new condition is being added.
+    def _top_branch_name(self):
+        if self.type == 'if':
+            return self.name
+
+        return self.parent._top_branch_name()
+
+    def add(self, element):
+        if isinstance(element, Field):
+            if element.name in self._children.keys():
+                raise ValueError('Duplicate field. Field: "%s"' % element.name)
+
+            self._children[element.name] = element
+        elif isinstance(element, Condition):
+            if element.type == 'elif' or self._top_branch_name() == element.name:
+                self._add_branch(element)
+            else:
+                if element.type != 'if':
+                    raise RuntimeError('Branch of an unopened if condition. ' +
+                                       'Check: "%s", Type: "%s".' % (element.name, element.type))
+
+                # This is a nested condition and we made sure that the name
+                # doesn't match _top_branch_name() so we can recognize the else
+                # and endif.
+                # We recognized the elif by its type however its name differs
+                # from the if condition thus when we add an if condition with
+                # the same name as the elif nested in it, the _top_branch_name()
+                # check doesn't hold true as the name matched the elif and not
+                # the if statement which the elif was a branch of, thus the
+                # nested if condition is not recognized as an invalid branch of
+                # the outer if statement.
+                # Sample:
+                #   <condition type="if" check="ROGUEXE"/>
+                #     <condition type="elif" check="COMPUTE"/>
+                #       <condition type="if" check="COMPUTE"/>
+                #       <condition type="endif" check="COMPUTE"/>
+                #   <condition type="endif" check="COMPUTE"/>
+                # <condition type="endif" check="ROGUEXE"/>
+                #
+                # We fix this by checking the if condition name against its
+                # parent.
+                if element.name == self.name:
+                    raise RuntimeError('Invalid if condition. Check: "%s"' %
+                                       element.name)
+
+                self._children[element.name] = element
+        else:
+            raise RuntimeError('Element cannot be nested in a condition. ' +
+                               'Element Type: %s, Check: %s' %
+                               (type(element).__name__, self.name))
+
+    def emit(self, root):
+        # Conditions surface only as comments in the generated header.
+        if self.type == "if":
+            print("/* if %s is supported use: */" % (self.name))
+        elif self.type == "elif":
+            print("/* else if %s is supported use: */" % (self.name))
+        elif self.type == "else":
+            print("/* else %s is not-supported use: */" % (self.name))
+        elif self.type == "endif":
+            print("/* endif %s */" % (self.name))
+            return
+        else:
+            raise RuntimeError('Unknown condition type. Implementation error.')
+
+        for child in self._children.values():
+            child.emit(root)
+
+        self._child_branch.emit(root)
+
+class Group(object):
+ def __init__(self, start, count, size, fields):
+ self.start = start
+ self.count = count
+ self.size = size
+ self.fields = fields
+
+ class DWord:
+ def __init__(self):
+ self.size = 32
+ self.fields = []
+ self.addresses = []
+
+ def collect_dwords(self, dwords, start, dim):
+ for field in self.fields:
+ index = (start + field.start) // 32
+ if index not in dwords:
+ dwords[index] = self.DWord()
+
+ clone = copy.copy(field)
+ clone.start = clone.start + start
+ clone.end = clone.end + start
+ clone.dim = dim
+ dwords[index].fields.append(clone)
+
+ if field.type == "address":
+ # assert dwords[index].address == None
+ dwords[index].addresses.append(clone)
+
+ # Coalesce all the dwords covered by this field. The two cases we
+ # handle are where multiple fields are in a 64 bit word (typically
+ # and address and a few bits) or where a single struct field
+ # completely covers multiple dwords.
+ while index < (start + field.end) // 32:
+ if index + 1 in dwords and \
+ not dwords[index] == dwords[index + 1]:
+ dwords[index].fields.extend(dwords[index + 1].fields)
+ dwords[index].addresses.extend(dwords[index + 1].addresses)
+ dwords[index].size = 64
+ dwords[index + 1] = dwords[index]
+ index = index + 1
+
+ def collect_dwords_and_length(self):
+ dwords = {}
+ self.collect_dwords(dwords, 0, "")
+
+ # Determine number of dwords in this group. If we have a size, use
+ # that, since that'll account for MBZ dwords at the end of a group
+ # (like dword 8 on BDW+ 3DSTATE_HS). Otherwise, use the largest dword
+ # index we've seen plus one.
+ if self.size > 0:
+ length = self.size // 32
+ elif dwords:
+ length = max(dwords.keys()) + 1
+ else:
+ length = 0
+
+ return (dwords, length)
+
+ def emit_pack_function(self, root, dwords, length):
+ for index in range(length):
+ # Handle MBZ dwords
+ if index not in dwords:
+ print("")
+ print(" dw[%d] = 0;" % index)
+ continue
+
+ # For 64 bit dwords, we aliased the two dword entries in the dword
+ # dict it occupies. Now that we're emitting the pack function,
+ # skip the duplicate entries.
+ dw = dwords[index]
+ if index > 0 and index - 1 in dwords and dw == dwords[index - 1]:
+ continue
+
+ # Special case: only one field and it's a struct at the beginning
+ # of the dword. In this case we pack directly into the
+ # destination. This is the only way we handle embedded structs
+ # larger than 32 bits.
+ if len(dw.fields) == 1:
+ field = dw.fields[0]
+ name = field.name + field.dim
+ if root.is_known_struct(field.type) and field.start % 32 == 0:
+ print("")
+ print(" %s_pack(data, &dw[%d], &values->%s);" %
+ (self.parser.gen_prefix(safe_name(field.type)),
+ index, name))
+ continue
+
+ # Pack any fields of struct type first so we have integer values
+ # to the dword for those fields.
+ field_index = 0
+ for field in dw.fields:
+ if isinstance(field, Field) and root.is_known_struct(field.type):
+ name = field.name + field.dim
+ print("")
+ print(" uint32_t v%d_%d;" % (index, field_index))
+ print(" %s_pack(data, &v%d_%d, &values->%s);" %
+ (self.parser.gen_prefix(safe_name(field.type)),
+ index, field_index, name))
+ field_index = field_index + 1
+
+ print("")
+ dword_start = index * 32
+ address_count = len(dw.addresses);
+
+ if dw.size == 32 and not dw.addresses:
+ v = None
+ print(" dw[%d] =" % index)
+ elif len(dw.fields) > address_count:
+ v = "v%d" % index
+ print(" const uint%d_t %s =" % (dw.size, v))
+ else:
+ v = "0"
+
+ field_index = 0
+ non_address_fields = []
+ for field in dw.fields:
+ if field.type != "mbo":
+ name = field.name + field.dim
+
+ if field.type == "mbo":
+ non_address_fields.append("__pvr_mbo(%d, %d)" %
+ (field.start - dword_start,
+ field.end - dword_start))
+ elif field.type == "address":
+ pass
+ elif field.type == "uint":
+ non_address_fields.append("__pvr_uint(values->%s, %d, %d)" %
+ (name, field.start - dword_start,
+ field.end - dword_start))
+ elif root.is_known_enum(field.type):
+ non_address_fields.append("__pvr_uint(values->%s, %d, %d)" %
+ (name, field.start - dword_start,
+ field.end - dword_start))
+ elif field.type == "int":
+ non_address_fields.append("__pvr_sint(values->%s, %d, %d)" %
+ (name, field.start - dword_start,
+ field.end - dword_start))
+ elif field.type == "bool":
+ non_address_fields.append("__pvr_uint(values->%s, %d, %d)" %
+ (name, field.start - dword_start,
+ field.end - dword_start))
+ elif field.type == "float":
+ non_address_fields.append("__pvr_float(values->%s)" % name)
+ elif field.type == "offset":
+ non_address_fields.append(
+ "__pvr_offset(values->%s,"" %d, %d)" %
+ (name, field.start - dword_start,
+ field.end - dword_start))
+ elif field.is_struct_type():
+ non_address_fields.append("__pvr_uint(v%d_%d, %d, %d)" %
+ (index, field_index,
+ field.start - dword_start,
+ field.end - dword_start))
+ field_index = field_index + 1
+ else:
+ non_address_fields.append("/* unhandled field %s,"
+ " type %s */\n" %
+ (name, field.type))
+
+ if non_address_fields:
+ print(" |\n".join(" " + f for f in non_address_fields) +
+ ";")
+
+ if dw.size == 32:
+ for i in range(address_count):
+ print(" dw[%d] = __pvr_address("
+ "values->%s, %d, %d, %d) | %s;" %
+ (index, dw.addresses[i].name + field.dim,
+ dw.addresses[i].shift, dw.addresses[i].start - dword_start,
+ dw.addresses[i].end - dword_start, v))
+ continue
+
+ v_accumulated_addr = ""
+ for i in range(address_count):
+ v_address = "v%d_address" % i
+ v_accumulated_addr += "v%d_address" % i
+ print(" const uint64_t %s =\n "
+ " __pvr_address(values->%s, %d, %d, %d);" %
+ (v_address, dw.addresses[i].name + field.dim, dw.addresses[i].shift,
+ dw.addresses[i].start - dword_start,
+ dw.addresses[i].end - dword_start))
+ if i < (address_count - 1):
+ v_accumulated_addr += " |\n "
+
+ if dw.addresses:
+ if len(dw.fields) > address_count:
+ print(" dw[%d] = %s | %s;" % (index, v_accumulated_addr, v))
+ print(" dw[%d] = (%s >> 32) | (%s >> 32);" %
+ (index + 1, v_accumulated_addr, v))
+ continue
+ else:
+ v = v_accumulated_addr
+
+ print(" dw[%d] = %s;" % (index, v))
+ print(" dw[%d] = %s >> 32;" % (index + 1, v))
+
+class Parser(object):
+    """Expat-driven XML parser that builds the element tree and triggers
+    emission of the generated header when </csbgen> closes.
+    """
+    def __init__(self, filename):
+        self.parser = xml.parsers.expat.ParserCreate()
+        self.parser.StartElementHandler = self.start_element
+        self.parser.EndElementHandler = self.end_element
+
+        # Stack of currently-open elements; top of stack is the parent of
+        # the next element seen.
+        self.context = []
+
+    def start_element(self, name, attrs):
+        # Peek the parent; only valid once the root <csbgen> has been pushed.
+        if not name == "csbgen":
+            parent = self.context[-1]
+
+        if name == "csbgen":
+            if self.context:
+                raise RuntimeError('Can only have 1 csbgen block and it has ' +
+                                   'to contain all of the other elements.')
+
+            csbgen = Csbgen(attrs["name"], attrs["prefix"], self.filename)
+            self.context.append(csbgen)
+
+        elif name == "struct":
+            struct = Struct(parent , attrs["name"], attrs["length"])
+            self.context.append(struct)
+
+        elif name == "field":
+            default = None
+            if "default" in attrs.keys():
+                default = attrs["default"]
+
+            shift = None
+            if "shift" in attrs.keys():
+                shift = attrs["shift"]
+
+            field = Field(parent,
+                          name = attrs["name"],
+                          start = int(attrs["start"]),
+                          end = int(attrs["end"]),
+                          type = attrs["type"],
+                          default = default,
+                          shift = shift)
+            self.context.append(field)
+
+        elif name == "enum":
+            enum = Enum(parent, attrs["name"])
+            self.context.append(enum)
+
+        elif name == "value":
+            value = Value(parent, attrs["name"], ast.literal_eval(attrs["value"]))
+            self.context.append(value)
+
+        elif name == "define":
+            define = Define(parent, attrs["name"], ast.literal_eval(attrs["value"]))
+            self.context.append(define)
+
+        elif name == "condition":
+            condition = Condition(parent, name=attrs["check"], type=attrs["type"])
+
+            # Starting with the if statement we push it in the context. For each
+            # branch following (elif, and else) we assign the top of stack as
+            # its parent, pop() and push the new condition. So per branch we end
+            # up having [..., struct, condition]. We don't push an endif since
+            # it's not supposed to have any children and it's supposed to close
+            # the whole if statement.
+
+            if condition.type != 'if':
+                # Remove the parent condition from the context. We were peeking
+                # before, now we pop().
+                self.context.pop()
+
+            if condition.type == 'endif':
+                if not isinstance(parent, Condition):
+                    raise RuntimeError('Cannot close unopened or already ' +
+                                       'closed condition. Condition: "%s"' % condition.name)
+            else:
+                self.context.append(condition)
+
+        else:
+            raise RuntimeError('Unknown tag: "%s"' % name)
+
+    def end_element(self, name):
+        # Conditions are popped in start_element (each branch replaces its
+        # predecessor), so a closing condition tag only sanity-checks.
+        if name == 'condition':
+            element = self.context[-1]
+            if not isinstance(element, Condition) and not isinstance(element, Struct):
+                raise RuntimeError("Expected condition or struct tag to be closed.")
+
+            return
+
+        element = self.context.pop()
+
+        if name == "struct":
+            if not isinstance(element, Struct):
+                raise RuntimeError("Expected struct tag to be closed.")
+        elif name == "field":
+            if not isinstance(element, Field):
+                raise RuntimeError("Expected field tag to be closed.")
+        elif name == "enum":
+            if not isinstance(element, Enum):
+                raise RuntimeError("Expected enum tag to be closed.")
+        elif name == "value":
+            if not isinstance(element, Value):
+                raise RuntimeError("Expected value tag to be closed.")
+        elif name == "define":
+            if not isinstance(element, Define):
+                raise RuntimeError("Expected define tag to be closed.")
+        elif name == "csbgen":
+            if not isinstance(element, Csbgen):
+                raise RuntimeError("""Expected csbgen tag to be closed.
+                Some tags may have not been closed""")
+
+            # Root closed: emit the entire generated header now.
+            element.emit()
+        else:
+            raise RuntimeError('Unknown closing element: "%s"' % name)
+
+    def parse(self, filename):
+        file = open(filename, "rb")
+        self.filename = filename
+        self.parser.ParseFile(file)
+        file.close()
+
+if len(sys.argv) < 2:
+ print("No input xml file specified")
+ sys.exit(1)
+
+input_file = sys.argv[1]
+
+p = Parser()
+p.parse(input_file)
--- /dev/null
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# XML descriptions of Rogue control-stream / register blocks; each is turned
+# into a csbgen C header by gen_pack_header.py.
+pvr_xml_files = [
+  'rogue_cdm.xml',
+  'rogue_cr.xml',
+  'rogue_ipf.xml',
+  'rogue_lls.xml',
+  'rogue_pbestate.xml',
+  'rogue_pds.xml',
+  'rogue_ppp.xml',
+  'rogue_texstate.xml',
+  'rogue_vdm.xml',
+]
+
+pvr_xml_pack = []
+foreach f : pvr_xml_files
+  _name = '@0@.h'.format(f.split('.')[0])
+  pvr_xml_pack += custom_target(
+    _name,
+    input : ['gen_pack_header.py', f],
+    output : _name,
+    command : [prog_python, '@INPUT@'],
+    # The generator writes the header to stdout; capture it into the file.
+    capture : true,
+  )
+endforeach
+
+# Depend on this to ensure the generated headers exist before compiling.
+dep_csbgen = declare_dependency(sources : [pvr_xml_pack])
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PACKET_HELPERS_H
+#define PVR_PACKET_HELPERS_H
+
+#include <assert.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifndef __pvr_validate_value
+# define __pvr_validate_value(x)
+#endif
+
+#ifdef NDEBUG
+# define NDEBUG_UNUSED __attribute__((unused))
+#else
+# define NDEBUG_UNUSED
+#endif
+
+#ifndef __pvr_address_type
+# error #define __pvr_address_type before including this file
+#endif
+
+#ifndef __pvr_get_address
+# error #define __pvr_get_address before including this file
+#endif
+
+/* Used by __pvr_float() to reinterpret a float's bits as uint32_t without
+ * violating strict aliasing.
+ */
+union __pvr_value {
+   float f;
+   uint32_t dw;
+};
+
+/* Returns a mask with bits [start, end] (inclusive) set — "must be one". */
+static inline __attribute__((always_inline)) uint64_t __pvr_mbo(uint32_t start,
+                                                                uint32_t end)
+{
+   return (~0ull >> (64 - (end - start + 1))) << start;
+}
+
+/* Shifts v into bit position [start, end]. Debug builds assert that v fits
+ * in the field's width; a full 64-bit field skips the range check.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_uint(uint64_t v, uint32_t start, NDEBUG_UNUSED uint32_t end)
+{
+   __pvr_validate_value(v);
+
+#ifndef NDEBUG
+   const int width = end - start + 1;
+   if (width < 64) {
+      const uint64_t max = (1ull << width) - 1;
+      assert(v <= max);
+   }
+#endif
+
+   return v << start;
+}
+
+/* Packs a signed value into [start, end] using two's-complement truncation
+ * to the field width. Debug builds assert v is representable in that width.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_sint(int64_t v, uint32_t start, uint32_t end)
+{
+   const int width = end - start + 1;
+
+   __pvr_validate_value(v);
+
+#ifndef NDEBUG
+   if (width < 64) {
+      const int64_t max = (1ll << (width - 1)) - 1;
+      const int64_t min = -(1ll << (width - 1));
+      assert(min <= v && v <= max);
+   }
+#endif
+
+   const uint64_t mask = ~0ull >> (64 - width);
+
+   return (v & mask) << start;
+}
+
+/* Packs an offset that is already positioned/aligned: v is returned
+ * unshifted, and debug builds assert no bits fall outside [start, end].
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_offset(uint64_t v,
+             NDEBUG_UNUSED uint32_t start,
+             NDEBUG_UNUSED uint32_t end)
+{
+   __pvr_validate_value(v);
+#ifndef NDEBUG
+   uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start;
+
+   assert((v & ~mask) == 0);
+#endif
+
+   return v;
+}
+
+/* Packs an address field: converts via the user-supplied __pvr_get_address,
+ * drops the low 'shift' alignment bits, places the result at 'start' and
+ * masks it to [start, end]. Bits outside the field are silently discarded.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_address(__pvr_address_type address,
+              uint32_t shift,
+              uint32_t start,
+              uint32_t end)
+{
+   uint64_t addr_u64 = __pvr_get_address(address);
+   uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start;
+
+   return ((addr_u64 >> shift) << start) & mask;
+}
+
+/* Returns the raw 32-bit pattern of the float v (union-based type punning,
+ * well-defined in C) for writing into a 32-bit register word.
+ */
+static inline __attribute__((always_inline)) uint32_t __pvr_float(float v)
+{
+   __pvr_validate_value(v);
+   return ((union __pvr_value){ .f = (v) }).dw;
+}
+
+/* Converts v to signed fixed point with 'fract_bits' fractional bits
+ * (round-to-nearest via llroundf) and packs it into the bit field
+ * [start, end], masking to the field width so the sign bits do not spill.
+ * Debug builds assert v lies within the field's representable range.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_sfixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
+{
+   __pvr_validate_value(v);
+
+   const float factor = (1 << fract_bits);
+
+#ifndef NDEBUG
+   const float max = ((1 << (end - start)) - 1) / factor;
+   const float min = -(1 << (end - start)) / factor;
+   assert(min <= v && v <= max);
+#endif
+
+   const int64_t int_val = llroundf(v * factor);
+   const uint64_t mask = ~0ull >> (64 - (end - start + 1));
+
+   return (int_val & mask) << start;
+}
+
+/* Converts the non-negative v to unsigned fixed point with 'fract_bits'
+ * fractional bits (round-to-nearest via llroundf) and shifts it to 'start'.
+ * Debug builds assert 0 <= v <= field max; release builds do not mask, so
+ * out-of-range values would corrupt neighbouring fields.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_ufixed(float v,
+             uint32_t start,
+             NDEBUG_UNUSED uint32_t end,
+             uint32_t fract_bits)
+{
+   __pvr_validate_value(v);
+
+   const float factor = (1 << fract_bits);
+
+#ifndef NDEBUG
+   const float max = ((1 << (end - start + 1)) - 1) / factor;
+   const float min = 0.0f;
+   assert(min <= v && v <= max);
+#endif
+
+   const uint64_t uint_val = llroundf(v * factor);
+
+   return uint_val << start;
+}
+
+#undef NDEBUG_UNUSED
+
+#endif /* PVR_PACKET_HELPERS_H */
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="CDMCTRL">
+
+ <enum name="BLOCK_TYPE">
+ <value name="COMPUTE_KERNEL" value="0"/>
+ <value name="STREAM_LINK" value="1"/>
+ <value name="STREAM_TERMINATE" value="2"/>
+ </enum>
+
+ <enum name="USC_TARGET">
+ <value name="ALL" value="0"/>
+ <value name="ANY" value="1"/>
+ </enum>
+
+ <enum name="SD_TYPE">
+ <value name="NONE" value="0"/>
+ <value name="PDS" value="1"/>
+ <value name="USC" value="2"/>
+ </enum>
+
+ <struct name="KERNEL0" length="1">
+ <field name="block_type" start="30" end="31" type="BLOCK_TYPE" default="COMPUTE_KERNEL"/>
+ <field name="indirect_present" start="29" end="29" type="bool"/>
+ <field name="global_offsets_present" start="28" end="28" type="bool"/>
+ <field name="event_object_present" start="27" end="27" type="bool"/>
+ <field name="usc_common_size" start="18" end="26" type="uint">
+ <define name="UNIT_SIZE" value="64"/>
+ <define name="MAX_SIZE" value="256"/>
+ </field>
+ <field name="usc_unified_size" start="12" end="17" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="pds_temp_size" start="8" end="11" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="pds_data_size" start="2" end="7" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="usc_target" start="1" end="1" type="USC_TARGET"/>
+ <field name="fence" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="KERNEL1" length="1">
+ <field name="data_addr" start="4" end="31" shift="4" type="address"/>
+ <field name="sd_type" start="2" end="3" type="SD_TYPE"/>
+ <field name="usc_common_shared" start="1" end="1" type="bool"/>
+ </struct>
+
+ <struct name="KERNEL2" length="1">
+ <field name="code_addr" start="4" end="31" shift="4" type="address"/>
+ <field name="one_wg_per_task" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="KERNEL3" length="1">
+ <field name="workgroup_x" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="KERNEL4" length="1">
+ <field name="workgroup_y" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="KERNEL5" length="1">
+ <field name="workgroup_z" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="KERNEL6" length="1">
+ <field name="indirect_addrmsb" start="0" end="7" shift="32" type="address"/>
+ </struct>
+
+ <struct name="KERNEL7" length="1">
+ <field name="indirect_addrlsb" start="2" end="31" shift="2" type="address"/>
+ </struct>
+
+ <struct name="KERNEL8" length="1">
+ <field name="max_instances" start="27" end="31" type="uint">
+ <define name="MAX_SIZE" value="31"/>
+ </field>
+ <field name="workgroup_size_x" start="18" end="26" type="uint"/>
+ <field name="workgroup_size_y" start="9" end="17" type="uint"/>
+ <field name="workgroup_size_z" start="0" end="8" type="uint"/>
+ </struct>
+
+ <struct name="KERNEL9" length="1">
+ <field name="global_offset_x" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="KERNEL10" length="1">
+ <field name="global_offset_y" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="KERNEL11" length="1">
+ <field name="global_offset_z" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="STREAM_LINK0" length="1">
+ <field name="block_type" start="30" end="31" type="BLOCK_TYPE" default="STREAM_LINK"/>
+ <field name="link_addrmsb" start="0" end="7" shift="32" type="address"/>
+ </struct>
+
+ <struct name="STREAM_LINK1" length="1">
+ <field name="link_addrlsb" start="2" end="31" shift="2" type="address"/>
+ </struct>
+
+ <struct name="STREAM_TERMINATE" length="1">
+ <field name="block_type" start="30" end="31" type="BLOCK_TYPE" default="STREAM_TERMINATE"/>
+ </struct>
+
+</csbgen>
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="CR">
+
+ <define name="PM_VHEAP_TABLE_SIZE" value="0x180" />
+
+ <enum name="COMP_IADDR_TYPE">
+ <value name="INDIRECT_1TILE" value="0"/>
+ <value name="INDIRECT_4TILE" value="1"/>
+ </enum>
+
+ <enum name="COMPRESS_SIZE">
+ <value name="BLOCK_8X8" value="0"/>
+ <value name="BLOCK_16X4" value="1"/>
+ </enum>
+
+ <enum name="DIR_TYPE">
+ <value name="TL2BR" value="0"/>
+ <value name="TR2BL" value="1"/>
+ <value name="BL2TR" value="2"/>
+ <value name="BR2TL" value="3"/>
+ </enum>
+
+ <enum name="ISP_AA_MODE_TYPE">
+ <value name="AA_NONE" value="0"/>
+ <value name="AA_2X" value="1"/>
+ <value name="AA_4X" value="2"/>
+ <value name="AA_8X" value="3"/>
+ </enum>
+
+ <enum name="ISP_RENDER_MODE_TYPE">
+ <value name="NORM" value="0"/>
+ <value name="FAST_2D" value="1"/>
+ <value name="FAST_SCALE" value="2"/>
+ </enum>
+
+ <enum name="MEMLAYOUT">
+ <value name="LINEAR" value="0"/>
+ <value name="TWIDDLE_2D" value="1"/>
+ <value name="TWIDDLE_3D" value="2"/>
+ <value name="TILED" value="3"/>
+ </enum>
+
+ <enum name="MODE_TYPE">
+ <value name="DX9" value="0"/>
+ <value name="DX10" value="1"/>
+ <value name="OGL" value="2"/>
+ </enum>
+
+ <enum name="PIPE_NUM">
+ <value name="PIPE_ONE" value="0"/>
+ <value name="PIPE_TWO" value="1"/>
+ <value name="PIPE_THREE" value="2"/>
+ <value name="PIPE_FOUR" value="3"/>
+ <value name="PIPE_FIVE" value="4"/>
+ <value name="PIPE_SIX" value="5"/>
+ <value name="PIPE_SEVEN" value="6"/>
+ <value name="PIPE_EIGHT" value="7"/>
+ <value name="PIPE_NINE" value="8"/>
+ <value name="PIPE_TEN" value="9"/>
+ <value name="PIPE_ELEVEN" value="10"/>
+ <value name="PIPE_TWELVE" value="11"/>
+ <value name="PIPE_THIRTEEN" value="12"/>
+ <value name="PIPE_FOURTEEN" value="13"/>
+ <value name="PIPE_FIFTEEN" value="14"/>
+ <value name="PIPE_SIXTEEN" value="15"/>
+ </enum>
+
+ <enum name="PIXEL_WIDTH">
+ <value name="2REGISTERS" value="0"/>
+ <value name="4REGISTERS" value="1"/>
+ <value name="8REGISTERS" value="2"/>
+ <value name="1REGISTER" value="3"/>
+ </enum>
+
+ <enum name="ROTATION_TYPE">
+ <value name="0_DEG" value="0"/>
+ <value name="90_DEG" value="1"/>
+ <value name="180_DEG" value="2"/>
+ <value name="270_DEG" value="3"/>
+ </enum>
+
+ <enum name="SIZE">
+ <value name="1_PIXEL" value="0"/>
+ <value name="2_PIXEL" value="1"/>
+ <value name="4_PIXEL" value="2"/>
+ <value name="8_PIXEL" value="3"/>
+ <value name="16_PIXEL" value="4"/>
+ <value name="32_PIXEL" value="5"/>
+ <value name="64_PIXEL" value="6"/>
+ <value name="128_PIXEL" value="7"/>
+ <value name="256_PIXEL" value="8"/>
+ <value name="512_PIXEL" value="9"/>
+ <value name="1K_PIXEL" value="10"/>
+ <value name="2K_PIXEL" value="11"/>
+ <value name="4K_PIXEL" value="12"/>
+ <value name="8K_PIXEL" value="13"/>
+ <value name="16K_PIXEL" value="14"/>
+ </enum>
+
+ <enum name="SWIZ">
+ <value name="SOURCE_CHAN0" value="0"/>
+ <value name="SOURCE_CHAN1" value="1"/>
+ <value name="SOURCE_CHAN2" value="2"/>
+ <value name="SOURCE_CHAN3" value="3"/>
+ <value name="ONE" value="4"/>
+ <value name="ZERO" value="5"/>
+ </enum>
+
+ <enum name="TFBC_LOSSY">
+ <value name="LOSSLESS" value="0"/>
+ <value name="LOSSY_75" value="1"/>
+ <value name="LOSSY_50" value="2"/>
+ <value name="LOSSY_25" value="3"/>
+ </enum>
+
+ <enum name="TWOCOMP_GAMMA">
+ <value name="GAMMA_BOTTOM_CHANNEL" value="0"/>
+ <value name="GAMMA_BOTH_CHANNELS" value="1"/>
+ </enum>
+
+ <enum name="ZLOADFORMAT_TYPE">
+ <value name="F32Z" value="0"/>
+ <value name="24BITINT" value="1"/>
+ <value name="16BITINT" value="2"/>
+ <value name="F64Z" value="3"/>
+ </enum>
+
+ <enum name="ZSTOREFORMAT_TYPE">
+ <value name="F32Z" value="0"/>
+ <value name="24BITINT" value="1"/>
+ <value name="16BITINT" value="2"/>
+ <value name="F64Z" value="3"/>
+ </enum>
+
+ <struct name="PM_MTILE_ARRAY" length="2">
+ <field name="base_addr" start="4" end="39" shift="4" type="address"/>
+ </struct>
+
+ <struct name="PM_VHEAP_TABLE" length="2">
+ <field name="base_addr" start="4" end="39" shift="4" type="address"/>
+ </struct>
+
+ <struct name="PM_MLIST0_BASE" length="2">
+ <field name="addr" start="4" end="39" shift="4" type="address"/>
+ </struct>
+
+ <struct name="VDM_CTRL_STREAM_BASE" length="2">
+ <field name="addr" start="2" end="39" shift="2" type="address"/>
+ </struct>
+
+ <struct name="VDM_CALL_STACK_POINTER" length="2">
+ <field name="addr" start="3" end="39" shift="3" type="address"/>
+ </struct>
+
+ <struct name="VDM_CONTEXT_STATE_BASE" length="2">
+ <field name="addr" start="4" end="39" shift="4" type="address"/>
+ </struct>
+
+ <struct name="VDM_CONTEXT_STORE_TASK0" length="2">
+ <field name="pds_state1" start="32" end="63" type="uint"/>
+ <field name="pds_state0" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="VDM_CONTEXT_STORE_TASK1" length="1">
+ <field name="pds_state2" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="VDM_CONTEXT_STORE_TASK2" length="2">
+ <field name="stream_out2" start="32" end="63" type="uint"/>
+ <field name="stream_out1" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="VDM_CONTEXT_RESUME_TASK0" length="2">
+ <field name="pds_state1" start="32" end="63" type="uint"/>
+ <field name="pds_state0" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="VDM_CONTEXT_RESUME_TASK1" length="1">
+ <field name="pds_state2" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="VDM_CONTEXT_RESUME_TASK2" length="2">
+ <field name="stream_out2" start="32" end="63" type="uint"/>
+ <field name="stream_out1" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="CDM_CONTEXT_STATE_BASE" length="2">
+ <field name="addr" start="4" end="39" shift="4" type="address"/>
+ </struct>
+
+ <struct name="CDM_CONTEXT_PDS0" length="2">
+ <field name="data_addr" start="36" end="63" shift="4" type="address"/>
+ <field name="code_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="CDM_CTRL_STREAM_BASE" length="2">
+ <field name="addr" start="2" end="39" shift="2" type="address"/>
+ </struct>
+
+ <struct name="CDM_CONTEXT_PDS1" length="1">
+ <field name="pds_seq_dep" start="29" end="29" type="bool"/>
+ <field name="usc_seq_dep" start="28" end="28" type="bool"/>
+ <!-- false=All, true=Any -->
+ <field name="target" start="27" end="27" type="bool"/>
+ <field name="unified_size" start="21" end="26" type="uint"/>
+ <field name="common_shared" start="20" end="20" type="bool"/>
+ <field name="common_size" start="11" end="19" type="uint">
+ <define name="UNIT_SIZE" value="64"/>
+ </field>
+ <field name="temp_size" start="7" end="10" type="uint"/>
+ <field name="data_size" start="1" end="6" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="fence" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="CDM_TERMINATE_PDS" length="2">
+ <field name="data_addr" start="36" end="63" shift="4" type="address"/>
+ <field name="code_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="CDM_TERMINATE_PDS1" length="1">
+ <field name="pds_seq_dep" start="29" end="29" type="bool"/>
+ <field name="usc_seq_dep" start="28" end="28" type="bool"/>
+ <field name="target" start="27" end="27" type="bool"/>
+ <field name="unified_size" start="21" end="26" type="uint"/>
+ <field name="common_shared" start="20" end="20" type="bool"/>
+ <field name="common_size" start="11" end="19" type="uint"/>
+ <field name="temp_size" start="7" end="10" type="uint"/>
+ <field name="data_size" start="1" end="6" type="uint"/>
+ <field name="fence" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="CDM_CONTEXT_LOAD_PDS0" length="2">
+ <field name="data_addr" start="36" end="63" shift="4" type="address"/>
+ <field name="code_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="COMPUTE_CLUSTER" length="1">
+ <field name="mask" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="PDS_CTRL" length="2">
+ <field name="sm_overlap_enable" start="55" end="55" type="bool"/>
+ <condition type="if" check="ROGUEXE"/>
+ <condition type="if" check="COMPUTE"/>
+ <field name="roguexe_max_num_cdm_tasks" start="24" end="31" type="uint"/>
+ <condition type="endif" check="COMPUTE"/>
+ <condition type="if" check="NUM_RASTER_PIPES > 0"/>
+ <field name="roguexe_max_num_pdm_tasks" start="16" end="23" type="uint"/>
+ <condition type="endif" check="NUM_RASTER_PIPES > 0"/>
+ <condition type="if" check="NUM_TA > 0"/>
+ <field name="roguexe_max_num_vdm_tasks" start="8" end="15" type="uint"/>
+ <condition type="endif" check="NUM_TA > 0"/>
+ <condition type="else" check="ROGUEXE"/>
+ <condition type="if" check="COMPUTE"/>
+ <field name="max_num_cdm_tasks" start="24" end="30" type="uint"/>
+ <condition type="endif" check="COMPUTE"/>
+ <field name="max_num_pdm_tasks" start="16" end="22" type="uint"/>
+ <field name="max_num_vdm_tasks" start="8" end="14" type="uint"/>
+ <condition type="endif" check="ROGUEXE"/>
+ </struct>
+
+ <struct name="EVENT_PIXEL_PDS_CODE" length="1">
+ <field name="addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="EVENT_PIXEL_PDS_DATA" length="1">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="EVENT_PIXEL_PDS_INFO" length="1">
+ <field name="usc_sr_size" start="9" end="14" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="temp_stride" start="5" end="8" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ <field name="const_size" start="0" end="4" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ </struct>
+
+ <struct name="PDS_BGRND0_BASE" length="2">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="texunicode_addr" start="36" end="63" shift="4" type="address"/>
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="shader_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="PDS_BGRND1_BASE" length="2">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="texturedata_addr" start="36" end="63" shift="4" type="address"/>
+ <!-- Unused in the Vulkan driver. -->
+ <field name="varying_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="PDS_BGRND2_BASE" length="2">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="uniformdata_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="PDS_BGRND3_SIZEINFO" length="2">
+ <field name="usc_sharedsize" start="55" end="63" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="pds_batchnum" start="32" end="45" type="uint"/>
+ <field name="pds_uniformsize" start="23" end="31" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ <field name="pds_texturestatesize" start="16" end="22" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ <field name="pds_varyingsize" start="10" end="15" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ <field name="usc_varyingsize" start="4" end="9" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="pds_tempsize" start="0" end="3" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ </struct>
+
+ <struct name="TE_AA" length="1">
+ <condition type="if" check="SIMPLE_INTERNAL_PARAMETER_FORMAT && ISP_SAMPLES_PER_PIXEL > 2"/>
+ <field name="y2" start="3" end="3" type="bool"/>
+ <condition type="endif" check="SIMPLE_INTERNAL_PARAMETER_FORMAT && ISP_SAMPLES_PER_PIXEL > 2"/>
+ <field name="y" start="2" end="2" type="bool"/>
+ <field name="x" start="1" end="1" type="bool"/>
+ <field name="x2" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="TE_MTILE1" length="1">
+ <field name="x1" start="18" end="26" type="uint"/>
+ <field name="x2" start="9" end="17" type="uint"/>
+ <field name="x3" start="0" end="8" type="uint"/>
+ </struct>
+
+ <struct name="TE_MTILE2" length="1">
+ <field name="y1" start="18" end="26" type="uint"/>
+ <field name="y2" start="9" end="17" type="uint"/>
+ <field name="y3" start="0" end="8" type="uint"/>
+ </struct>
+
+ <struct name="TE_SCREEN" length="1">
+ <field name="ymax" start="12" end="20" type="uint"/>
+ <field name="xmax" start="0" end="8" type="uint"/>
+ </struct>
+
+ <struct name="TE_PSG" length="1">
+ <condition type="if" check="ROGUEXE"/>
+ <condition type="if" check="TILE_REGION_PROTECTION"/>
+ <field name="force_protect" start="22" end="22" type="uint"/>
+ <condition type="endif" check="TILE_REGION_PROTECTION"/>
+ <field name="cs_size" start="21" end="21" type="uint"/>
+ <field name="enable_pwr_gate_state" start="20" end="20" type="bool"/>
+ <condition type="endif" check="ROGUEXE"/>
+ <field name="enable_context_state_restore" start="19" end="19" type="bool"/>
+ <field name="zonlyrender" start="18" end="18" type="bool"/>
+ <field name="completeonterminate" start="17" end="17" type="bool"/>
+ <field name="cache_bypass" start="14" end="14" type="bool"/>
+ <field name="forcenewstate" start="13" end="13" type="bool"/>
+ <field name="region_stride" start="0" end="10" type="uint">
+ <define name="UNIT_SIZE" value="4096"/>
+ </field>
+ </struct>
+
+ <!-- FIXME: This is only a partial definition as (at the time of writing)
+ csbgen doesn't support multiple address fields within structure.
+ -->
+ <!-- FIXME: When csbgen supports conditional structs, make this
+ conditional on NUM_TA > 0.
+ -->
+ <struct name="TE_PSGREGION_ADDR" length="2">
+ <field name="base" start="6" end="33" shift="6" type="address"/>
+ </struct>
+
+ <!-- FIXME: This is only a partial definition as (at the time of writing)
+ csbgen doesn't support multiple address fields within structure.
+ -->
+ <struct name="TE_TPC_ADDR" length="2">
+ <field name="base" start="6" end="33" shift="6" type="address"/>
+ </struct>
+
+ <struct name="PPP_MULTISAMPLECTL" length="2">
+ <condition type="if" check="MAX_MULTISAMPLE == 8"/>
+ <field name="msaa_y7" start="60" end="63" type="uint"/>
+ <field name="msaa_x7" start="56" end="59" type="uint"/>
+ <field name="msaa_y6" start="52" end="55" type="uint"/>
+ <field name="msaa_x6" start="48" end="51" type="uint"/>
+ <field name="msaa_y5" start="44" end="47" type="uint"/>
+ <field name="msaa_x5" start="40" end="43" type="uint"/>
+ <field name="msaa_y4" start="36" end="39" type="uint"/>
+ <field name="msaa_x4" start="32" end="35" type="uint"/>
+ <condition type="endif" check="MAX_MULTISAMPLE == 8"/>
+ <field name="msaa_y3" start="28" end="31" type="uint"/>
+ <field name="msaa_x3" start="24" end="27" type="uint"/>
+ <field name="msaa_y2" start="20" end="23" type="uint"/>
+ <field name="msaa_x2" start="16" end="19" type="uint"/>
+ <field name="msaa_y1" start="12" end="15" type="uint"/>
+ <field name="msaa_x1" start="8" end="11" type="uint"/>
+ <field name="msaa_y0" start="4" end="7" type="uint"/>
+ <field name="msaa_x0" start="0" end="3" type="uint"/>
+ </struct>
+
+ <struct name="PPP_CTRL" length="1">
+ <field name="vpt_scissor" start="12" end="12" type="bool"/>
+ <field name="flush_mode" start="11" end="11" type="uint"/>
+ <field name="bfcull_restrict_clip" start="10" end="10" type="bool"/>
+ <field name="fixed_point_format" start="9" end="9" type="uint"/>
+ <field name="default_point_size" start="8" end="8" type="bool"/>
+ <field name="bfcull1_disable" start="7" end="7" type="bool"/>
+ <field name="bfcull2_disable" start="6" end="6" type="bool"/>
+ <field name="fccull_disable" start="5" end="5" type="bool"/>
+ <field name="oscull_disable" start="4" end="4" type="bool"/>
+ <field name="socull_disable" start="2" end="2" type="bool"/>
+ <field name="wclampen" start="1" end="1" type="bool"/>
+ <field name="opengl" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="PPP_SCREEN" length="1">
+ <field name="pixymax" start="16" end="30" type="uint"/>
+ <field name="pixxmax" start="0" end="14" type="uint"/>
+ </struct>
+
+ <!-- FIXME: This is only a partial definition as (at the time of writing)
+ csbgen doesn't support multiple address fields within structure.
+ -->
+ <struct name="TA_RTC_ADDR" length="2">
+ <field name="base" start="6" end="33" shift="6" type="address"/>
+ </struct>
+
+ <struct name="TA_CONTEXT_STATE_BASE" length="2">
+ <field name="addr" start="4" end="39" shift="4" type="address"/>
+ </struct>
+
+ <struct name="ISP_RENDER" length="1">
+ <field name="disable_eomt" start="5" end="5" type="bool"/>
+ <field name="resume" start="4" end="4" type="bool"/>
+ <field name="dir_type" start="2" end="3" type="DIR_TYPE"/>
+ <field name="mode_type" start="0" end="1" type="ISP_RENDER_MODE_TYPE"/>
+ </struct>
+
+ <struct name="ISP_RENDER_ORIGIN" length="1">
+ <field name="x" start="16" end="25" type="uint"/>
+ <field name="y" start="0" end="9" type="uint"/>
+ </struct>
+
+ <struct name="ISP_MTILE_SIZE" length="1">
+ <field name="x" start="16" end="25" type="uint"/>
+ <field name="y" start="0" end="9" type="uint"/>
+ </struct>
+
+ <struct name="ISP_BGOBJDEPTH" length="1">
+ <field name="value" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="ISP_BGOBJVALS" length="1">
+ <field name="enablebgtag" start="9" end="9" type="bool"/>
+ <field name="mask" start="8" end="8" type="bool"/>
+ <field name="stencil" start="0" end="7" type="uint"/>
+ </struct>
+
+ <struct name="ISP_AA" length="1">
+ <field name="mode" start="0" end="1" type="ISP_AA_MODE_TYPE"/>
+ </struct>
+
+ <struct name="ISP_CTL" length="1">
+ <field name="skip_init_hdrs" start="31" end="31" type="bool"/>
+ <field name="line_style" start="30" end="30" type="bool"/>
+ <field name="line_style_pix" start="29" end="29" type="bool"/>
+ <field name="pair_tiles_vert" start="28" end="28" type="bool"/>
+ <field name="pair_tiles" start="27" end="27" type="bool"/>
+ <field name="creq_buf_en" start="26" end="26" type="bool"/>
+ <field name="tile_age_en" start="25" end="25" type="bool"/>
+ <field name="isp_sample_pos_mode" start="23" end="24" type="MODE_TYPE"/>
+ <field name="num_tiles_per_usc" start="21" end="22" type="uint"/>
+ <field name="dbias_is_int" start="20" end="20" type="bool"/>
+ <field name="overlap_check_mode" start="19" end="19" type="bool"/>
+ <field name="pt_upfront_depth_disable" start="18" end="18" type="bool"/>
+ <field name="process_empty_tiles" start="17" end="17" type="bool"/>
+ <field name="sample_pos" start="16" end="16" type="bool"/>
+ <field name="pipe_enable" start="12" end="15" type="PIPE_NUM"/>
+ <field name="valid_id" start="4" end="9" type="uint"/>
+ <field name="upass_start" start="0" end="3" type="uint"/>
+ </struct>
+
+ <struct name="ISP_ZLSCTL" length="2">
+ <field name="zlsextent_y_s" start="48" end="57" type="uint"/>
+ <field name="zlsextent_x_s" start="38" end="47" type="uint"/>
+ <field name="stencil_extent_enable" start="37" end="37" type="bool"/>
+ <field name="zlsextent_y_z" start="27" end="36" type="uint"/>
+ <field name="zstoreformat" start="25" end="26" type="ZSTOREFORMAT_TYPE"/>
+ <field name="zloadformat" start="23" end="24" type="ZLOADFORMAT_TYPE"/>
+ <field name="fb_storeen" start="22" end="22" type="bool"/>
+ <field name="fb_loaden" start="21" end="21" type="bool"/>
+ <field name="mstoreen" start="20" end="20" type="bool"/>
+ <field name="zstoreen" start="19" end="19" type="bool"/>
+ <field name="sstoreen" start="18" end="18" type="bool"/>
+ <field name="storetwiddled" start="17" end="17" type="bool"/>
+ <field name="mloaden" start="16" end="16" type="bool"/>
+ <field name="zloaden" start="15" end="15" type="bool"/>
+ <field name="sloaden" start="14" end="14" type="bool"/>
+ <field name="loadtwiddled" start="13" end="13" type="bool"/>
+ <field name="zlsextent_x_z" start="3" end="12" type="uint"/>
+ <field name="forcezstore" start="2" end="2" type="bool"/>
+ <field name="forcezload" start="1" end="1" type="bool"/>
+ <field name="zonlyrender" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="ISP_ZLOAD_BASE" length="2">
+ <field name="addr" start="4" end="39" shift="4" type="address"/>
+ </struct>
+
+ <struct name="ISP_STENCIL_LOAD_BASE" length="2">
+ <field name="addr" start="4" end="39" shift="4" type="address"/>
+ <field name="enable" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="ISP_SCISSOR_BASE" length="2">
+ <field name="addr" start="2" end="39" shift="2" type="address"/>
+ </struct>
+
+ <struct name="ISP_DBIAS_BASE" length="2">
+ <field name="addr" start="2" end="39" shift="2" type="address"/>
+ </struct>
+
+ <struct name="ISP_ZLS_PIXELS" length="1">
+ <field name="y" start="15" end="29" type="uint"/>
+ <field name="x" start="0" end="14" type="uint"/>
+ </struct>
+
+ <struct name="PBE_WORD0_MRT0" length="2">
+ <condition type="if" check="TFBC"/>
+ <field name="tfbc_lossy" start="62" end="63" type="TFBC_LOSSY"/>
+ <condition type="endif" check="TFBC"/>
+ <field name="x_rsrvd" start="63" end="63" type="bool"/>
+ <field name="pair_tiles" start="60" end="60" type="uint"/>
+ <field name="comp_iaddr_mode" start="60" end="60" type="COMP_IADDR_TYPE"/>
+ <field name="x_rsrvd2" start="59" end="59" type="bool"/>
+ <field name="comp_cor_enable" start="59" end="59" type="bool"/>
+ <field name="dither" start="58" end="58" type="bool"/>
+ <field name="tilerelative" start="57" end="57" type="bool"/>
+ <field name="downscale" start="56" end="56" type="bool"/>
+ <field name="size_z" start="52" end="55" type="SIZE"/>
+ <field name="rotation" start="50" end="51" type="ROTATION_TYPE"/>
+ <field name="linestride" start="34" end="49" type="uint"/>
+ <field name="memlayout" start="32" end="33" type="MEMLAYOUT"/>
+ <field name="swiz_chan3" start="29" end="31" type="SWIZ"/>
+ <field name="swiz_chan2" start="26" end="28" type="SWIZ"/>
+ <field name="swiz_chan1" start="23" end="25" type="SWIZ"/>
+ <field name="swiz_chan0" start="20" end="22" type="SWIZ"/>
+ <field name="minclip_x" start="6" end="19" type="uint"/>
+ <field name="twocomp_gamma" start="5" end="5" type="TWOCOMP_GAMMA"/>
+ <field name="gamma" start="4" end="4" type="bool"/>
+ <field name="compression" start="3" end="3" type="bool"/>
+ <field name="compress_size" start="2" end="2" type="COMPRESS_SIZE"/>
+ <field name="comp_indirect_table" start="1" end="1" type="bool"/>
+ <condition type="if" check="PBE_YFLIP"/>
+ <field name="y_flip" start="0" end="0" type="bool"/>
+ <condition type="endif" check="PBE_YFLIP"/>
+ </struct>
+
+ <struct name="FRAG_SCREEN" length="1">
+ <field name="ymax" start="16" end="30" type="uint"/>
+ <field name="xmax" start="0" end="14" type="uint"/>
+ </struct>
+
+ <struct name="TPU" length="1">
+ <condition type="if" check="PDSL0SIZE > 0"/>
+ <field name="mcu_pds_l0_off" start="8" end="8" type="bool"/>
+ <condition type="endif" check="PDSL0SIZE > 0"/>
+ <condition type="if" check="TPU_CEM_DATAMASTER_GLOBAL_REGISTERS"/>
+ <field name="tag_cem_64_face_packing" start="7" end="7" type="bool"/>
+ <condition type="endif" check="TPU_CEM_DATAMASTER_GLOBAL_REGISTERS"/>
+ <field name="tag_enable_mmu_prefetch" start="6" end="6" type="bool"/>
+ <field name="tag_cem_4k_face_packing" start="5" end="5" type="bool"/>
+ <field name="madd_config_l0off" start="4" end="4" type="bool"/>
+ <field name="tag_cem_face_packing" start="3" end="3" type="bool"/>
+ <field name="tag_cemedge_dontfilter" start="2" end="2" type="bool"/>
+ <condition type="if" check="TPU_CEM_USG_NORMALISATION"/>
+ <field name="tag_cemgrad_dontnegate" start="1" end="1" type="bool"/>
+ <condition type="endif" check="TPU_CEM_USG_NORMALISATION"/>
+ <field name="madd_config_dxt35_transovr" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="TPU_BORDER_COLOUR_TABLE_PDM" length="2">
+ <field name="border_colour_table_address" start="0" end="37" shift="2" type="address"/>
+ </struct>
+
+ <struct name="TPU_BORDER_COLOUR_TABLE_VDM" length="2">
+ <field name="border_colour_table_address" start="0" end="37" shift="2" type="address"/>
+ </struct>
+
+ <struct name="TPU_BORDER_COLOUR_TABLE_CDM" length="2">
+ <field name="border_colour_table_address" start="0" end="37" shift="2" type="address"/>
+ </struct>
+
+ <struct name="USC_PIXEL_OUTPUT_CTRL" length="1">
+ <field name="partition_mask" start="3" end="20" type="uint"/>
+ <field name="enable_4th_partition" start="2" end="2" type="bool"/>
+ <field name="width" start="0" end="1" type="PIXEL_WIDTH"/>
+ </struct>
+
+</csbgen>
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * Umbrella header for the Rogue hardware definitions: pulls in the
+ * per-module headers (CDM, CR, IPF, LLS, PBESTATE, PDS, PPP, TEXSTATE,
+ * VDM) so that consumers only need a single #include.
+ *
+ * NOTE(review): the included headers appear to be generated from the
+ * accompanying csbgen XML descriptions — confirm against the build rules.
+ */
+
+#ifndef ROGUE_HWDEFS_H
+#define ROGUE_HWDEFS_H
+
+#include "rogue_cdm.h"
+#include "rogue_cr.h"
+#include "rogue_ipf.h"
+#include "rogue_lls.h"
+#include "rogue_pbestate.h"
+#include "rogue_pds.h"
+#include "rogue_ppp.h"
+#include "rogue_texstate.h"
+#include "rogue_vdm.h"
+
+#endif /* ROGUE_HWDEFS_H */
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="IPF">
+
+ <define name="TILE_SIZE_PIXELS" value="32"/>
+
+ <struct name="SCISSOR_WORD_0" length="1">
+ <field name="scw0_xmin" start="16" end="31" type="uint"/>
+ <field name="scw0_xmax" start="0" end="15" type="uint"/>
+ </struct>
+
+ <struct name="SCISSOR_WORD_1" length="1">
+ <field name="scw1_ymin" start="16" end="31" type="uint"/>
+ <field name="scw1_ymax" start="0" end="15" type="uint"/>
+ </struct>
+
+</csbgen>
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="LLS">
+
+ <!-- Size of the CDM's context state buffer in bytes. -->
+ <define name="CDM_CONTEXT_RESUME_BUFFER_SIZE" value="72"/>
+ <define name="CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT" value="16"/>
+
+ <!-- Size of the PDS's persistent-temporary register context state buffer in bytes. -->
+ <define name="PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT" value="16"/>
+ <define name="PDS_PERSISTENT_TEMPS_BUFFER_SIZE" value="128"/>
+
+ <!-- Size of the TA's context state buffer in bytes. -->
+ <define name="TA_STATE_BUFFER_ALIGNMENT" value="16"/>
+ <define name="TA_STATE_BUFFER_SIZE" value="484"/>
+
+ <!-- Size of the USC's shared register context state buffer in bytes. -->
+ <define name="USC_SHARED_REGS_BUFFER_ALIGNMENT" value="16"/>
+ <define name="USC_SHARED_REGS_BUFFER_SIZE" value="16384"/>
+
+ <!-- Size of the VDM's context resume control stream buffer in bytes. -->
+ <define name="VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT" value="16"/>
+ <define name="VDM_CONTEXT_RESUME_BUFFER_SIZE" value="92"/>
+
+
+</csbgen>
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="PBESTATE">
+
+ <enum name="COMP_IADDR_TYPE">
+ <value name="INDIRECT_1TILE" value="0"/>
+ <value name="INDIRECT_4TILE" value="1"/>
+ </enum>
+
+ <enum name="COMPRESS_SIZE">
+ <value name="BLOCK_8X8" value="0"/>
+ <value name="BLOCK_16X4" value="1"/>
+ </enum>
+
+ <enum name="COMPRESS_SIZE_EXT">
+ <value name="BLOCK_8X8_16X4" value="0"/>
+ <value name="BLOCK_32X2_RSRVD" value="1"/>
+ </enum>
+
+ <enum name="COMPRESSION">
+ <value name="DISABLED" value="0"/>
+ <value name="ENABLED" value="1"/>
+ </enum>
+
+ <enum name="LOSSY">
+ <value name="DISABLED" value="0"/>
+ <value name="ENABLED" value="1"/>
+ </enum>
+
+ <enum name="MEMLAYOUT">
+ <value name="LINEAR" value="0"/>
+ <value name="TWIDDLE_2D" value="1"/>
+ <value name="TWIDDLE_3D" value="2"/>
+ <value name="TILED" value="3"/>
+ </enum>
+
+ <enum name="PACKMODE">
+ <value name="U8U8U8U8" value="0x0"/>
+ <value name="S8S8S8S8" value="0x1"/>
+ <value name="X8U8S8S8" value="0x2"/>
+ <value name="X8S8S8U8" value="0x3"/>
+ <value name="A1R5G5B5" value="0x4"/>
+ <value name="R5G5B5A1" value="0x5"/>
+ <value name="A4R4G4B4" value="0x6"/>
+ <value name="A8R3G3B2" value="0x7"/>
+ <value name="U16U16U16U16" value="0x8"/>
+ <value name="S16S16S16S16" value="0x9"/>
+ <value name="F16F16F16F16" value="0xa"/>
+ <value name="U32U32U32U32" value="0xb"/>
+ <value name="S32S32S32S32" value="0xc"/>
+ <value name="F32F32F32F32" value="0xd"/>
+ <value name="A2R10B10G10" value="0xe"/>
+ <value name="R10B10G10A2" value="0xf"/>
+ <value name="A2F10F10F10" value="0x10"/>
+ <value name="F10F10F10A2" value="0x11"/>
+ <value name="U8U8U8" value="0x12"/>
+ <value name="S8S8S8" value="0x13"/>
+ <value name="R5G6B5" value="0x14"/>
+ <value name="R5SG5SB6" value="0x15"/>
+ <value name="B6G5SR5S" value="0x16"/>
+ <value name="U16U16U16" value="0x17"/>
+ <value name="S16S16S16" value="0x18"/>
+ <value name="F16F16F16" value="0x19"/>
+ <value name="U32U32U32" value="0x1a"/>
+ <value name="S32S32S32" value="0x1b"/>
+ <value name="F11F11F10" value="0x1c"/>
+ <value name="F10F11F11" value="0x1d"/>
+ <value name="SE9995" value="0x1e"/>
+ <value name="F32F32F32" value="0x1f"/>
+ <value name="X24U8F32" value="0x20"/>
+ <value name="X24X8F32" value="0x21"/>
+ <value name="X24G8X32" value="0x22"/>
+ <value name="U8U8" value="0x23"/>
+ <value name="S8S8" value="0x24"/>
+ <value name="U16U16" value="0x25"/>
+ <value name="S16S16" value="0x26"/>
+ <value name="F16F16" value="0x27"/>
+ <value name="U32U32" value="0x28"/>
+ <value name="S32S32" value="0x29"/>
+ <value name="F32F32" value="0x2a"/>
+ <value name="U24ST8" value="0x2b"/>
+ <value name="ST8U24" value="0x2c"/>
+ <value name="X8U24" value="0x2d"/>
+ <value name="U8X24" value="0x2e"/>
+ <value name="U8" value="0x2f"/>
+ <value name="S8" value="0x30"/>
+ <value name="U16" value="0x31"/>
+ <value name="S16" value="0x32"/>
+ <value name="F16" value="0x33"/>
+ <value name="U32" value="0x34"/>
+ <value name="S32" value="0x35"/>
+ <value name="F32" value="0x36"/>
+ <value name="PBYTE" value="0x37"/>
+ <value name="PWORD" value="0x38"/>
+ <value name="ARGBV16_XR10" value="0x39"/>
+ <value name="A2_XRBIAS_U10U10U10" value="0x3a"/>
+ <value name="YUV" value="0x3b"/>
+ <value name="U10U10U10_XRBIAS_A2" value="0x3c"/>
+ <value name="INVALID" value="0xFFFFFFFF"/>
+ </enum>
+
+ <enum name="PAIR_TILES">
+ <value name="DISABLED" value="0"/>
+ <value name="ENABLED" value="1"/>
+ </enum>
+
+ <enum name="REG_WORD0_LINESTRIDE">
+ <value name="ALIGNSHIFT" value="1"/>
+ <value name="ALIGNSIZE" value="2"/>
+ <value name="ALIGNSHIFT_PBE_STRIDE_ALIGN_1PIXEL_ENABLED" value="0"/>
+ <value name="ALIGNSIZE_PBE_STRIDE_ALIGN_1PIXEL_ENABLED" value="1"/>
+ </enum>
+
+ <enum name="ROTATION_TYPE">
+ <value name="0_DEG" value="0"/>
+ <value name="90_DEG" value="1"/>
+ <value name="180_DEG" value="2"/>
+ <value name="270_DEG" value="3"/>
+ </enum>
+
+ <enum name="SIZE">
+ <value name="1_PIXEL" value="0"/>
+ <value name="2_PIXEL" value="1"/>
+ <value name="4_PIXEL" value="2"/>
+ <value name="8_PIXEL" value="3"/>
+ <value name="16_PIXEL" value="4"/>
+ <value name="32_PIXEL" value="5"/>
+ <value name="64_PIXEL" value="6"/>
+ <value name="128_PIXEL" value="7"/>
+ <value name="256_PIXEL" value="8"/>
+ <value name="512_PIXEL" value="9"/>
+ <value name="1K_PIXEL" value="10"/>
+ <value name="2K_PIXEL" value="11"/>
+ <value name="4K_PIXEL" value="12"/>
+ <value name="8K_PIXEL" value="13"/>
+ <value name="16K_PIXEL" value="14"/>
+ </enum>
+
+ <enum name="SOURCE_FORMAT">
+ <value name="F16_PER_CHANNEL" value="0"/>
+ <value name="8_PER_CHANNEL" value="1"/>
+ </enum>
+
+ <enum name="SOURCE_POS">
+ <value name="START_BIT0" value="0"/>
+ <value name="START_BIT32" value="1"/>
+ <value name="START_BIT64" value="2"/>
+ <value name="START_BIT96" value="3"/>
+ </enum>
+
+ <enum name="SWIZ">
+ <value name="SOURCE_CHAN0" value="0"/>
+ <value name="SOURCE_CHAN1" value="1"/>
+ <value name="SOURCE_CHAN2" value="2"/>
+ <value name="SOURCE_CHAN3" value="3"/>
+ <value name="ONE" value="4"/>
+ <value name="ZERO" value="5"/>
+ </enum>
+
+ <enum name="TFBC_LOSSY">
+ <value name="LOSSLESS" value="0"/>
+ <value name="LOSSY75" value="1"/>
+ <value name="LOSSY50" value="2"/>
+ <value name="LOSSY25" value="3"/>
+ </enum>
+
+ <enum name="TWOCOMP_GAMMA">
+ <value name="GAMMA_BOTTOM_CHANNEL" value="0"/>
+ <value name="GAMMA_BOTH_CHANNELS" value="1"/>
+ </enum>
+
+ <enum name="Y_FLIP">
+ <value name="DISABLED" value="0"/>
+ <value name="ENABLED" value="1"/>
+ </enum>
+
+ <enum name="YUV_DOWNSCALE">
+ <value name="NO_DOWNSCALING" value="0"/>
+ <value name="ONE_SAMPLE_LEFT" value="1"/>
+ <value name="EMPTY" value="2"/>
+ <value name="TWO_SAMPLES" value="3"/>
+ <value name="FOUR_SAMPLES" value="4"/>
+ <value name="ONE_SAMPLE_RIGHT" value="5"/>
+ </enum>
+
+ <enum name="YUV_PMODE">
+ <value name="UV8_420_2PLANE" value="0"/>
+ <value name="U8_420_3PLANE" value="1"/>
+ <value name="V8_420_3PLANE" value="2"/>
+ <value name="YUV8_422_1PLANE" value="3"/>
+ <value name="UV8_422_2PLANE" value="4"/>
+ <value name="UV8_444_2PLANE" value="5"/>
+ <value name="Y8_23PLANE" value="6"/>
+ <value name="U8_444_3PLANE" value="7"/>
+ <value name="V8_444_3PLANE" value="8"/>
+ <value name="YUV10_444_1PLANE" value="9"/>
+ <value name="RESERVED_YUV1" value="10"/>
+ <value name="RESERVED_YUV2" value="11"/>
+ <value name="YUV10_422_1PLANE" value="12"/>
+ <value name="UV10_420_2PLANE" value="13"/>
+ <value name="Y10_23PLANE" value="14"/>
+ <value name="UV16_420_2PLANE" value="15"/>
+ <value name="UV16_422_2PLANE" value="16"/>
+ <value name="UV16_444_2PLANE" value="17"/>
+ <value name="Y16_23PLANE" value="18"/>
+ <value name="U16_444_3PLANE" value="19"/>
+ <value name="V16_444_3PLANE" value="20"/>
+ </enum>
+
+ <struct name="STATE_WORD0" length="1">
+ <field name="address_low" start="0" end="31" shift="2" type="address"/>
+ </struct>
+
+ <struct name="STATE_WORD1" length="1">
+ <condition type="if" check="8_OUTPUT_REGISTERS"/>
+ <field name="source_pos_offset_128" start="28" end="28" type="bool"/>
+ <condition type="endif" check="8_OUTPUT_REGISTERS"/>
+ <field name="yuv_pmode" start="23" end="27" type="YUV_PMODE"/>
+ <field name="yuv_downscale" start="20" end="22" type="YUV_DOWNSCALE"/>
+ <field name="source_format" start="19" end="19" type="SOURCE_FORMAT"/>
+ <field name="mrt_index" start="16" end="18" type="uint"/>
+ <field name="source_pos" start="14" end="15" type="SOURCE_POS"/>
+ <field name="norm" start="13" end="13" type="bool"/>
+ <field name="packmode" start="7" end="12" type="PACKMODE"/>
+ <field name="emptytile" start="6" end="6" type="bool"/>
+ <field name="address_high" start="0" end="5" shift="34" type="address"/>
+ </struct>
+
+ <struct name="REG_WORD0" length="2">
+ <field name="tfbc_lossy" start="62" end="63" type="TFBC_LOSSY">
+ <define name="LOSSY37_75_TFBC_LOSSY_37_PERCENT_ENABLED" value="1"/>
+ </field>
+ <field name="lossy" start="62" end="62" type="LOSSY"/>
+ <field name="compress_size_ext" start="61" end="61" type="COMPRESS_SIZE_EXT"/>
+ <field name="comp_iaddr_mode" start="60" end="60" type="COMP_IADDR_TYPE"/>
+ <field name="comp_cor_enable" start="59" end="59" type="bool"/>
+ <field name="dither" start="58" end="58" type="bool"/>
+ <field name="tilerelative" start="57" end="57" type="bool"/>
+ <field name="downscale" start="56" end="56" type="bool"/>
+ <field name="size_z" start="52" end="55" type="SIZE"/>
+ <field name="rotation" start="50" end="51" type="ROTATION_TYPE"/>
+ <field name="linestride" start="34" end="49" type="uint">
+ <define name="UNIT_SIZE" value="2"/>
+ </field>
+ <field name="memlayout" start="32" end="33" type="MEMLAYOUT"/>
+ <field name="swiz_chan3" start="29" end="31" type="SWIZ"/>
+ <field name="swiz_chan2" start="26" end="28" type="SWIZ"/>
+ <field name="swiz_chan1" start="23" end="25" type="SWIZ"/>
+ <field name="swiz_chan0" start="20" end="22" type="SWIZ"/>
+ <field name="minclip_x" start="6" end="19" type="uint"/>
+ <field name="twocomp_gamma" start="5" end="5" type="TWOCOMP_GAMMA"/>
+ <field name="gamma" start="4" end="4" type="bool"/>
+ <field name="compression" start="3" end="3" type="COMPRESSION"/>
+ <field name="compress_size" start="2" end="2" type="COMPRESS_SIZE"/>
+ <field name="comp_indirect_table" start="1" end="1" type="bool"/>
+ <field name="y_flip" start="0" end="0" type="Y_FLIP"/>
+ </struct>
+
+ <struct name="REG_WORD1" length="2">
+ <field name="size_x" start="60" end="63" type="SIZE"/>
+ <field name="minclip_y" start="46" end="59" type="uint"/>
+ <field name="maxclip_x" start="32" end="45" type="uint"/>
+ <field name="size_y" start="28" end="31" type="SIZE"/>
+ <field name="zslice" start="14" end="27" type="uint"/>
+ <field name="maxclip_y" start="0" end="13" type="uint"/>
+ </struct>
+
+ <struct name="REG_WORD2" length="2">
+ <field name="pair_tiles" start="46" end="46" type="PAIR_TILES">
+ <!-- TODO: Do we need this? -->
+ <define name="SHIFT" value="46"/>
+ </field>
+ <field name="surface_y_size" start="32" end="45" type="uint"/>
+ <field name="sw_bytemask" start="0" end="31" type="uint"/>
+ </struct>
+
+</csbgen>
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="PDSINST">
+
+ <enum name="CMODE_LD">
+ <value name="CACHED" value="0"/>
+ <value name="BYPASS" value="1"/>
+ <value name="FORCE_LINE_FILL" value="2"/>
+ </enum>
+
+ <enum name="DOUTD_DEST">
+ <value name="UNIFIED_STORE" value="0"/>
+ <value name="COMMON_STORE" value="1"/>
+ </enum>
+
+ <enum name="DOUTI_SHADEMODEL">
+ <value name="FLAT_VERTEX0" value="0"/>
+ <value name="FLAT_VERTEX1" value="1"/>
+ <value name="FLAT_VERTEX2" value="2"/>
+ <value name="GOURUAD" value="3"/>
+ </enum>
+
+ <enum name="DOUTI_SIZE">
+ <value name="1D" value="0"/>
+ <value name="2D" value="1"/>
+ <value name="3D" value="2"/>
+ <value name="4D" value="3"/>
+ </enum>
+
+ <enum name="DOUTU_SAMPLE_RATE">
+ <value name="INSTANCE" value="0"/>
+ <value name="SELECTIVE" value="1"/>
+ <value name="FULL" value="2"/>
+ </enum>
+
+ <enum name="SLC_MODE_LD">
+ <value name="BYPASS" value="0"/>
+ <value name="CACHED" value="1"/>
+ <value name="CACHED_RD_NA" value="3"/>
+ </enum>
+
+ <enum name="WORDSIZE">
+ <value name="ONE" value="0"/>
+ <value name="TWO" value="1"/>
+ <value name="THREE" value="2"/>
+ <value name="FOUR" value="3"/>
+ </enum>
+
+ <struct name="DOUTU_SRC0" length="2">
+  <field name="dual_phase" start="41" end="41" type="bool"/>
+ <field name="temps" start="35" end="40" type="uint">
+ <define name="UNIT_SIZE" value="2"/>
+ </field>
+ <field name="sample_rate" start="33" end="34" type="DOUTU_SAMPLE_RATE"/>
+ <field name="exe_off" start="2" end="31" shift="2" type="address"/>
+ </struct>
+
+ <struct name="DOUT_FIELDS_DOUTD_SRC0" length="2">
+ <condition type="if" check="SLC_MCU_CACHE_CONTROLS"/>
+ <field name="slcmode" start="60" end="61" type="SLC_MODE_LD"/>
+ <condition type="endif" check="SLC_MCU_CACHE_CONTROLS"/>
+ <field name="doffset" start="40" end="52" type="uint"/>
+ <field name="sbase" start="0" end="39" shift="0" type="address"/>
+ </struct>
+
+ <struct name="DOUT_FIELDS_DOUTD_SRC1" length="1">
+ <field name="last" start="31" end="31" type="bool"/>
+ <field name="wordsize" start="29" end="30" type="WORDSIZE"/>
+ <field name="dest" start="28" end="28" type="DOUTD_DEST"/>
+ <field name="cmode" start="26" end="27" type="CMODE_LD"/>
+ <field name="a0" start="13" end="25" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ <field name="repeat" start="12" end="12" type="bool"/>
+ <field name="bsize" start="0" end="11" type="uint"/>
+ </struct>
+
+ <struct name="DOUT_FIELDS_DOUTI_SRC" length="1">
+ <field name="depthbias" start="27" end="27" type="bool"/>
+ <field name="primitiveid" start="26" end="26" type="bool"/>
+ <field name="shademodel" start="24" end="25" type="DOUTI_SHADEMODEL"/>
+ <field name="pointsprite" start="23" end="23" type="bool"/>
+ <field name="wraps" start="22" end="22" type="bool"/>
+ <field name="wrapv" start="21" end="21" type="bool"/>
+ <field name="wrapu" start="20" end="20" type="bool"/>
+ <field name="size" start="18" end="19" type="DOUTI_SIZE"/>
+ <field name="f16" start="17" end="17" type="bool"/>
+ <field name="perspective" start="16" end="16" type="bool"/>
+ <field name="f32_offset" start="8" end="15" type="uint"/>
+ <field name="f16_offset" start="0" end="7" type="uint"/>
+ </struct>
+
+</csbgen>
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="TA">
+
+ <enum name="CLIP_MODE">
+ <value name="NO_FRONT_OR_REAR" value="0"/>
+ <value name="FRONT_REAR" value="1"/>
+ <value name="FRONT_REAR_DEPTH" value="2"/>
+ </enum>
+
+ <enum name="CMPMODE">
+ <value name="NEVER" value="0"/>
+ <value name="LESS" value="1"/>
+ <value name="EQUAL" value="2"/>
+ <value name="LESS_OR_EQUAL" value="3"/>
+ <value name="GREATER" value="4"/>
+ <value name="NOT_EQUAL" value="5"/>
+ <value name="GREATER_OR_EQUAL" value="6"/>
+ <value name="ALWAYS" value="7"/>
+ </enum>
+
+ <enum name="CULLMODE">
+ <value name="NO_CULLING" value="0"/>
+ <value name="CULL_CW" value="1"/>
+ <value name="CULL_CCW" value="2"/>
+ </enum>
+
+ <enum name="FLATSHADE">
+ <value name="VTX_VERTEX_0" value="1"/>
+ <value name="VTX_VERTEX_1" value="2"/>
+ <value name="VTX_VERTEX_2" value="3"/>
+ </enum>
+
+ <enum name="GS_OUTPUT_TOPOLOGY">
+ <value name="POINT_LIST" value="0"/>
+ <value name="LINE_STRIP" value="1"/>
+ <value name="TRI_STRIP" value="2"/>
+ </enum>
+
+ <enum name="ISPB_STENCILOP">
+ <value name="KEEP" value="0"/>
+ <value name="ZERO" value="1"/>
+ <value name="REPLACE" value="2"/>
+ <value name="INCREMENT_SATURATE" value="3"/>
+ <value name="DECREMENT_SATURATE" value="4"/>
+ <value name="INVERT" value="5"/>
+ <value name="INCREMENT" value="6"/>
+ <value name="DECREMENT" value="7"/>
+ </enum>
+
+ <enum name="OBJTYPE">
+ <value name="TRIANGLE" value="0"/>
+ <value name="LINE" value="1"/>
+ <value name="SPRITE_10UV" value="2"/>
+ <value name="SPRITE_UV" value="3"/>
+ <value name="SPRITE_01UV" value="4"/>
+ <value name="LINE_FILLED_TRIANGLE" value="5"/>
+ <value name="POINT_FILLED_TRIANGLE" value="6"/>
+ <value name="TESSELLATED_OBJECT_NO_GS" value="7"/>
+ <value name="TESSELLATED_OBJECT_WITH_GS" value="8"/>
+ </enum>
+
+ <!--
+ TODO: Add support for "ifs" in csbgen root element.
+ -->
+ <enum name="PASSTYPE">
+ <value name="OPAQUE" value="0"/>
+ <value name="TRANSLUCENT" value="1"/>
+ <value name="PUNCH_THROUGH" value="2"/>
+ <value name="VIEWPORT_OBJECT" value="3"/>
+ <value name="FAST_PUNCH_THROUGH" value="4"/>
+ <value name="DEPTH_FEEDBACK" value="5"/>
+ <value name="ANTI_ALIASED" value="6"/>
+ </enum>
+
+ <enum name="REGION_CLIP_MODE">
+ <value name="NONE" value="0"/>
+ <value name="OUTSIDE" value="1"/>
+ </enum>
+
+ <struct name="STATE_HEADER" length="1">
+ <field name="not_final_term" start="26" end="26" type="bool"/>
+ <field name="pres_terminate" start="25" end="25" type="bool"/>
+ <field name="context_switch" start="24" end="24" type="bool"/>
+ <field name="pres_stream_out_program" start="23" end="23" type="bool"/>
+ <field name="pres_stream_out_size" start="22" end="22" type="bool"/>
+ <field name="pres_ppp_ctrl" start="21" end="21" type="bool"/>
+ <field name="pres_varying_word2" start="20" end="20" type="bool"/>
+ <field name="pres_varying_word1" start="19" end="19" type="bool"/>
+ <field name="pres_varying_word0" start="18" end="18" type="bool"/>
+ <field name="pres_outselects" start="17" end="17" type="bool"/>
+ <field name="pres_wclamp" start="16" end="16" type="bool"/>
+ <field name="view_port_count" start="12" end="15" type="uint"/>
+ <field name="pres_viewport" start="11" end="11" type="bool"/>
+ <field name="pres_region_clip" start="10" end="10" type="bool"/>
+ <field name="pres_pds_state_ptr3" start="9" end="9" type="bool"/>
+ <field name="pres_pds_state_ptr2" start="8" end="8" type="bool"/>
+ <field name="pres_pds_state_ptr1" start="7" end="7" type="bool"/>
+ <field name="pres_pds_state_ptr0" start="6" end="6" type="bool"/>
+ <field name="pres_ispctl_dbsc" start="5" end="5" type="bool"/>
+ <field name="pres_ispctl_bb" start="4" end="4" type="bool"/>
+ <field name="pres_ispctl_ba" start="3" end="3" type="bool"/>
+ <field name="pres_ispctl_fb" start="2" end="2" type="bool"/>
+ <field name="pres_ispctl_fa" start="1" end="1" type="bool"/>
+ <field name="pres_ispctl" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="STATE_ISPCTL" length="1">
+ <field name="validid" start="26" end="31" type="uint"/>
+ <field name="upass" start="22" end="25" type="uint"/>
+ <field name="tagwritedisable" start="21" end="21" type="bool"/>
+ <field name="ovgmtestdisable" start="20" end="20" type="bool"/>
+ <field name="two_sided" start="19" end="19" type="bool"/>
+ <field name="bpres" start="18" end="18" type="bool"/>
+ <field name="dbenable" start="17" end="17" type="bool"/>
+ <field name="scenable" start="16" end="16" type="bool"/>
+ <field name="vistest" start="15" end="15" type="bool"/>
+ <field name="visbool" start="14" end="14" type="bool"/>
+ <field name="visreg" start="0" end="13" type="uint"/>
+ </struct>
+
+ <struct name="STATE_ISPA" length="1">
+ <field name="objtype" start="28" end="31" type="OBJTYPE"/>
+ <field name="passtype" start="24" end="26" type="PASSTYPE"/>
+ <field name="ovgvispassmaskop" start="23" end="23" type="bool"/>
+ <field name="maskval" start="22" end="22" type="bool"/>
+ <field name="dwritedisable" start="21" end="21" type="bool"/>
+ <field name="dfbztestenable" start="20" end="20" type="bool"/>
+ <field name="dcmpmode" start="17" end="19" type="CMPMODE"/>
+ <field name="linefilllastpixel" start="16" end="16" type="bool"/>
+ <field name="pointlinewidth" start="8" end="15" type="uint">
+ <define name="SIZE_MAX" value="255"/>
+ </field>
+ <field name="sref" start="0" end="7" type="uint"/>
+ </struct>
+
+ <struct name="STATE_ISPB" length="1">
+ <field name="scmpmode" start="25" end="27" type="CMPMODE"/>
+ <field name="sop1" start="22" end="24" type="ISPB_STENCILOP"/>
+ <field name="sop2" start="19" end="21" type="ISPB_STENCILOP"/>
+ <field name="sop3" start="16" end="18" type="ISPB_STENCILOP"/>
+ <field name="scmpmask" start="8" end="15" type="uint"/>
+ <field name="swmask" start="0" end="7" type="uint"/>
+ </struct>
+
+ <struct name="REGION_CLIP0" length="1">
+ <field name="mode" start="31" end="31" type="REGION_CLIP_MODE"/>
+ <field name="left" start="16" end="24" type="uint"/>
+ <field name="right" start="0" end="8" type="uint"/>
+ </struct>
+
+ <struct name="REGION_CLIP1" length="1">
+ <field name="top" start="16" end="24" type="uint"/>
+ <field name="bottom" start="0" end="8" type="uint"/>
+ </struct>
+
+ <struct name="STATE_ISPDBSC" length="1">
+ <field name="dbindex" start="16" end="31" type="uint"/>
+ <field name="scindex" start="0" end="15" type="uint"/>
+ </struct>
+
+ <struct name="OUTPUT_SEL" length="1">
+ <field name="vtxsize" start="24" end="31" type="uint"/>
+ <field name="tsp_unclamped_z_pres" start="21" end="21" type="bool"/>
+ <field name="render_tgt_pres" start="20" end="20" type="bool"/>
+ <field name="vpt_tgt_pres" start="19" end="19" type="bool"/>
+ <field name="psprite_size_pres" start="18" end="18" type="bool"/>
+ <field name="isp_position_depth_clamp_z" start="17" end="17" type="bool"/>
+ <field name="rhw_pres" start="16" end="16" type="bool"/>
+ <field name="cullplane7" start="15" end="15" type="bool"/>
+ <field name="cullplane6" start="14" end="14" type="bool"/>
+ <field name="cullplane5" start="13" end="13" type="bool"/>
+ <field name="cullplane4" start="12" end="12" type="bool"/>
+ <field name="cullplane3" start="11" end="11" type="bool"/>
+ <field name="cullplane2" start="10" end="10" type="bool"/>
+ <field name="cullplane1" start="9" end="9" type="bool"/>
+ <field name="cullplane0" start="8" end="8" type="bool"/>
+ <field name="plane7" start="7" end="7" type="bool"/>
+ <field name="plane6" start="6" end="6" type="bool"/>
+ <field name="plane5" start="5" end="5" type="bool"/>
+ <field name="plane4" start="4" end="4" type="bool"/>
+ <field name="plane3" start="3" end="3" type="bool"/>
+ <field name="plane2" start="2" end="2" type="bool"/>
+ <field name="plane1" start="1" end="1" type="bool"/>
+ <field name="plane0" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="STATE_VARYING0" length="1">
+ <condition type="if" check="TEXTURE_WRAP_VARYING"/>
+ <field name="f32_linear_wrap" start="24" end="31" type="uint"/>
+ <condition type="endif" check="TEXTURE_WRAP_VARYING"/>
+ <field name="f32_npc" start="16" end="23" type="uint"/>
+ <field name="f32_flat" start="8" end="15" type="uint"/>
+ <field name="f32_linear" start="0" end="7" type="uint"/>
+ </struct>
+
+ <struct name="STATE_VARYING1" length="1">
+ <field name="f16_npc" start="24" end="31" type="uint"/>
+ <field name="f16_flat" start="16" end="23" type="uint"/>
+ <field name="f16_linear" start="8" end="15" type="uint"/>
+ <condition type="if" check="TEXTURE_WRAP_VARYING"/>
+    <field name="f32_npc_wrap" start="0" end="7" type="uint"/>
+ <condition type="endif" check="TEXTURE_WRAP_VARYING"/>
+ </struct>
+
+ <struct name="STATE_TERMINATE0" length="1">
+ <field name="clip_right" start="18" end="26" type="uint">
+ <define name="BLOCK_SIZE_IN_PIXELS" value="32"/>
+ </field>
+ <field name="clip_top" start="9" end="17" type="uint">
+ <define name="BLOCK_SIZE_IN_PIXELS" value="32"/>
+ </field>
+ <field name="clip_bottom" start="0" end="8" type="uint">
+ <define name="BLOCK_SIZE_IN_PIXELS" value="32"/>
+ </field>
+ </struct>
+
+ <struct name="STATE_TERMINATE1" length="1">
+ <field name="clip_left" start="23" end="31" type="uint">
+ <define name="BLOCK_SIZE_IN_PIXELS" value="32"/>
+ </field>
+ <field name="render_target" start="0" end="10" type="uint"/>
+ </struct>
+
+ <struct name="STATE_STREAM_OUT1" length="1">
+ <field name="sync" start="10" end="10" type="bool"/>
+ <field name="pds_data_size" start="4" end="9" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="pds_temp_size" start="0" end="3" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ </struct>
+
+ <struct name="STATE_STREAM_OUT2" length="1">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="pds_data_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="STATE_PDS_SHADERBASE" length="1">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="STATE_PDS_TEXUNICODEBASE" length="1">
+ <field name="addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="STATE_PDS_VARYINGBASE" length="1">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="STATE_PDS_TEXTUREDATABASE" length="1">
+ <field name="addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="STATE_PDS_UNIFORMDATABASE" length="1">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="STATE_PDS_SIZEINFO1" length="1">
+ <field name="pds_uniformsize" start="23" end="31" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ <field name="pds_texturestatesize" start="16" end="22" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ <field name="pds_varyingsize" start="10" end="15" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ <field name="usc_varyingsize" start="4" end="9" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ <define name="MAX_SIZE" value="63"/>
+ </field>
+ <field name="pds_tempsize" start="0" end="3" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ </struct>
+
+ <struct name="STATE_PDS_SIZEINFO2" length="1">
+ <field name="usc_sharedsize" start="23" end="31" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="pds_tri_merge_disable" start="14" end="14" type="bool"/>
+ <field name="pds_batchnum" start="0" end="13" type="uint"/>
+ </struct>
+
+ <struct name="STATE_PPP_CTRL" length="1">
+ <field name="trp" start="16" end="16" type="bool"/>
+ <field name="prim_msaa" start="15" end="15" type="bool"/>
+ <field name="gs_output_topology" start="13" end="14" type="GS_OUTPUT_TOPOLOGY"/>
+ <field name="pres_prim_id" start="12" end="12" type="bool"/>
+ <field name="clip_mode" start="10" end="11" type="CLIP_MODE"/>
+ <field name="drawclippededges" start="9" end="9" type="bool"/>
+ <field name="flatshade_vtx" start="7" end="8" type="FLATSHADE"/>
+ <field name="pretransform" start="6" end="6" type="bool"/>
+ <field name="wclampen" start="5" end="5" type="bool"/>
+ <field name="wbuffen" start="4" end="4" type="bool"/>
+ <field name="resetbbox" start="3" end="3" type="bool"/>
+ <field name="updatebbox" start="2" end="2" type="bool"/>
+ <field name="cullmode" start="0" end="1" type="CULLMODE"/>
+ </struct>
+
+</csbgen>
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="TEXSTATE">
+
+ <enum name="ADDRMODE">
+ <value name="REPEAT" value="0"/>
+ <value name="FLIP" value="1"/>
+ <value name="CLAMP_TO_EDGE" value="2"/>
+ <value name="FLIP_ONCE_THEN_CLAMP" value="3"/>
+ <value name="CLAMP_TO_BORDER" value="4"/>
+ <value name="OGL_CLAMP" value="5"/>
+ </enum>
+
+ <enum name="ANISOCTL">
+ <value name="DISABLED" value="0"/>
+ <value name="X2" value="1"/>
+ <value name="X4" value="2"/>
+ <value name="X8" value="3"/>
+ <value name="X16" value="4"/>
+ </enum>
+
+ <enum name="CLAMP">
+ <value name="MIN" value="0"/>
+ <value name="MAX" value="959"/>
+ <value name="INTEGER_BITS" value="4"/>
+ <value name="FRACTIONAL_BITS" value="6"/>
+ </enum>
+
+ <enum name="CMP_MODE">
+ <value name="NEVER" value="0"/>
+ <value name="LESS" value="1"/>
+ <value name="EQUAL" value="2"/>
+ <value name="LESSEQUAL" value="3"/>
+ <value name="GREATER" value="4"/>
+ <value name="NOTEQUAL" value="5"/>
+ <value name="GREATEREQUAL" value="6"/>
+ <value name="ALWAYS" value="7"/>
+ </enum>
+
+ <enum name="COMPRESSION_LEVEL">
+ <value name="LOSSLESS" value="0"/>
+ <value name="LOSSY_75" value="1"/>
+ <value name="LOSSY_50" value="2"/>
+ <value name="LOSSY_25" value="3"/>
+ </enum>
+
+ <enum name="COMPRESSION_MODE">
+ <value name="NONE" value="0"/>
+ <value name="TPU" value="1"/>
+ <value name="FB_DIRECT_8X8" value="2"/>
+ <value name="FB_DIRECT_16X4" value="3"/>
+ <value name="FB_INDIRECT_1TILE_8X8" value="4"/>
+ <value name="FB_INDIRECT_1TILE_16X4" value="5"/>
+ <value name="FB_INDIRECT_4TILE_8X8" value="6"/>
+ <value name="FB_INDIRECT_4TILE_16X4" value="7"/>
+ </enum>
+
+ <enum name="DADJUST">
+ <value name="MIN_UINT" value="0"/>
+ <value name="ZERO_UINT" value="4095"/>
+ <value name="MAX_UINT" value="8191"/>
+ <value name="INTEGER_BITS" value="5"/>
+ <value name="FRACTIONAL_BITS" value="8"/>
+ </enum>
+
+ <enum name="FILTER">
+ <value name="POINT" value="0"/>
+ <value name="LINEAR" value="1"/>
+ <value name="BICUBIC" value="2"/>
+ </enum>
+
+ <enum name="FORMAT">
+ <value name="U8" value="0"/>
+ <value name="S8" value="1"/>
+ <value name="A4R4G4B4" value="2"/>
+ <value name="A8R3G3B2" value="3"/>
+ <value name="A1R5G5B5" value="4"/>
+ <value name="R5G6B5" value="5"/>
+ <value name="R5sG5sB6" value="6"/>
+ <value name="U8U8" value="7"/>
+ <value name="S8S8" value="8"/>
+ <value name="U16" value="9"/>
+ <value name="S16" value="10"/>
+ <value name="F16" value="11"/>
+ <value name="U8U8U8U8" value="12"/>
+ <value name="S8S8S8S8" value="13"/>
+ <value name="A2R10B10G10" value="14"/>
+ <value name="U16U16" value="15"/>
+ <value name="S16S16" value="16"/>
+ <value name="F16F16" value="17"/>
+ <value name="F32" value="18"/>
+ <value name="F32_SIGNMASK" value="19"/>
+ <value name="X8U8S8S8" value="20"/>
+ <value name="X8U24" value="21"/>
+ <value name="ST8U24" value="22"/>
+ <value name="U8X24" value="23"/>
+ <value name="U32" value="24"/>
+ <value name="S32" value="25"/>
+ <value name="SE9995" value="26"/>
+ <value name="F11F11F10" value="27"/>
+ <value name="F16F16F16F16" value="28"/>
+ <value name="U16U16U16U16" value="29"/>
+ <value name="S16S16S16S16" value="30"/>
+ <value name="F16F16F16" value="31"/>
+ <value name="U16U16U16" value="32"/>
+ <value name="S16S16S16" value="33"/>
+ <value name="F32F32" value="34"/>
+ <value name="U32U32" value="35"/>
+ <value name="S32S32" value="36"/>
+ <value name="X24U8F32" value="37"/>
+ <value name="X24X8F32" value="38"/>
+ <value name="X24G8X32" value="39"/>
+ <value name="YUV420_2PLANE" value="54"/>
+ <value name="YVU420_2PLANE" value="55"/>
+ <value name="YUV420_3PLANE" value="56"/>
+ <value name="YVU420_3PLANE" value="57"/>
+ <value name="U8U8U8" value="58"/>
+ <value name="S8S8S8" value="59"/>
+ <value name="A2F10F10F10" value="60"/>
+ <value name="F32F32F32F32" value="61"/>
+ <value name="U32U32U32U32" value="62"/>
+ <value name="S32S32S32S32" value="63"/>
+ <value name="F32F32F32" value="64"/>
+ <value name="U32U32U32" value="65"/>
+ <value name="S32S32S32" value="66"/>
+ <value name="A2_XRBIAS_U10U10U10" value="67"/>
+ <value name="O8" value="81"/>
+ <value name="O8O8" value="82"/>
+ <value name="R5G5B5A1" value="83"/>
+ <value name="B6G5sR5s" value="84"/>
+ <value name="R10B10G10A2" value="85"/>
+ <value name="X8S8S8U8" value="86"/>
+ <value name="U24ST8" value="87"/>
+ <value name="F10F11F11" value="88"/>
+ <value name="VYUY" value="89"/>
+ <value name="UYVY" value="90"/>
+ <value name="YVYU" value="91"/>
+ <value name="YUYV" value="92"/>
+ <value name="F10F10F10A2" value="93"/>
+ <value name="YUV420_2PLANE_MACRO_BLOCK" value="94"/>
+ <value name="YVU420_2PLANE_MACRO_BLOCK" value="95"/>
+ <value name="ARGBV16_XR10" value="96"/>
+ <value name="YVU8_422_2PLANE_PACK8" value="97"/>
+ <value name="YVU8_444_2PLANE_PACK8" value="98"/>
+ <value name="YVU10_444_1PLANE_PACK10" value="99"/>
+ <value name="YVU10_422_2PLANE_PACK16" value="100"/>
+ <value name="YVU10_420_2PLANE_PACK16" value="101"/>
+ <value name="YVU10_444_2PLANE_PACK16" value="102"/>
+ <value name="YUV8_422_2PLANE_PACK8" value="103"/>
+ <value name="YUV8_444_3PLANE_PACK8" value="104"/>
+ <value name="YUV10_444_3PLANE_PACK16" value="105"/>
+ <value name="YVU10_420_2PLANE_PACK10" value="106"/>
+ <value name="YUV10_420_2PLANE_PACK10" value="107"/>
+ <value name="YVU10_422_2PLANE_PACK10" value="108"/>
+ <value name="YUV10_422_2PLANE_PACK10" value="109"/>
+ <value name="YUV10_444_3PLANE_PACK10" value="110"/>
+ <value name="R8G8_B8G8" value="111"/>
+ <value name="G8R8_G8B8" value="112"/>
+ <value name="YVYU_IMPLIED_CSC" value="113"/>
+ <value name="VYUY_IMPLIED_CSC" value="114"/>
+ <value name="YUV10_420_2PLANE_PACK10_R" value="115"/>
+ <value name="YVU10_420_2PLANE_PACK10_R" value="116"/>
+ <value name="YUV10_420_2PLANE_PACK10_T" value="117"/>
+ <value name="YVU10_420_2PLANE_PACK10_T" value="118"/>
+ <value name="YUV8_420_2PLANE_PACK8_P" value="119"/>
+ <value name="YVU8_420_2PLANE_PACK8_P" value="120"/>
+ <value name="YUV8_420_2PLANE_PACK8_F" value="121"/>
+ <value name="YVU8_420_2PLANE_PACK8_F" value="122"/>
+ <value name="COMPRESSED_ASTC_4x4" value="0"/>
+ <value name="COMPRESSED_ASTC_5x4" value="1"/>
+ <value name="COMPRESSED_ASTC_5x5" value="2"/>
+ <value name="COMPRESSED_ASTC_6x5" value="3"/>
+ <value name="COMPRESSED_ASTC_6x6" value="4"/>
+ <value name="COMPRESSED_ASTC_8x5" value="5"/>
+ <value name="COMPRESSED_ASTC_8x6" value="6"/>
+ <value name="COMPRESSED_ASTC_8x8" value="7"/>
+ <value name="COMPRESSED_ASTC_10x5" value="8"/>
+ <value name="COMPRESSED_ASTC_10x6" value="9"/>
+ <value name="COMPRESSED_ASTC_10x8" value="10"/>
+ <value name="COMPRESSED_ASTC_10x10" value="11"/>
+ <value name="COMPRESSED_ASTC_12x10" value="12"/>
+ <value name="COMPRESSED_ASTC_12x12" value="13"/>
+ <value name="COMPRESSED_PVRT2BPP" value="40"/>
+ <value name="COMPRESSED_PVRT4BPP" value="41"/>
+ <value name="COMPRESSED_PVRTII2BPP" value="42"/>
+ <value name="COMPRESSED_PVRTII4BPP" value="43"/>
+ <value name="COMPRESSED_UBC1" value="44"/>
+ <value name="COMPRESSED_UBC2" value="45"/>
+ <value name="COMPRESSED_UBC3" value="46"/>
+ <value name="COMPRESSED_UBC4" value="47"/>
+ <value name="COMPRESSED_SBC4" value="48"/>
+ <value name="COMPRESSED_UBC5" value="49"/>
+ <value name="COMPRESSED_SBC5" value="50"/>
+ <value name="COMPRESSED_UBC6" value="51"/>
+ <value name="COMPRESSED_SBC6" value="52"/>
+ <value name="COMPRESSED_UBC7" value="53"/>
+ <value name="COMPRESSED_ETC2_RGB" value="68"/>
+ <value name="COMPRESSED_ETC2A_RGBA" value="69"/>
+ <value name="COMPRESSED_ETC2_PUNCHTHROUGHA" value="70"/>
+ <value name="COMPRESSED_EAC_R11_UNSIGNED" value="71"/>
+ <value name="COMPRESSED_EAC_R11_SIGNED" value="72"/>
+ <value name="COMPRESSED_EAC_RG11_UNSIGNED" value="73"/>
+ <value name="COMPRESSED_EAC_RG11_SIGNED" value="74"/>
+ <value name="INVALID" value="0xFFFFFFFF"/>
+ </enum>
+
+ <enum name="GAMMA">
+ <value name="OFF" value="0"/>
+ <value name="ON" value="1"/>
+ </enum>
+
+ <enum name="SWIZ">
+ <value name="SRCCHAN_0" value="0"/>
+ <value name="SRCCHAN_1" value="1"/>
+ <value name="SRCCHAN_2" value="2"/>
+ <value name="SRCCHAN_3" value="3"/>
+ <value name="SRC_ONE" value="4"/>
+ <value name="SRC_ZERO" value="5"/>
+ </enum>
+
+ <enum name="TEXTYPE">
+ <value name="1D" value="0"/>
+ <value name="2D" value="1"/>
+ <value name="3D" value="2"/>
+ <value name="CUBE" value="3"/>
+ <value name="STRIDE" value="4"/>
+ <value name="PAGETILE" value="5"/>
+ <value name="BUFFER_LOOKUP" value="6"/>
+ </enum>
+
+ <enum name="TWOCOMP_GAMMA">
+ <value name="OFF" value="0"/>
+ <value name="R" value="1"/>
+ <value name="RG" value="3"/>
+ </enum>
+
+ <struct name="IMAGE_WORD0" length="2">
+ <field name="smpcnt" start="62" end="63" type="uint"/>
+ <field name="height" start="48" end="61" type="uint"/>
+ <field name="width" start="34" end="47" type="uint"/>
+ <field name="texformat" start="27" end="33" type="FORMAT"/>
+ <field name="minlod" start="17" end="26" type="uint"/>
+ <field name="swiz0" start="14" end="16" type="SWIZ"/>
+ <field name="swiz1" start="11" end="13" type="SWIZ"/>
+ <field name="swiz2" start="8" end="10" type="SWIZ"/>
+ <field name="swiz3" start="5" end="7" type="SWIZ"/>
+ <field name="twocomp_gamma" start="3" end="4" type="TWOCOMP_GAMMA"/>
+ <field name="gamma" start="3" end="3" type="GAMMA"/>
+ <field name="textype" start="0" end="2" type="TEXTYPE"/>
+ </struct>
+
+ <struct name="IMAGE_WORD1" length="2">
+ <field name="baselevel" start="60" end="63" type="uint"/>
+ <field name="alpha_msb" start="59" end="59" type="bool"/>
+ <field name="border" start="58" end="58" type="bool"/>
+ <condition type="if" check="TPU_IMAGE_STATE_V2"/>
+ <condition type="if" check="TFBC"/>
+ <field name="lossy_compression_mode" start="56" end="57" type="COMPRESSION_LEVEL"/>
+ <condition type="endif" check="TFBC"/>
+ <field name="tpu_image_state_v2_compression_mode" start="54" end="56" type="COMPRESSION_MODE"/>
+ <condition type="else" check="TPU_IMAGE_STATE_V2"/>
+ <field name="tile_size" start="57" end="57" type="bool"/>
+ <field name="index_lookup" start="56" end="56" type="bool"/>
+ <field name="frame_buffer_compression" start="55" end="55" type="bool"/>
+ <field name="frame_buffer_compression_addressing_mode" start="54" end="54" type="bool"/>
+ <condition type="endif" check="TPU_IMAGE_STATE_V2"/>
+ <field name="texaddr" start="16" end="53" shift="2" type="address"/>
+ <field name="mipmaps_present" start="15" end="15" type="bool"/>
+ <field name="depth" start="4" end="14" type="uint"/>
+ <field name="num_mip_levels" start="0" end="3" type="uint"/>
+ </struct>
+
+ <struct name="STRIDE_IMAGE_WORD1" length="2">
+ <field name="num_mip_levels" start="60" end="63" type="uint"/>
+ <field name="alpha_msb" start="59" end="59" type="bool"/>
+ <condition type="if" check="TPU_IMAGE_STATE_V2"/>
+ <condition type="if" check="TFBC"/>
+ <field name="lossy_compression_mode" start="56" end="57" type="COMPRESSION_LEVEL"/>
+ <condition type="endif" check="TFBC"/>
+ <field name="tpu_image_state_v2_compression_mode" start="54" end="56" type="COMPRESSION_MODE"/>
+ <condition type="else" check="TPU_IMAGE_STATE_V2"/>
+ <field name="tile_size" start="57" end="57" type="bool"/>
+ <field name="index_lookup" start="56" end="56" type="bool"/>
+ <field name="frame_buffer_compression" start="55" end="55" type="bool"/>
+ <field name="frame_buffer_compression_addressing_mode" start="54" end="54" type="bool"/>
+ <condition type="endif" check="TPU_IMAGE_STATE_V2"/>
+ <field name="texaddr" start="16" end="53" shift="2" type="address"/>
+ <field name="mipmaps_present" start="15" end="15" type="bool"/>
+ <field name="stride" start="0" end="14" type="uint"/>
+ </struct>
+
+ <struct name="SAMPLER" length="2">
+ <field name="texaddr_plane2_lo" start="50" end="63" shift="2" type="address"/>
+ <field name="cmp_mode" start="59" end="61" type="CMP_MODE"/>
+ <field name="addrmode_w" start="56" end="58" type="ADDRMODE"/>
+ <field name="bordercolor_index" start="50" end="55" type="uint"/>
+ <field name="non_normalized_coords" start="49" end="49" type="bool"/>
+ <field name="lumakey_alphamult" start="48" end="48" type="bool"/>
+ <field name="lumakey" start="47" end="47" type="bool"/>
+ <field name="addrmode_v" start="44" end="46" type="ADDRMODE"/>
+ <field name="addrmode_u" start="41" end="43" type="ADDRMODE"/>
+ <field name="mipfilter" start="40" end="40" type="bool"/>
+ <field name="minfilter" start="38" end="39" type="FILTER"/>
+ <field name="magfilter" start="36" end="37" type="FILTER"/>
+ <field name="anisoctl" start="33" end="35" type="ANISOCTL"/>
+ <field name="maxlod" start="23" end="32" type="CLAMP"/>
+ <field name="minlod" start="13" end="22" type="CLAMP"/>
+ <field name="dadjust" start="0" end="12" type="DADJUST"/>
+ </struct>
+
+</csbgen>
--- /dev/null
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="VDMCTRL">
+
+ <enum name="BLOCK_TYPE">
+ <value name="PPP_STATE_UPDATE" value="0"/>
+ <value name="PDS_STATE_UPDATE" value="1"/>
+ <value name="VDM_STATE_UPDATE" value="2"/>
+ <value name="INDEX_LIST" value="3"/>
+ <value name="STREAM_LINK" value="4"/>
+ <value name="STREAM_RETURN" value="5"/>
+ <value name="STREAM_TERMINATE" value="6"/>
+ <value name="CONTROL" value="7"/>
+ </enum>
+
+ <enum name="DM_TARGET">
+ <value name="VDM" value="0"/>
+ <value name="DDM" value="1"/>
+ </enum>
+
+ <enum name="FLATSHADE_CONTROL">
+ <value name="VERTEX_0" value="0"/>
+ <value name="VERTEX_1" value="1"/>
+ <value name="VERTEX_2" value="2"/>
+ </enum>
+
+ <enum name="INDEX_SIZE">
+ <value name="B8" value="0"/>
+ <value name="B16" value="1"/>
+ <value name="B32" value="2"/>
+ </enum>
+
+ <enum name="PRIMITIVE_TOPOLOGY">
+ <value name="POINT_LIST" value="0"/>
+ <value name="LINE_LIST" value="1"/>
+ <value name="LINE_LIST_ADJ" value="2"/>
+ <value name="LINE_STRIP" value="3"/>
+ <value name="LINE_STRIP_ADJ" value="4"/>
+ <value name="LINE_LOOP" value="5"/>
+ <value name="TRI_LIST" value="6"/>
+ <value name="TRI_LIST_ADJ" value="7"/>
+ <value name="TRI_LIST_EDGE" value="8"/>
+ <value name="TRI_STRIP" value="9"/>
+ <value name="TRI_STRIP_ADJ" value="10"/>
+ <value name="TRI_FAN" value="11"/>
+ <value name="PATCH_LIST" value="12"/>
+ </enum>
+
+ <enum name="SD_TYPE">
+ <value name="NONE" value="0"/>
+ <value name="PDS" value="1"/>
+ <value name="USC" value="2"/>
+ </enum>
+
+ <enum name="USC_TARGET">
+ <value name="ALL" value="0"/>
+ <value name="ANY" value="1"/>
+ </enum>
+
+ <enum name="UVS_SCRATCH_SIZE_SELECT">
+ <value name="FIVE" value="0"/>
+ <value name="ONE" value="1"/>
+ </enum>
+
+ <struct name="PPP_STATE0" length="1">
+ <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="PPP_STATE_UPDATE"/>
+ <field name="word_count" start="8" end="15" type="uint"/>
+ <field name="addrmsb" start="0" end="7" shift="32" type="address"/>
+ </struct>
+
+ <struct name="PPP_STATE1" length="1">
+ <field name="addrlsb" start="2" end="31" shift="2" type="address"/>
+ </struct>
+
+ <struct name="PDS_STATE0" length="1">
+ <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="PDS_STATE_UPDATE"/>
+ <field name="dm_target" start="28" end="28" type="DM_TARGET"/>
+ <field name="usc_target" start="25" end="25" type="USC_TARGET"/>
+ <field name="usc_common_size" start="16" end="24" type="uint">
+ <define name="UNIT_SIZE" value="64"/>
+ </field>
+ <field name="usc_unified_size" start="10" end="15" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="pds_temp_size" start="6" end="9" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="pds_data_size" start="0" end="5" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ </struct>
+
+ <struct name="PDS_STATE1" length="1">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="pds_data_addr" start="4" end="31" shift="4" type="address"/>
+ <field name="sd_type" start="2" end="3" type="SD_TYPE"/>
+ <field name="sd_next_type" start="0" end="1" type="SD_TYPE"/>
+ </struct>
+
+ <struct name="PDS_STATE2" length="1">
+ <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+ <field name="pds_code_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="STREAM_LINK0" length="1">
+ <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="STREAM_LINK"/>
+ <field name="with_return" start="28" end="28" type="bool"/>
+ <field name="compare_present" start="27" end="27" type="bool"/>
+ <field name="compare_mode" start="24" end="26" type="uint"/>
+ <field name="compare_data" start="8" end="23" type="uint"/>
+ <field name="link_addrmsb" start="0" end="7" shift="32" type="address"/>
+ </struct>
+
+ <struct name="STREAM_LINK1" length="1">
+ <field name="link_addrlsb" start="2" end="31" shift="2" type="address"/>
+ </struct>
+
+ <struct name="STREAM_RETURN" length="1">
+ <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="STREAM_RETURN"/>
+ </struct>
+
+ <struct name="STREAM_TERMINATE" length="1">
+ <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="STREAM_TERMINATE"/>
+ <field name="context" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="VDM_STATE0" length="1">
+ <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="VDM_STATE_UPDATE"/>
+ <field name="cut_index_present" start="28" end="28" type="bool"/>
+ <field name="vs_data_addr_present" start="27" end="27" type="bool"/>
+ <field name="vs_other_present" start="26" end="26" type="bool"/>
+ <field name="ds_present" start="24" end="24" type="bool"/>
+ <field name="gs_present" start="23" end="23" type="bool"/>
+ <field name="hs_present" start="22" end="22" type="bool"/>
+ <field name="cam_size" start="7" end="14" type="uint"/>
+ <field name="uvs_scratch_size_select" start="6" end="6" type="UVS_SCRATCH_SIZE_SELECT"/>
+ <field name="cut_index_enable" start="5" end="5" type="bool"/>
+ <field name="tess_enable" start="4" end="4" type="bool"/>
+ <field name="gs_enable" start="3" end="3" type="bool"/>
+ <field name="flatshade_control" start="1" end="2" type="FLATSHADE_CONTROL"/>
+ <field name="generate_primitive_id" start="0" end="0" type="bool"/>
+ </struct>
+
+ <struct name="VDM_STATE1" length="1">
+ <field name="cut_index" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="VDM_STATE2" length="1">
+ <field name="vs_pds_data_base_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="VDM_STATE3" length="1">
+ <field name="vs_pds_code_base_addr" start="4" end="31" shift="4" type="address"/>
+ </struct>
+
+ <struct name="VDM_STATE4" length="1">
+ <field name="vs_output_size" start="0" end="7" type="uint">
+ <define name="UNIT_SIZE" value="4"/>
+ </field>
+ </struct>
+
+ <struct name="VDM_STATE5" length="1">
+ <field name="vs_max_instances_ext" start="31" end="31" type="bool"/>
+ <field name="vs_max_instances" start="25" end="29" type="uint"/>
+ <field name="vs_usc_common_size" start="16" end="24" type="uint">
+ <define name="UNIT_SIZE" value="64"/>
+ </field>
+ <field name="vs_usc_unified_size" start="10" end="15" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="vs_pds_temp_size" start="6" end="9" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ <field name="vs_pds_data_size" start="0" end="5" type="uint">
+ <define name="UNIT_SIZE" value="16"/>
+ </field>
+ </struct>
+
+ <struct name="INDEX_LIST0" length="1">
+ <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="INDEX_LIST"/>
+ <field name="index_addr_present" start="28" end="28" type="bool"/>
+ <field name="index_count_present" start="27" end="27" type="bool"/>
+ <field name="index_instance_count_present" start="26" end="26" type="bool"/>
+ <field name="index_offset_present" start="25" end="25" type="bool"/>
+ <field name="start_present" start="24" end="24" type="bool"/>
+ <field name="indirect_addr_present" start="23" end="23" type="bool"/>
+ <field name="split_count_present" start="22" end="22" type="bool"/>
+ <condition type="if" check="VDM_DEGENERATE_CULLING"/>
+ <field name="degen_cull_enable" start="19" end="19" type="bool"/>
+ <condition type="endif" check="VDM_DEGENERATE_CULLING"/>
+ <field name="index_size" start="17" end="18" type="INDEX_SIZE"/>
+ <field name="patch_count" start="12" end="16" type="uint"/>
+ <field name="primitive_topology" start="8" end="11" type="PRIMITIVE_TOPOLOGY"/>
+ <field name="index_base_addrmsb" start="0" end="7" shift="32" type="address"/>
+ </struct>
+
+ <struct name="INDEX_LIST1" length="1">
+ <field name="index_base_addrlsb" start="0" end="31" shift="0" type="address"/>
+ </struct>
+
+ <struct name="INDEX_LIST2" length="1">
+ <field name="index_count" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="INDEX_LIST3" length="1">
+ <field name="instance_count" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="INDEX_LIST4" length="1">
+ <field name="index_offset" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="INDEX_LIST5" length="1">
+ <field name="start_index" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="INDEX_LIST6" length="1">
+ <field name="start_instance" start="0" end="31" type="uint"/>
+ </struct>
+
+ <struct name="INDEX_LIST7" length="1">
+ <field name="indirect_base_addrmsb" start="0" end="7" shift="32" type="address"/>
+ </struct>
+
+ <struct name="INDEX_LIST8" length="1">
+ <field name="indirect_base_addrlsb" start="2" end="31" shift="2" type="address"/>
+ </struct>
+
+ <struct name="INDEX_LIST9" length="1">
+ <field name="split_count" start="0" end="15" type="uint"/>
+ </struct>
+
+</csbgen>
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is based on rgxdefs.h and should only contain object-like macros.
+ * Any function-like macros or inline functions should instead appear in
+ * rogue_hw_utils.h.
+ */
+
+#ifndef ROGUE_HW_DEFS_H
+#define ROGUE_HW_DEFS_H
+
+#include <stdint.h>
+
+#include "util/macros.h"
+
+/* BIF parameter-management physical page size: 1 << 12 = 4 KiB. */
+#define ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT 12U
+#define ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE \
+   BITFIELD_BIT(ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT)
+
+/* ISP triangle merging constants. */
+/* tan(15) (0x3E8930A3) */
+#define ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR 0.267949f
+/* tan(60) (0x3FDDB3D7) */
+#define ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR 1.732051f
+#define ROGUE_ISP_MERGE_SCALE_FACTOR 16.0f
+
+/* Maximum size of one instruction, in bytes (presumably a USC instruction
+ * encoding limit -- TODO confirm against rgxdefs.h).
+ */
+#define ROGUE_MAX_INSTR_BYTES 32U
+
+/* MList entry stride in bytes */
+#define ROGUE_MLIST_ENTRY_STRIDE 4U
+
+/* VCE & TE share virtual space and Alist. */
+#define ROGUE_NUM_PM_ADDRESS_SPACES 2U
+
+/* PM Maximum addressable limit (as determined by the size field of the
+ * PM_*_FSTACK registers).
+ */
+#define ROGUE_PM_MAX_PB_VIRT_ADDR_SPACE UINT64_C(0x400000000)
+
+/* Vheap entry size in bytes. */
+#define ROGUE_PM_VHEAP_ENTRY_SIZE 4U
+
+#define ROGUE_RTC_SIZE_IN_BYTES 256U
+
+/* Number of VCE, TEAC and TE units present in this configuration. */
+#define ROGUE_NUM_VCE 1U
+
+#define ROGUE_NUM_TEAC 1U
+
+#define ROGUE_NUM_TE 1U
+
+/* Tail pointer size in bytes. */
+#define ROGUE_TAIL_POINTER_SIZE 8U
+
+/* Tail pointer cache line size. */
+#define ROGUE_TE_TPC_CACHE_LINE_SIZE 64U
+
+/* Maximum shared register counts for vertex and pixel work. */
+#define ROGUE_MAX_VERTEX_SHARED_REGISTERS 1024U
+
+#define ROGUE_MAX_PIXEL_SHARED_REGISTERS 1024U
+
+/* Number of CR_PDS_BGRND values that need setting up. */
+#define ROGUE_NUM_CR_PDS_BGRND_WORDS 3U
+
+/* Number of PBESTATE_REG_WORD values that need setting up. */
+#define ROGUE_NUM_PBESTATE_REG_WORDS 3U
+
+/* Number of PBESTATE_STATE_WORD values that need setting up. */
+#define ROGUE_NUM_PBESTATE_STATE_WORDS 2U
+
+/* Number of TEXSTATE_IMAGE_WORD values that need setting up. */
+#define ROGUE_NUM_TEXSTATE_IMAGE_WORDS 2U
+
+#define ROGUE_MAX_RENDER_TARGETS 2048U
+
+/* 12 dwords reserved for shared register management. The first dword is the
+ * number of shared register blocks to reload. Should be a multiple of 4 dwords,
+ * size in bytes.
+ */
+#define ROGUE_LLS_SHARED_REGS_RESERVE_SIZE 48U
+
+/* NOTE(review): unit (bytes vs dwords) is not derivable from this header;
+ * confirm against rgxdefs.h.
+ */
+#define ROGUE_USC_TASK_PROGRAM_SIZE 512U
+
+/* NOTE(review): the meaning of the individual factors (64 * 4 * 4) is not
+ * derivable from this header; confirm against rgxdefs.h.
+ */
+#define ROGUE_CSRM_LINE_SIZE_IN_DWORDS (64U * 4U * 4U)
+
+/* The maximum amount of local memory which can be allocated by a single kernel
+ * (in dwords/32-bit registers).
+ *
+ * ROGUE_CDMCTRL_USC_COMMON_SIZE_UNIT_SIZE is in bytes so we divide by four.
+ */
+#define ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS \
+   ((ROGUE_CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE * \
+     ROGUE_CDMCTRL_KERNEL0_USC_COMMON_SIZE_MAX_SIZE) >> \
+    2)
+
+/* The KERNEL8 max_instances field is presumably zero-based, hence the +1. */
+#define ROGUE_MAX_INSTANCES_PER_TASK \
+   (ROGUE_CDMCTRL_KERNEL8_MAX_INSTANCES_MAX_SIZE + 1U)
+
+/* Optimal number for packing work groups into a slot. */
+#define ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK 8U
+
+/* The maximum number of pixel task instances which might be running overlapped
+ * with compute. Once we have 8 pixel task instances we have a complete set and
+ * task will be able to run and allocations will be freed.
+ */
+#define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U
+
+#endif /* ROGUE_HW_DEFS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is based on rgxdefs.h and should only contain function-like macros
+ * and inline functions. Any object-like macros should instead appear in
+ * rogue_hw_defs.h.
+ */
+
+#ifndef ROGUE_HW_UTILS_H
+#define ROGUE_HW_UTILS_H
+
+#include <stdint.h>
+
+#include "pvr_winsys.h"
+
+/* These macros presumably parameterize the generated csbgen headers with the
+ * device-address type and accessor to use for this build; they are undefined
+ * again immediately after the includes so they cannot leak elsewhere.
+ * NOTE(review): identifiers starting with double underscore are reserved for
+ * the implementation — confirm this naming is intentional.
+ */
+#define __pvr_address_type pvr_dev_addr_t
+#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
+
+#include "csbgen/rogue_cdm.h"
+#include "csbgen/rogue_lls.h"
+
+#undef __pvr_get_address
+#undef __pvr_address_type
+
+#include "rogue_hw_defs.h"
+#include "pvr_device_info.h"
+#include "util/compiler.h"
+#include "util/macros.h"
+
+/* Returns, through x_out/y_out, the ISP sample grid dimensions for one tile.
+ *
+ * Starts from the device tile dimensions and doubles X and/or Y depending on
+ * the requested MSAA sample count and the core's ISP samples-per-pixel
+ * capability.
+ */
+static inline void
+rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info,
+                                  uint32_t samples,
+                                  uint32_t *const x_out,
+                                  uint32_t *const y_out)
+{
+   const uint32_t tile_size_x =
+      PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
+   const uint32_t tile_size_y =
+      PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
+   const uint32_t samples_per_pixel =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U);
+
+   /* Debug-only sanity check: only 1, 2 or 4 ISP samples/pixel are handled. */
+#if !defined(NDEBUG)
+   switch (samples_per_pixel) {
+   case 1:
+   case 2:
+   case 4:
+      break;
+   default:
+      assert(!"Unsupported ISP samples per pixel");
+   }
+#endif
+
+   *x_out = tile_size_x;
+   *y_out = tile_size_y;
+
+   switch (samples) {
+   default:
+      assert(!"Unsupported number of samples");
+      FALLTHROUGH;
+   case 1:
+      break;
+   case 2:
+      if (samples_per_pixel == 2 || samples_per_pixel == 4)
+         *y_out *= 2;
+
+      break;
+   case 4:
+      if (samples_per_pixel == 2 || samples_per_pixel == 4)
+         *x_out *= 2;
+
+      /* Only the 2 samples/pixel configuration needs Y doubled as well. */
+      if (samples_per_pixel == 2)
+         *y_out *= 2;
+
+      break;
+   case 8:
+      *y_out *= 2;
+      break;
+   }
+}
+
+/* Returns the minimum PM free list size in bytes.
+ *
+ * The minimum number of PM physical pages depends on the core family and on
+ * whether BRN 66011 applies; the page count is then converted to bytes using
+ * the PM physical page size.
+ */
+static inline uint64_t
+rogue_get_min_free_list_size(const struct pvr_device_info *dev_info)
+{
+   uint64_t num_pages = 50U;
+
+   if (PVR_HAS_FEATURE(dev_info, roguexe))
+      num_pages = PVR_HAS_QUIRK(dev_info, 66011) ? 40U : 25U;
+
+   return num_pages << ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+}
+
+/* Returns the maximum number of VDM PDS tasks. */
+static inline uint32_t
+rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info *dev_info)
+{
+   /* Default value based on the minimum value found in all existing cores. */
+   uint32_t max_usc_tasks = PVR_GET_FEATURE_VALUE(dev_info, max_usc_tasks, 24U);
+
+   /* FIXME: Where does the 9 come from? */
+   return max_usc_tasks - 9;
+}
+
+/* Returns the number of USC output registers available per pixel. */
+static inline uint32_t
+rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
+{
+   return PVR_HAS_FEATURE(dev_info, eight_output_registers) ? 8U : 4U;
+}
+
+/* Returns the macrotile grid dimensions through x_out/y_out. */
+static inline void
+rogue_get_num_macrotiles_xy(const struct pvr_device_info *dev_info,
+                            uint32_t *const x_out,
+                            uint32_t *const y_out)
+{
+   uint32_t spf_version;
+
+   if (PVR_FEATURE_VALUE(dev_info,
+                         simple_parameter_format_version,
+                         &spf_version))
+      spf_version = 0;
+
+   /* SIPF cores (other than format version 2) have a single macrotile. */
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
+       spf_version != 2) {
+      *x_out = 1;
+      *y_out = 1;
+   } else {
+      *x_out = 4;
+      *y_out = 4;
+   }
+}
+
+/* Returns the size of the macrotile array in bytes; 0 when the simple
+ * internal parameter format is in use.
+ */
+static inline uint32_t
+rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t macrotiles_x;
+   uint32_t macrotiles_y;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
+      return 0;
+
+   rogue_get_num_macrotiles_xy(dev_info, &macrotiles_x, &macrotiles_y);
+
+   /* 8 bytes per macrotile entry. */
+   return macrotiles_x * macrotiles_y * 8U;
+}
+
+/* To get the number of required Bernado/Phantom(s), divide the number of
+ * clusters by 4 and round up.
+ */
+static inline uint32_t
+rogue_get_num_phantoms(const struct pvr_device_info *dev_info)
+{
+   /* Defaults to a single cluster when the feature value is not present. */
+   return DIV_ROUND_UP(PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U), 4U);
+}
+
+/* Returns the ISP region header size in bytes. */
+static inline uint32_t
+rogue_get_region_header_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t spf_version;
+
+   if (PVR_FEATURE_VALUE(dev_info,
+                         simple_parameter_format_version,
+                         &spf_version))
+      spf_version = 0;
+
+   /* SIPF format version 2 uses a 6-byte region header; everything else 5. */
+   return (spf_version == 2 &&
+           PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
+             ? 6
+             : 5;
+}
+
+/* Return the total reserved size of partition in dwords. */
+static inline uint32_t
+rogue_get_total_reserved_partition_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
+   uint32_t tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
+   uint32_t max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
+
+   if (tile_size_x == 16 && tile_size_y == 16) {
+      /* 16x16 tiles: reserve each pixel's minimum output registers in every
+       * partition.
+       */
+      return tile_size_x * tile_size_y * max_partitions *
+             PVR_GET_FEATURE_VALUE(dev_info,
+                                   usc_min_output_registers_per_pix,
+                                   0);
+   }
+
+   /* NOTE(review): 1024 dwords per partition assumed for all other tile
+    * sizes — confirm against hardware documentation.
+    */
+   return max_partitions * 1024U;
+}
+
+/* Returns the maximum render dimension (applies to both axes) in pixels. */
+static inline uint32_t
+rogue_get_render_size_max(const struct pvr_device_info *dev_info)
+{
+   /* SIPF cores are limited to 4K unless they have the 8K screen feature. */
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
+       !PVR_HAS_FEATURE(dev_info, screen_size8K))
+      return 4096U;
+
+   return 8192U;
+}
+
+/* X and Y render size limits are currently identical. */
+#define rogue_get_render_size_max_x(dev_info) \
+   rogue_get_render_size_max(dev_info)
+
+#define rogue_get_render_size_max_y(dev_info) \
+   rogue_get_render_size_max(dev_info)
+
+/* Returns the SLC cache line size in bytes. */
+static inline uint32_t
+rogue_get_slc_cache_line_size(const struct pvr_device_info *dev_info)
+{
+   const uint32_t line_size_bits =
+      PVR_GET_FEATURE_VALUE(dev_info, slc_cache_line_size_bits, 8U);
+
+   return line_size_bits / 8U;
+}
+
+/* Returns the maximum number of user vertex output components.
+ *
+ * Smaller UVS configurations (<= 8 banks with 160 PBA entries) support only
+ * 64 components; everything else supports 128.
+ */
+static inline uint32_t pvr_get_max_user_vertex_output_components(
+   const struct pvr_device_info *dev_info)
+{
+   const uint32_t banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 0U);
+   const uint32_t pba_entries =
+      PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 0U);
+
+   return (banks <= 8U && pba_entries == 160U) ? 64U : 128U;
+}
+
+/* Returns the size, in dwords, of the reserved shared register region. */
+static inline uint32_t
+rogue_get_reserved_shared_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t common_store_size_in_dwords =
+      PVR_GET_FEATURE_VALUE(dev_info,
+                            common_store_size_in_dwords,
+                            512U * 4U * 4U);
+   /* Common store minus 1024 dwords (256U * 4U) and the reserved partition
+    * space. NOTE(review): confirm what the 1024-dword carve-out is for.
+    */
+   uint32_t reserved_shared_size =
+      common_store_size_in_dwords - (256U * 4U) -
+      rogue_get_total_reserved_partition_size(dev_info);
+
+   if (PVR_HAS_QUIRK(dev_info, 44079)) {
+      /* BRN 44079: cap the region at the common store split point. */
+      uint32_t common_store_split_point = (768U * 4U * 4U);
+
+      return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size);
+   }
+
+   return reserved_shared_size;
+}
+
+/* Returns the maximum number of shared registers available to compute
+ * kernels (0 when the core has no compute support).
+ */
+static inline uint32_t
+rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
+{
+   return PVR_HAS_FEATURE(dev_info, compute) ? 2048U : 0U;
+}
+
+/* Returns the maximum number of coefficient registers available, in dwords,
+ * after subtracting the per-phantom pixel portion, vertex shared registers
+ * and any pending allocations required by quirks or enhancements.
+ */
+static inline uint32_t
+rogue_get_max_coeffs(const struct pvr_device_info *dev_info)
+{
+   uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS;
+   uint32_t pending_allocation_shared_regs = 2U * 1024U;
+   uint32_t pending_allocation_coeff_regs = 0U;
+   uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
+   uint32_t tiles_in_flight =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
+   /* Tiles in flight are spread evenly across the phantoms. */
+   uint32_t max_coeff_pixel_portion =
+      DIV_ROUND_UP(tiles_in_flight, num_phantoms);
+
+   max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS;
+
+   /* Compute tasks on cores with BRN48492 and without compute overlap may lock
+    * up without two additional lines of coeffs.
+    */
+   if (PVR_HAS_QUIRK(dev_info, 48492) &&
+       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
+      pending_allocation_coeff_regs = 2U * 1024U;
+   }
+
+   /* ERN 38748 removes the need for the pending shared register allocation. */
+   if (PVR_HAS_ERN(dev_info, 38748))
+      pending_allocation_shared_regs = 0U;
+
+   if (PVR_HAS_ERN(dev_info, 38020)) {
+      max_coeff_additional_portion +=
+         rogue_max_compute_shared_registers(dev_info);
+   }
+
+   return rogue_get_reserved_shared_size(dev_info) +
+          pending_allocation_coeff_regs -
+          (max_coeff_pixel_portion + max_coeff_additional_portion +
+           pending_allocation_shared_regs);
+}
+
+/* Returns the size, in bytes, of the CDM context resume buffer.
+ *
+ * On multicore parts each core gets its own cache-line-aligned slot, so the
+ * aligned per-core stride is multiplied by the total core count.
+ */
+static inline uint32_t
+rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
+{
+   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support)) {
+      /* xpu_max_slaves counts secondary cores only; +1 for the primary. */
+      const uint32_t max_num_cores =
+         PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
+      const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+      const uint32_t cdm_context_resume_buffer_stride =
+         ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size);
+
+      return cdm_context_resume_buffer_stride * max_num_cores;
+   }
+
+   return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE;
+}
+
+/* Returns the required alignment of the CDM context resume buffer. */
+static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment(
+   const struct pvr_device_info *dev_info)
+{
+   if (!PVR_HAS_FEATURE(dev_info, gpu_multicore_support))
+      return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT;
+
+   /* Multicore: each core's slot must start on an SLC cache line. */
+   return rogue_get_slc_cache_line_size(dev_info);
+}
+
+/* Returns the maximum amount of local memory available to a CDM kernel, in
+ * registers (dwords).
+ */
+static inline uint32_t
+rogue_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info)
+{
+   uint32_t available_coeffs_in_dwords = rogue_get_max_coeffs(dev_info);
+
+   if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) &&
+       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
+      /* Driver must not use the 2 reserved lines. */
+      available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2;
+   }
+
+   /* The maximum amount of local memory available to a kernel is the minimum
+    * of the total number of coefficient registers available and the max common
+    * store allocation size which can be made by the CDM.
+    *
+    * If any coeff lines are reserved for tessellation or pixel then we need to
+    * subtract those too.
+    */
+   return MIN2(available_coeffs_in_dwords,
+               ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS);
+}
+
+/* Returns the maximum compute work-group size supported by this core. */
+static inline uint32_t
+rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info)
+{
+   /* The number of tasks which can be executed per USC - Limited to 16U by the
+    * CDM.
+    */
+   const uint32_t max_tasks_per_usc = 16U;
+
+   if (!PVR_HAS_ERN(dev_info, 35421)) {
+      /* Barriers on work-groups > 32 instances aren't supported. */
+      return ROGUE_MAX_INSTANCES_PER_TASK;
+   }
+
+   return ROGUE_MAX_INSTANCES_PER_TASK * max_tasks_per_usc;
+}
+
+#endif /* ROGUE_HW_UTILS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is based on rgx_fwif_shared.h and rgx_fwif_client.h. It contains
+ * information about the firmware that is needed by the driver.
+ */
+
+#ifndef PVR_ROGUE_FW_H
+#define PVR_ROGUE_FW_H
+
+/**
+ * Maximum PB free list size supported by RGX and Services.
+ *
+ * Maximum PB free list size must ensure that no PM address space can be fully
+ * used, because if the full address space was used it would wrap and corrupt
+ * itself. Since there are two freelists (local is always minimum sized) this
+ * can be described as following three conditions being met:
+ *
+ *  Minimum PB + Maximum PB < ALIST PM address space size (16GB)
+ *  Minimum PB + Maximum PB < TE PM address space size (16GB) / NUM_TE_PIPES
+ *  Minimum PB + Maximum PB < VCE PM address space size (16GB) / NUM_VCE_PIPES
+ *
+ * Since the max of NUM_TE_PIPES and NUM_VCE_PIPES is 4, we have a hard limit
+ * of 4GB minus the Minimum PB. For convenience we take the smaller power-of-2
+ * value of 2GB. This is far more than any normal application would request
+ * or use.
+ */
+#define ROGUE_FREE_LIST_MAX_SIZE (2ULL * 1024ULL * 1024ULL * 1024ULL)
+
+/* FIXME: This will change based on the firmware configuration, which will vary
+ * depending on the BVNC and firmware version. The powervr KM driver allows this
+ * information to be queried, but the pvrsrvkm KM driver doesn't. This
+ * information should really be queried from the winsys.
+ */
+/* Indicates the number of Render Target Datas in a Render Target Dataset.
+ * NOTE(review): 2 presumably allows one RT Data to render while the other is
+ * prepared — confirm against firmware documentation.
+ */
+#define ROGUE_NUM_RTDATAS 2U
+#endif /* PVR_ROGUE_FW_H */
--- /dev/null
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Include paths shared by all Imagination components.
+inc_imagination = include_directories([
+  '.',
+  'common',
+  'include',
+])
+
+# Only descend into the driver subdirectories when the Imagination Vulkan
+# driver has been enabled.
+if with_imagination_vk
+  subdir('common')
+  subdir('csbgen')
+  subdir('rogue')
+  subdir('vulkan')
+endif
--- /dev/null
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Sources for the Rogue shader compiler library.
+libpowervr_rogue_files = files(
+  'nir/rogue_nir_constreg.c',
+  'nir/rogue_nir_lower_io.c',
+  'nir/rogue_nir_pfo.c',
+
+  'rogue.c',
+  'rogue_build_data.c',
+  'rogue_compiler.c',
+  'rogue_constreg.c',
+  'rogue_dump.c',
+  'rogue_encode.c',
+  'rogue_encoders.c',
+  'rogue_instr.c',
+  'rogue_nir.c',
+  'rogue_operand.c',
+  'rogue_regalloc.c',
+  'rogue_shader.c',
+  'rogue_util.c',
+  'rogue_validate.c',
+)
+
+# Shared compiler library used by the Vulkan driver and the offline compiler.
+libpowervr_rogue = shared_library(
+  'powervr_rogue',
+  libpowervr_rogue_files,
+  include_directories : [
+    inc_imagination,
+    inc_include,
+    inc_compiler,
+    inc_src,
+    inc_mapi,
+    inc_mesa,
+    inc_gallium,
+    inc_gallium_aux,
+  ],
+  c_args : [c_msvc_compat_args, no_override_init_args],
+  gnu_symbol_visibility : 'hidden',
+  dependencies : [idep_mesautil, idep_nir, dep_csbgen],
+  install : true,
+)
+
+# Standalone offline compiler, for development and debugging; not installed.
+rogue_compiler = executable(
+  'rogue_compiler',
+  'tools/offline_compiler.c',
+  link_with : [libpowervr_rogue],
+  dependencies : [idep_mesautil, idep_nir],
+  include_directories : [
+    inc_mesa,
+    inc_include,
+    inc_src,
+    inc_mapi,
+    inc_gallium,
+    inc_gallium_aux,
+    inc_compiler,
+  ],
+  install : false,
+)
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "nir/nir_search_helpers.h"
+#include "rogue_constreg.h"
+#include "rogue_nir.h"
+
+/* TODO: optimize: if value is in const regs, replace, else, use shared regs and
+ * notify driver they need to be populated?
+ */
+
+/* Replaces multiple ssa uses from load_const with a single use -> a register.
+ *
+ * Values that can be sourced from Rogue constant registers are left alone;
+ * for everything else a mov is inserted after the load_const and most uses
+ * are rewritten to read the mov instead.
+ */
+void rogue_nir_constreg(nir_shader *shader)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+
+   /* Find load_const instructions. */
+   nir_foreach_block (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         if (instr->type != nir_instr_type_load_const)
+            continue;
+
+         nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
+
+         /* Skip values that can be pulled from constant registers.
+          * NOTE(review): only component 0 is inspected and a 32-bit size is
+          * assumed — confirm all load_consts reaching this pass are 32-bit
+          * scalars.
+          */
+         uint32_t value = nir_const_value_as_uint(load_const->value[0], 32);
+         size_t const_reg = rogue_constreg_lookup(value);
+         if (const_reg != ROGUE_NO_CONST_REG)
+            continue;
+
+         b.cursor = nir_after_instr(&load_const->instr);
+         nir_ssa_def *mov = nir_mov(&b, &load_const->def);
+
+         nir_foreach_use_safe (use_src, &load_const->def) {
+            /* Don't rewrite the mov's own source. */
+            if (use_src->parent_instr == mov->parent_instr)
+               continue;
+
+            /* Skip when used as an index for intrinsics, as we want to
+             * access that value directly.
+             */
+            if (use_src->parent_instr->type == nir_instr_type_intrinsic)
+               continue;
+
+            nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, mov);
+         }
+      }
+   }
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "nir/nir_search_helpers.h"
+#include "rogue_nir.h"
+#include "rogue_nir_helpers.h"
+
+/* Lowers vulkan_resource_index to an immediate (desc_set, binding, 0) vec3.
+ * The pipeline layout parameter is currently unused; the set/binding pair is
+ * consumed later by the load/store lowering.
+ */
+static void lower_vulkan_resource_index(nir_builder *b,
+                                        nir_intrinsic_instr *intr,
+                                        void *pipeline_layout)
+{
+   unsigned desc_set = nir_intrinsic_desc_set(intr);
+   unsigned binding = nir_intrinsic_binding(intr);
+
+   nir_ssa_def *def = nir_vec3(b,
+                               nir_imm_int(b, desc_set),
+                               nir_imm_int(b, binding),
+                               nir_imm_int(b, 0));
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, def);
+   nir_instr_remove(&intr->instr);
+}
+
+/* Removes load_vulkan_descriptor, forwarding its source (the resource index)
+ * directly to the users.
+ */
+static void lower_load_vulkan_descriptor(nir_builder *b,
+                                         nir_intrinsic_instr *intr)
+{
+   /* Loading the descriptor happens as part of the load/store instruction so
+    * this is a no-op.
+    */
+
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, intr->src[0].ssa);
+   nir_instr_remove(&intr->instr);
+}
+
+/* Splits a vector load_ubo into one single-component load_ubo per component
+ * and recombines the results with a vec.
+ */
+static void lower_load_ubo_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
+{
+   /* Scalarize the load_ubo. */
+   b->cursor = nir_before_instr(&intr->instr);
+
+   assert(intr->dest.is_ssa);
+   assert(intr->num_components > 1);
+
+   nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS];
+
+   for (uint8_t i = 0; i < intr->num_components; i++) {
+      /* NOTE(review): the range is split evenly across components and
+       * range_base is advanced by i * num_components — confirm these offsets
+       * are in the units (bytes) that range/range_base expect.
+       */
+      size_t scaled_range = nir_intrinsic_range(intr) / intr->num_components;
+      nir_intrinsic_instr *chan_intr =
+         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+      nir_ssa_dest_init(&chan_intr->instr,
+                        &chan_intr->dest,
+                        1,
+                        intr->dest.ssa.bit_size,
+                        NULL);
+      chan_intr->num_components = 1;
+
+      /* Carry over the access/alignment/range metadata from the vector load. */
+      nir_intrinsic_set_access(chan_intr, nir_intrinsic_access(intr));
+      nir_intrinsic_set_align_mul(chan_intr, nir_intrinsic_align_mul(intr));
+      nir_intrinsic_set_align_offset(chan_intr,
+                                     nir_intrinsic_align_offset(intr));
+      nir_intrinsic_set_range_base(chan_intr,
+                                   nir_intrinsic_range_base(intr) +
+                                      (i * intr->num_components));
+      nir_intrinsic_set_range(chan_intr, scaled_range);
+
+      /* Base (desc_set, binding). */
+      nir_src_copy(&chan_intr->src[0], &intr->src[0]);
+
+      /* Offset (unused). */
+      chan_intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+
+      nir_builder_instr_insert(b, &chan_intr->instr);
+
+      loads[i] = &chan_intr->dest.ssa;
+   }
+
+   /* Recombine the scalar loads and drop the original vector load. */
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+                            nir_vec(b, loads, intr->num_components));
+   nir_instr_remove(&intr->instr);
+}
+
+/* Dispatches a single intrinsic to its lowering; returns true on progress. */
+static bool
+lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, void *layout)
+{
+   if (instr->intrinsic == nir_intrinsic_load_vulkan_descriptor) {
+      lower_load_vulkan_descriptor(b, instr);
+      return true;
+   }
+
+   if (instr->intrinsic == nir_intrinsic_vulkan_resource_index) {
+      lower_vulkan_resource_index(b, instr, layout);
+      return true;
+   }
+
+   if (instr->intrinsic == nir_intrinsic_load_ubo) {
+      lower_load_ubo_to_scalar(b, instr);
+      return true;
+   }
+
+   return false;
+}
+
+/* Walks one function impl lowering intrinsics; returns true on progress. */
+static bool lower_impl(nir_function_impl *impl, void *layout)
+{
+   bool progress = false;
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         b.cursor = nir_before_instr(instr);
+         switch (instr->type) {
+         case nir_instr_type_intrinsic:
+            progress |=
+               lower_intrinsic(&b, nir_instr_as_intrinsic(instr), layout);
+            break;
+
+         default:
+            break;
+         }
+      }
+   }
+
+   /* Instructions may have been added/removed, so discard all metadata. */
+   if (progress)
+      nir_metadata_preserve(impl, nir_metadata_none);
+   else
+      nir_metadata_preserve(impl, nir_metadata_all);
+
+   return progress;
+}
+
+/* Lowers Vulkan descriptor/UBO I/O intrinsics for Rogue.
+ *
+ * \param[in] shader NIR shader to lower.
+ * \param[in] layout Opaque pipeline layout passed through to the lowerings.
+ * \return true if any progress was made.
+ */
+bool rogue_nir_lower_io(nir_shader *shader, void *layout)
+{
+   bool progress = false;
+
+   nir_foreach_function (function, shader) {
+      if (function->impl)
+         progress |= lower_impl(function->impl, layout);
+   }
+
+   /* The lowerings leave dead instructions behind; clean them up. */
+   if (progress)
+      nir_opt_dce(shader);
+
+   return progress;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "nir/nir_search_helpers.h"
+#include "rogue_nir.h"
+
+/* Rewrites a store_output's source to a single U8888-packed scalar (pixel
+ * format output conversion).
+ */
+static void insert_pfo(nir_builder *b,
+                       nir_intrinsic_instr *store_output,
+                       nir_src *output_src)
+{
+   /* TODO: Support complex PFO with blending. */
+   /* TODO: Verify type is vec4. */
+
+   /* Pack the output color components into U8888 format. */
+   nir_ssa_def *new_output_src_ssa = nir_pack_unorm_4x8(b, output_src->ssa);
+   nir_src new_output_src = nir_src_for_ssa(new_output_src_ssa);
+
+   /* Update the store_output intrinsic: one uint32 component now carries all
+    * four channels.
+    */
+   nir_instr_rewrite_src(&store_output->instr, output_src, new_output_src);
+   nir_intrinsic_set_write_mask(store_output, 1);
+   store_output->num_components = 1;
+   nir_intrinsic_set_src_type(store_output, nir_type_uint32);
+}
+
+/* Applies pixel format output (PFO) lowering to a fragment shader: packs the
+ * color output into U8888 and retypes the corresponding output variables and
+ * derefs to uint32. No-op for other shader stages.
+ */
+void rogue_nir_pfo(nir_shader *shader)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   nir_builder b;
+
+   /* Only apply to fragment shaders. */
+   if (shader->info.stage != MESA_SHADER_FRAGMENT)
+      return;
+
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         if (instr->type == nir_instr_type_intrinsic) {
+            /* Find the store_output intrinsic and pack the output value. */
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+            if (intr->intrinsic != nir_intrinsic_store_output)
+               continue;
+
+            b.cursor = nir_before_instr(&intr->instr);
+            insert_pfo(&b, intr, &intr->src[0]);
+         } else if (instr->type == nir_instr_type_deref) {
+            /* Find variable derefs and update their type. */
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+
+            if (!nir_deref_mode_is(deref, nir_var_shader_out))
+               continue;
+
+            if (deref->deref_type != nir_deref_type_var)
+               continue;
+
+            nir_variable *out = nir_deref_instr_get_variable(deref);
+
+            /* The packed output is now a single 32-bit uint. */
+            deref->type = glsl_uintN_t_type(32);
+            out->type = glsl_uintN_t_type(32);
+         }
+      }
+   }
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "compiler/shader_enums.h"
+#include "compiler/spirv/nir_spirv.h"
+#include "nir/nir.h"
+#include "rogue.h"
+#include "rogue_build_data.h"
+#include "rogue_compiler.h"
+#include "rogue_constreg.h"
+#include "rogue_encode.h"
+#include "rogue_nir.h"
+#include "rogue_nir_helpers.h"
+#include "rogue_operand.h"
+#include "rogue_regalloc.h"
+#include "rogue_shader.h"
+#include "rogue_validate.h"
+#include "util/macros.h"
+#include "util/memstream.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue.c
+ *
+ * \brief Contains the top-level Rogue compiler interface for Vulkan driver and
+ * the offline compiler.
+ */
+
+/**
+ * \brief Converts a SPIR-V shader to NIR.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] stage Shader stage.
+ * \param[in] entry Entry point name.
+ * \param[in] spirv_size SPIR-V data length in DWORDs.
+ * \param[in] spirv_data SPIR-V data.
+ * \param[in] num_spec Number of SPIR-V specializations.
+ * \param[in] spec SPIR-V specializations.
+ * \return A nir_shader* if successful, or NULL if unsuccessful.
+ */
+nir_shader *rogue_spirv_to_nir(struct rogue_build_ctx *ctx,
+                               gl_shader_stage stage,
+                               const char *entry,
+                               size_t spirv_size,
+                               const uint32_t *spirv_data,
+                               unsigned num_spec,
+                               struct nir_spirv_specialization *spec)
+{
+   nir_shader *nir;
+
+   nir = spirv_to_nir(spirv_data,
+                      spirv_size,
+                      spec,
+                      num_spec,
+                      stage,
+                      entry,
+                      rogue_get_spirv_options(ctx->compiler),
+                      rogue_get_compiler_options(ctx->compiler));
+   if (!nir)
+      return NULL;
+
+   /* Tie the NIR shader's lifetime to the build context. */
+   ralloc_steal(ctx, nir);
+
+   /* Apply passes. */
+   if (!rogue_nir_passes(ctx, nir, stage)) {
+      ralloc_free(nir);
+      return NULL;
+   }
+
+   /* Collect I/O data to pass back to the driver. */
+   if (!rogue_collect_io_data(ctx, nir)) {
+      ralloc_free(nir);
+      return NULL;
+   }
+
+   return nir;
+}
+
+/**
+ * \brief Converts a Rogue shader to binary.
+ *
+ * The returned binary is allocated from \p ctx and is freed with it.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] shader Rogue shader.
+ * \return A rogue_shader_binary* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader_binary *rogue_to_binary(struct rogue_build_ctx *ctx,
+                                            const struct rogue_shader *shader)
+{
+   struct rogue_shader_binary *binary;
+   struct u_memstream mem;
+   size_t buf_size;
+   char *buf;
+
+   if (!rogue_validate_shader(shader))
+      return NULL;
+
+   /* Encode into a memory stream; buf must be freed on every path below. */
+   if (!u_memstream_open(&mem, &buf, &buf_size))
+      return NULL;
+
+   if (!rogue_encode_shader(shader, u_memstream_get(&mem))) {
+      u_memstream_close(&mem);
+      free(buf);
+      return NULL;
+   }
+
+   /* Closing the stream finalizes buf and buf_size. */
+   u_memstream_close(&mem);
+
+   binary = rzalloc_size(ctx, sizeof(*binary) + buf_size);
+   if (!binary) {
+      free(buf);
+      return NULL;
+   }
+
+   binary->size = buf_size;
+   memcpy(binary->data, buf, buf_size);
+
+   free(buf);
+
+   return binary;
+}
+
+/* Configures instruction destination operand dest_index (must be 0) from a
+ * NIR ALU destination. Scalar (1-component) dests become a plain vreg; vec4
+ * dests become a vreg vector component.
+ */
+static bool
+setup_alu_dest(struct rogue_instr *instr, size_t dest_index, nir_alu_instr *alu)
+{
+   assert(dest_index == 0);
+
+   /* Dest validation. */
+   assert(nir_dest_num_components(alu->dest.dest) == 1 ||
+          nir_dest_num_components(alu->dest.dest) == 4);
+   assert(nir_dest_bit_size(alu->dest.dest) == 32);
+
+   size_t nir_dest_reg = nir_alu_dest_regindex(alu);
+
+   if (nir_dest_num_components(alu->dest.dest) == 1) {
+      CHECK(rogue_instr_set_operand_vreg(instr, dest_index, nir_dest_reg));
+   } else {
+      size_t comp = nir_alu_dest_comp(alu);
+      CHECK(rogue_instr_set_operand_vreg_vec(instr,
+                                             dest_index,
+                                             comp,
+                                             nir_dest_reg));
+   }
+
+   return true;
+}
+
+/* Sets instruction operand operand_index to the Rogue constant register that
+ * holds const_value.
+ */
+static bool trans_constreg_operand(struct rogue_instr *instr,
+                                   size_t operand_index,
+                                   uint32_t const_value)
+{
+   size_t const_reg = rogue_constreg_lookup(const_value);
+
+   /* Only values that can be sourced from const regs should be left from the
+    * rogue_nir_constreg pass.
+    */
+   assert(const_reg != ROGUE_NO_CONST_REG);
+
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     operand_index,
+                                     ROGUE_OPERAND_TYPE_REG_CONST,
+                                     const_reg));
+
+   return true;
+}
+
+/* Translates a NIR two-source float min/max ALU instruction into the given
+ * Rogue instruction; shared implementation for fmax/fmin, which previously
+ * duplicated this body verbatim.
+ *
+ * \param[in] shader Rogue shader being built.
+ * \param[in] alu NIR ALU instruction (nir_op_fmax or nir_op_fmin).
+ * \param[in] rogue_op Rogue opcode to emit (ROGUE_OP_MAX or ROGUE_OP_MIN).
+ * \return true if successful, or false if unsuccessful.
+ */
+static bool trans_nir_alu_minmax(struct rogue_shader *shader,
+                                 nir_alu_instr *alu,
+                                 int rogue_op)
+{
+   /* Src validation: both sources must be 32-bit scalars. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   assert(nir_src_num_components(alu->src[1].src) == 1);
+   assert(nir_src_bit_size(alu->src[1].src) == 32);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, rogue_op);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+
+   for (size_t u = 0; u < nir_op_infos[alu->op].num_inputs; ++u) {
+      /* Handle values that can be pulled from const regs. */
+      if (nir_alu_src_is_const(alu, u)) {
+         CHECK(trans_constreg_operand(instr, u + 1, nir_alu_src_const(alu, u)));
+         continue;
+      }
+
+      size_t nir_src_reg = nir_alu_src_regindex(alu, u);
+
+      CHECK(rogue_instr_set_operand_vreg(instr, u + 1, nir_src_reg));
+   }
+
+   return true;
+}
+
+static bool trans_nir_alu_fmax(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   return trans_nir_alu_minmax(shader, alu, ROGUE_OP_MAX);
+}
+
+static bool trans_nir_alu_fmin(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   return trans_nir_alu_minmax(shader, alu, ROGUE_OP_MIN);
+}
+
+/* Translates a mov of a scalar constant into a MOV_IMM carrying the value
+ * as an immediate operand.
+ */
+static bool trans_nir_alu_mov_imm(struct rogue_shader *shader,
+                                  nir_alu_instr *alu)
+{
+   /* Src validation: scalar 32-bit constant. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV_IMM);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+   CHECK(rogue_instr_set_operand_imm(instr, 1, nir_alu_src_const(alu, 0)));
+
+   return true;
+}
+
+/* Translates a NIR mov: scalar constants go through MOV_IMM, constants in
+ * const registers become a MOV from there, everything else is a vreg MOV.
+ */
+static bool trans_nir_alu_mov(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   bool src_is_const = nir_alu_src_is_const(alu, 0);
+
+   /* Constant value that isn't in constregs. */
+   if (src_is_const && nir_dest_num_components(alu->dest.dest) == 1)
+      return trans_nir_alu_mov_imm(shader, alu);
+
+   /* Src validation. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+
+   /* Handle values that can be pulled from const regs. */
+   if (src_is_const)
+      return trans_constreg_operand(instr, 1, nir_alu_src_const(alu, 0));
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, nir_alu_src_regindex(alu, 0)));
+
+   return true;
+}
+
+/* Translates pack_unorm_4x8 into a PACK_U8888 consuming a whole vec4. */
+static bool trans_nir_alu_pack_unorm_4x8(struct rogue_shader *shader,
+                                         nir_alu_instr *alu)
+{
+   /* Src/dest validation: scalar 32-bit dest, 32-bit vec4 source. */
+   assert(nir_dest_num_components(alu->dest.dest) == 1);
+   assert(nir_dest_bit_size(alu->dest.dest) == 32);
+
+   assert(nir_src_num_components(alu->src[0].src) == 4);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   size_t nir_src_reg = nir_alu_src_regindex(alu, 0);
+   size_t nir_dest_reg = nir_alu_dest_regindex(alu);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_PACK_U8888);
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg));
+
+   /* Ensure all 4 components are being sourced in order
+    * (i.e. the source has an identity swizzle).
+    */
+   for (size_t u = 0; u < nir_src_num_components(alu->src[0].src); ++u)
+      assert(alu->src->swizzle[u] == u);
+
+   /* The vec4 is consumed as a single vector operand. */
+   CHECK(rogue_instr_set_operand_vreg_vec(instr,
+                                          1,
+                                          ROGUE_COMPONENT_ALL,
+                                          nir_src_reg));
+
+   return true;
+}
+
+/* Translates a NIR fmul into a Rogue MUL with two scalar vreg sources. */
+static bool trans_nir_alu_fmul(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   /* Src validation: both sources must be scalar 32-bit. */
+   for (size_t s = 0; s < 2; ++s) {
+      assert(nir_src_num_components(alu->src[s].src) == 1);
+      assert(nir_src_bit_size(alu->src[s].src) == 32);
+   }
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MUL);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, nir_alu_src_regindex(alu, 0)));
+   CHECK(rogue_instr_set_operand_vreg(instr, 2, nir_alu_src_regindex(alu, 1)));
+
+   return true;
+}
+
+/* Translates a NIR ffma (fused multiply-add) into a Rogue FMA. */
+static bool trans_nir_alu_ffma(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   /* Src validation: all three sources must be scalar 32-bit. */
+   for (size_t s = 0; s < 3; ++s) {
+      assert(nir_src_num_components(alu->src[s].src) == 1);
+      assert(nir_src_bit_size(alu->src[s].src) == 32);
+   }
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_FMA);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+
+   /* Operands 1..3 are the three ffma sources, in order. */
+   for (size_t s = 0; s < 3; ++s)
+      CHECK(rogue_instr_set_operand_vreg(instr,
+                                         s + 1,
+                                         nir_alu_src_regindex(alu, s)));
+
+   return true;
+}
+
+/* Dispatches a NIR ALU instruction to its per-opcode translator. */
+static bool trans_nir_alu(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   if (alu->op == nir_op_fmax)
+      return trans_nir_alu_fmax(shader, alu);
+
+   if (alu->op == nir_op_fmin)
+      return trans_nir_alu_fmin(shader, alu);
+
+   if (alu->op == nir_op_pack_unorm_4x8)
+      return trans_nir_alu_pack_unorm_4x8(shader, alu);
+
+   if (alu->op == nir_op_mov)
+      return trans_nir_alu_mov(shader, alu);
+
+   if (alu->op == nir_op_fmul)
+      return trans_nir_alu_fmul(shader, alu);
+
+   if (alu->op == nir_op_ffma)
+      return trans_nir_alu_ffma(shader, alu);
+
+   unreachable("Unimplemented NIR ALU instruction.");
+}
+
+/* Translates a fragment-stage load_input into a coefficient iteration
+ * (pixiter.w) followed by a wdf fence on the DRC it was issued on.
+ */
+static bool trans_nir_intrinsic_load_input_fs(struct rogue_shader *shader,
+                                              nir_intrinsic_instr *intr)
+{
+   struct rogue_fs_build_data *fs_data = &shader->ctx->stage_data.fs;
+
+   /* Src/dest validation. */
+   assert(nir_dest_num_components(intr->dest) == 1);
+   assert(nir_dest_bit_size(intr->dest) == 32);
+
+   assert(nir_src_num_components(intr->src[0]) == 1);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(nir_intr_src_is_const(intr, 0));
+
+   /* Intrinsic index validation. */
+   assert(nir_intrinsic_dest_type(intr) == nir_type_float32);
+
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   size_t component = nir_intrinsic_component(intr);
+   /* Coefficient register for this varying component, plus the W
+    * coefficient (looked up with location ~0).
+    */
+   size_t coeff_index = rogue_coeff_index_fs(&fs_data->iterator_args,
+                                             io_semantics.location,
+                                             component);
+   size_t wcoeff_index = rogue_coeff_index_fs(&fs_data->iterator_args, ~0, 0);
+   size_t drc_num = rogue_acquire_drc(shader);
+   uint64_t source_count = nir_dest_num_components(intr->dest);
+
+   size_t nir_dest_reg = nir_intr_dest_regindex(intr);
+
+   /* pixiter.w instruction. */
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_PIX_ITER_W);
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg));
+   CHECK(rogue_instr_set_operand_drc(instr, 1, drc_num));
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     2,
+                                     ROGUE_OPERAND_TYPE_REG_COEFF,
+                                     coeff_index));
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     3,
+                                     ROGUE_OPERAND_TYPE_REG_COEFF,
+                                     wcoeff_index));
+   CHECK(rogue_instr_set_operand_imm(instr, 4, source_count));
+
+   /* wdf instruction must follow the pixiter.w. */
+   instr = rogue_shader_insert(shader, ROGUE_OP_WDF);
+
+   CHECK(rogue_instr_set_operand_drc(instr, 0, drc_num));
+   /* The DRC is done with once the fence has been emitted. */
+   rogue_release_drc(shader, drc_num);
+
+   return true;
+}
+
+/* Translates a vertex-stage load_input into a MOV from the vertex-input
+ * register file.
+ */
+static bool trans_nir_intrinsic_load_input_vs(struct rogue_shader *shader,
+                                              nir_intrinsic_instr *intr)
+{
+   /* Src/dest validation. */
+   assert(nir_dest_num_components(intr->dest) == 1);
+   assert(nir_dest_bit_size(intr->dest) == 32);
+
+   assert(nir_src_num_components(intr->src[0]) == 1);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(nir_intr_src_is_const(intr, 0));
+
+   /* Intrinsic index validation. */
+   assert(nir_intrinsic_dest_type(intr) == nir_type_float32);
+
+   size_t component = nir_intrinsic_component(intr);
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   /* NOTE(review): the hard-coded stride of 3 registers per attribute looks
+    * like an assumed vec3 input layout — confirm against the driver's
+    * vertex input setup (collect_io_data_vs allocates by actual component
+    * counts, which may disagree with this).
+    */
+   size_t vi_reg_index = ((io_semantics.location - VERT_ATTRIB_GENERIC0) * 3) +
+                         component; /* TODO: get these properly with the
+                                     * intrinsic index (ssa argument)
+                                     */
+
+   size_t nir_dest_reg = nir_intr_dest_regindex(intr);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV);
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg));
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     1,
+                                     ROGUE_OPERAND_TYPE_REG_VERTEX_IN,
+                                     vi_reg_index));
+
+   return true;
+}
+
+/* Dispatches load_input translation on the shader stage. */
+static bool trans_nir_intrinsic_load_input(struct rogue_shader *shader,
+                                           nir_intrinsic_instr *intr)
+{
+   if (shader->stage == MESA_SHADER_FRAGMENT)
+      return trans_nir_intrinsic_load_input_fs(shader, intr);
+
+   if (shader->stage == MESA_SHADER_VERTEX)
+      return trans_nir_intrinsic_load_input_vs(shader, intr);
+
+   unreachable("Unimplemented NIR load_input variant.");
+}
+
+/* Translates a fragment-stage store_output into an overlap-checked
+ * (mov.olchk) write to a pixel-output register.
+ */
+static bool trans_nir_intrinsic_store_output_fs(struct rogue_shader *shader,
+                                                nir_intrinsic_instr *intr)
+{
+   /* Src/dest validation: src[0] is the (non-const) value being stored,
+    * src[1] is the constant output offset.
+    */
+   assert(nir_src_num_components(intr->src[0]) == 1);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(!nir_intr_src_is_const(intr, 0));
+
+   assert(nir_src_num_components(intr->src[1]) == 1);
+   assert(nir_src_bit_size(intr->src[1]) == 32);
+   assert(nir_intr_src_is_const(intr, 1));
+
+   /* Intrinsic index validation. */
+   assert(nir_intrinsic_src_type(intr) == nir_type_uint32);
+
+   /* Fetch the output offset. */
+   /* TODO: Is this really the right value to use for pixel out reg. num? */
+   size_t offset = nir_intr_src_const(intr, 1);
+
+   /* Fetch the components. */
+   size_t src_reg = nir_intr_src_regindex(intr, 0);
+
+   /* mov.olchk instruction. */
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV);
+
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     0,
+                                     ROGUE_OPERAND_TYPE_REG_PIXEL_OUT,
+                                     offset));
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, src_reg));
+   /* The overlap-check flag turns the plain MOV into mov.olchk. */
+   CHECK(rogue_instr_set_flag(instr, ROGUE_INSTR_FLAG_OLCHK));
+
+   return true;
+}
+
+/* Translates a vertex-stage store_output into a VTXOUT writing a single
+ * component to the vertex-output slot allocated by collect_io_data_vs().
+ */
+static bool trans_nir_intrinsic_store_output_vs(struct rogue_shader *shader,
+                                                nir_intrinsic_instr *intr)
+{
+   struct rogue_vs_build_data *vs_data = &shader->ctx->stage_data.vs;
+
+   /* Src/dest validation: src[0] is the (non-const) value being stored,
+    * src[1] is the constant output offset.
+    */
+   assert(nir_src_num_components(intr->src[0]) == 1);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(!nir_intr_src_is_const(intr, 0));
+
+   assert(nir_src_num_components(intr->src[1]) == 1);
+   assert(nir_src_bit_size(intr->src[1]) == 32);
+   assert(nir_intr_src_is_const(intr, 1));
+
+   /* Intrinsic index validation. */
+   assert(nir_intrinsic_src_type(intr) == nir_type_float32);
+   /* Exactly one component may be written per store. */
+   assert(util_bitcount(nir_intrinsic_write_mask(intr)) == 1);
+
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   size_t component = nir_intrinsic_component(intr);
+   /* Vertex-output index allocated for this location/component pair. */
+   size_t vo_index = rogue_output_index_vs(&vs_data->outputs,
+                                           io_semantics.location,
+                                           component);
+
+   size_t src_reg = nir_intr_src_regindex(intr, 0);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_VTXOUT);
+
+   CHECK(rogue_instr_set_operand_imm(instr, 0, vo_index));
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, src_reg));
+
+   return true;
+}
+
+/* Dispatches store_output translation on the shader stage. */
+static bool trans_nir_intrinsic_store_output(struct rogue_shader *shader,
+                                             nir_intrinsic_instr *intr)
+{
+   if (shader->stage == MESA_SHADER_FRAGMENT)
+      return trans_nir_intrinsic_store_output_fs(shader, intr);
+
+   if (shader->stage == MESA_SHADER_VERTEX)
+      return trans_nir_intrinsic_store_output_vs(shader, intr);
+
+   unreachable("Unimplemented NIR store_output variant.");
+}
+
+/* Translates a load_ubo into a MOV from the shared register that holds the
+ * addressed UBO dword (see reserve_ubo()/alloc_ubos() for the allocation).
+ */
+static bool trans_nir_intrinsic_load_ubo(struct rogue_shader *shader,
+                                         nir_intrinsic_instr *intr)
+{
+   struct rogue_ubo_data *ubo_data =
+      &shader->ctx->common_data[shader->stage].ubo_data;
+
+   /* Src/dest validation: src[0] is a constant (desc_set, binding) pair,
+    * src[1] a constant offset.
+    */
+   assert(nir_dest_num_components(intr->dest) == 1);
+   assert(nir_dest_bit_size(intr->dest) == 32);
+
+   assert(nir_src_num_components(intr->src[0]) == 2);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(nir_intr_src_is_const(intr, 0));
+
+   assert(nir_src_num_components(intr->src[1]) == 1);
+   assert(nir_src_bit_size(intr->src[1]) == 32);
+   assert(nir_intr_src_is_const(intr, 1));
+
+   /* Intrinsic index validation: only single register-aligned dword loads
+    * are supported.
+    */
+   assert((nir_intrinsic_range_base(intr) % ROGUE_REG_SIZE_BYTES) == 0);
+   assert(nir_intrinsic_range(intr) == ROGUE_REG_SIZE_BYTES);
+
+   size_t nir_dest_reg = nir_intr_dest_regindex(intr);
+
+   size_t desc_set = nir_intr_src_comp_const(intr, 0, 0);
+   size_t binding = nir_intr_src_comp_const(intr, 0, 1);
+   size_t offset = nir_intrinsic_range_base(intr);
+
+   /* Shared register pre-loaded with this UBO dword. */
+   size_t sh_num = rogue_ubo_reg(ubo_data, desc_set, binding, offset);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV);
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg));
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     1,
+                                     ROGUE_OPERAND_TYPE_REG_SHARED,
+                                     sh_num));
+   return true;
+}
+
+/* Dispatches a NIR intrinsic to its translator. */
+static bool trans_nir_intrinsic(struct rogue_shader *shader,
+                                nir_intrinsic_instr *intr)
+{
+   if (intr->intrinsic == nir_intrinsic_load_input)
+      return trans_nir_intrinsic_load_input(shader, intr);
+
+   if (intr->intrinsic == nir_intrinsic_store_output)
+      return trans_nir_intrinsic_store_output(shader, intr);
+
+   if (intr->intrinsic == nir_intrinsic_load_ubo)
+      return trans_nir_intrinsic_load_ubo(shader, intr);
+
+   unreachable("Unimplemented NIR intrinsic instruction.");
+}
+
+/* Validates a NIR load_const; no Rogue instruction is emitted here
+ * (constant materialization is deferred — see the TODO below).
+ */
+static bool trans_nir_load_const(struct rogue_shader *shader,
+                                 nir_load_const_instr *load_const)
+{
+   /* Src/dest validation. */
+   assert(load_const->def.bit_size == 32);
+
+   /* Ensure that two-component load_consts are used only by load_ubos
+    * (they encode a (desc_set, binding) pair — see
+    * trans_nir_intrinsic_load_ubo).
+    */
+   if (load_const->def.num_components == 2) {
+      nir_foreach_use (use_src, &load_const->def) {
+         nir_instr *instr = use_src->parent_instr;
+         assert(instr->type == nir_instr_type_intrinsic);
+
+         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         assert(intr->intrinsic == nir_intrinsic_load_ubo);
+      }
+   } else {
+      assert(load_const->def.num_components == 1);
+   }
+
+   /* TODO: This is currently done in MOV_IMM, but instead now would be the
+    * time to lookup the constant value, see if it lives in const regs, or if
+    * it needs to generate a MOV_IMM (or be constant calc-ed).
+    */
+   return true;
+}
+
+/* Translates a NIR return into the stage-specific end instruction. */
+static bool trans_nir_jump_return(struct rogue_shader *shader,
+                                  nir_jump_instr *jump)
+{
+   if (shader->stage == MESA_SHADER_FRAGMENT) {
+      rogue_shader_insert(shader, ROGUE_OP_END_FRAG);
+   } else if (shader->stage == MESA_SHADER_VERTEX) {
+      rogue_shader_insert(shader, ROGUE_OP_END_VERT);
+   } else {
+      unreachable("Unimplemented NIR return instruction type.");
+   }
+
+   return true;
+}
+
+/* Dispatches a NIR jump instruction; only returns are supported. */
+static bool trans_nir_jump(struct rogue_shader *shader, nir_jump_instr *jump)
+{
+   if (jump->type != nir_jump_return)
+      unreachable("Unimplemented NIR jump instruction type.");
+
+   return trans_nir_jump_return(shader, jump);
+}
+
+/**
+ * \brief Converts a NIR shader to Rogue.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return A rogue_shader* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader *rogue_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                        const nir_shader *nir)
+{
+   gl_shader_stage stage = nir->info.stage;
+   /* The shader is created against ctx (presumably ralloc'd, so the failure
+    * paths below don't leak it — TODO confirm).
+    */
+   struct rogue_shader *shader = rogue_shader_create(ctx, stage);
+   if (!shader)
+      return NULL;
+
+   /* Make sure we only have a single function. */
+   assert(exec_list_length(&nir->functions) == 1);
+
+   /* Translate shader entrypoint. */
+   /* Cast away const: nir_shader_get_entrypoint() takes a non-const shader,
+    * but the shader is not modified here.
+    */
+   nir_function_impl *entry = nir_shader_get_entrypoint((nir_shader *)nir);
+   nir_foreach_block (block, entry) {
+      nir_foreach_instr (instr, block) {
+         /* NOTE(review): CHECKF presumably expands to an early return of
+          * false on failure; returning false from this pointer-returning
+          * function yields NULL — confirm the macro is pointer-safe.
+          */
+         switch (instr->type) {
+         case nir_instr_type_alu:
+            /* TODO: Cleanup on failure. */
+            CHECKF(trans_nir_alu(shader, nir_instr_as_alu(instr)),
+                   "Failed to translate NIR ALU instruction.");
+            break;
+
+         case nir_instr_type_intrinsic:
+            CHECKF(trans_nir_intrinsic(shader, nir_instr_as_intrinsic(instr)),
+                   "Failed to translate NIR intrinsic instruction.");
+            break;
+
+         case nir_instr_type_load_const:
+            CHECKF(trans_nir_load_const(shader, nir_instr_as_load_const(instr)),
+                   "Failed to translate NIR load_const instruction.");
+            break;
+
+         case nir_instr_type_jump:
+            CHECKF(trans_nir_jump(shader, nir_instr_as_jump(instr)),
+                   "Failed to translate NIR jump instruction.");
+            break;
+
+         default:
+            unreachable("Unimplemented NIR instruction type.");
+         }
+      }
+   }
+
+   /* Perform register allocation. */
+   /* Feeds the temp/internal register counts back into the shared
+    * per-stage build data.
+    */
+   /* TODO: handle failure. */
+   if (!rogue_ra_alloc(&shader->instr_list,
+                       shader->ra,
+                       &ctx->common_data[stage].temps,
+                       &ctx->common_data[stage].internals))
+      return NULL;
+
+   return shader;
+}
+
+/**
+ * \brief Creates and sets up a shared multi-stage build context.
+ *
+ * \param[in] compiler The compiler context.
+ * \return A pointer to the new build context, or NULL on failure.
+ */
+struct rogue_build_ctx *
+rogue_create_build_context(struct rogue_compiler *compiler)
+{
+   /* rzalloc_size zeroes the allocation, which doubles as the required
+    * default-zero initialization of the nir/rogue/binary shader members.
+    */
+   struct rogue_build_ctx *ctx = rzalloc_size(compiler, sizeof(*ctx));
+   if (!ctx)
+      return NULL;
+
+   ctx->compiler = compiler;
+
+   /* Setup non-zero defaults. */
+   ctx->stage_data.fs.msaa_mode = ROGUE_MSAA_MODE_PIXEL;
+
+   return ctx;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_H
+#define ROGUE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+
+/* All registers are 32-bit in size. */
+#define ROGUE_REG_SIZE_BYTES 4
+/* Sentinel marking a register slot as unassigned. */
+#define ROGUE_REG_UNUSED UINT32_MAX
+
+struct nir_spirv_specialization;
+struct rogue_build_ctx;
+struct rogue_shader;
+
+/* MSAA task configuration for fragment processing. */
+enum rogue_msaa_mode {
+   ROGUE_MSAA_MODE_UNDEF = 0, /* explicitly treat 0 as undefined */
+   /* One task for all samples. */
+   ROGUE_MSAA_MODE_PIXEL,
+   /* For on-edge pixels only: separate tasks for each sample. */
+   ROGUE_MSAA_MODE_SELECTIVE,
+   /* For all pixels: separate tasks for each sample. */
+   ROGUE_MSAA_MODE_FULL,
+};
+
+/**
+ * \brief Shader binary.
+ */
+struct rogue_shader_binary {
+   /* Size of data in bytes. */
+   size_t size;
+   /* Encoded instruction stream (flexible array member). */
+   uint8_t data[];
+};
+
+/* Compiles a SPIR-V module to NIR. */
+PUBLIC
+nir_shader *rogue_spirv_to_nir(struct rogue_build_ctx *ctx,
+                               gl_shader_stage stage,
+                               const char *entry,
+                               size_t spirv_size,
+                               const uint32_t *spirv_data,
+                               unsigned num_spec,
+                               struct nir_spirv_specialization *spec);
+
+/* Encodes a validated Rogue shader into a binary (allocated against ctx). */
+PUBLIC
+struct rogue_shader_binary *rogue_to_binary(struct rogue_build_ctx *ctx,
+                                            const struct rogue_shader *shader);
+
+/* Translates a NIR shader into a Rogue shader. */
+PUBLIC
+struct rogue_shader *rogue_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                        const nir_shader *nir);
+#endif /* ROGUE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "rogue_build_data.h"
+#include "rogue_nir_helpers.h"
+#include "rogue_operand.h"
+#include "util/macros.h"
+
+#define __pvr_address_type uint64_t
+#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr)
+
+#include "csbgen/rogue_pds.h"
+
+#undef __pvr_get_address
+#undef __pvr_address_type
+
+/**
+ * \brief Allocates the coefficient registers that will contain the iterator
+ * data for the fragment shader input varyings.
+ *
+ * \param[in] args The iterator argument data.
+ * \return The total number of coefficient registers required by the iterators.
+ */
+static size_t alloc_iterator_regs(struct rogue_iterator_args *args)
+{
+   size_t next_coeff = 0;
+
+   for (size_t i = 0; i < args->num_fpu_iterators; ++i) {
+      /* Ensure there aren't any gaps (i.e. nothing already allocated). */
+      assert(args->base[i] == ~0);
+
+      args->base[i] = next_coeff;
+      next_coeff += ROGUE_COEFF_ALIGN * args->components[i];
+   }
+
+   return next_coeff;
+}
+
+/**
+ * \brief Reserves an iterator for a fragment shader input varying,
+ * and calculates its setup data.
+ *
+ * \param[in] args The iterator argument data.
+ * \param[in] i The iterator index.
+ * \param[in] type The interpolation type of the varying.
+ * \param[in] f16 Whether the data type is F16 or F32.
+ * \param[in] components The number of components in the varying.
+ */
+static void reserve_iterator(struct rogue_iterator_args *args,
+                             size_t i,
+                             enum glsl_interp_mode type,
+                             bool f16,
+                             size_t components)
+{
+   struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 };
+
+   assert(components >= 1 && components <= 4);
+
+   /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */
+   assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE);
+   assert(i < ARRAY_SIZE(args->fpu_iterators));
+
+   switch (type) {
+   /* Default interpolation is smooth. */
+   case INTERP_MODE_NONE:
+      data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
+      data.perspective = true;
+      break;
+
+   case INTERP_MODE_NOPERSPECTIVE:
+      data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
+      data.perspective = false;
+      break;
+
+   default:
+      unreachable("Unimplemented interpolation type.");
+   }
+
+   /* Number of components in this varying
+    * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D).
+    */
+   data.size = (components - 1);
+
+   /* TODO: Investigate F16 support. */
+   assert(!f16);
+   data.f16 = f16;
+
+   /* Offsets within the vertex. */
+   /* NOTE(review): assumes 2 dwords per varying slot in the vertex format —
+    * confirm against the driver's vertex layout.
+    */
+   data.f32_offset = 2 * i;
+   data.f16_offset = data.f32_offset;
+
+   /* Pack the iterator setup words for the PDS program, and record the
+    * bookkeeping consumed later by alloc_iterator_regs().
+    */
+   ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data);
+   args->destination[i] = i;
+   args->base[i] = ~0; /* Unallocated until alloc_iterator_regs(). */
+   args->components[i] = components;
+   ++args->num_fpu_iterators;
+}
+
+/**
+ * \brief Collects the fragment shader I/O data to feed-back to the driver.
+ *
+ * \sa #collect_io_data()
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] fs_data Fragment-specific build data.
+ * \param[in] nir NIR fragment shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_io_data_fs(struct rogue_common_build_data *common_data,
+                               struct rogue_fs_build_data *fs_data,
+                               nir_shader *nir)
+{
+   size_t num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in);
+   /* -1 because iterator slot 0 is reserved for W below. */
+   assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1));
+
+   /* Process inputs (if present). */
+   if (num_inputs) {
+      /* If the fragment shader has inputs, the first iterator
+       * must be used for the W component.
+       */
+      reserve_iterator(&fs_data->iterator_args,
+                       0,
+                       INTERP_MODE_NOPERSPECTIVE,
+                       false,
+                       1);
+
+      nir_foreach_shader_in_variable (var, nir) {
+         /* +1 to skip the W iterator reserved at slot 0. */
+         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
+         size_t components = glsl_get_components(var->type);
+         enum glsl_interp_mode interp = var->data.interpolation;
+         bool f16 = glsl_type_is_16bit(var->type);
+
+         /* Check that arguments are either F16 or F32. */
+         assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+         assert(f16 || glsl_type_is_32bit(var->type));
+
+         /* Check input location. */
+         assert(var->data.location >= VARYING_SLOT_VAR0 &&
+                var->data.location <= VARYING_SLOT_VAR31);
+
+         reserve_iterator(&fs_data->iterator_args, i, interp, f16, components);
+      }
+
+      /* Feed the total coefficient register usage back to the driver. */
+      common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args);
+      assert(common_data->coeffs);
+      assert(common_data->coeffs < ROGUE_MAX_REG_COEFF);
+   }
+
+   /* TODO: Process outputs. */
+
+   return true;
+}
+
+/**
+ * \brief Allocates the vertex shader input registers.
+ *
+ * \param[in] inputs The vertex shader input data.
+ * \return The total number of vertex input registers required.
+ */
+static size_t alloc_vs_inputs(struct rogue_vertex_inputs *inputs)
+{
+   size_t next_reg = 0;
+
+   for (size_t i = 0; i < inputs->num_input_vars; ++i) {
+      /* Ensure there aren't any gaps (i.e. nothing already allocated). */
+      assert(inputs->base[i] == ~0);
+
+      inputs->base[i] = next_reg;
+      next_reg += inputs->components[i];
+   }
+
+   return next_reg;
+}
+
+/**
+ * \brief Allocates the vertex shader outputs.
+ *
+ * \param[in] outputs The vertex shader output data.
+ * \return The total number of vertex outputs required.
+ */
+static size_t alloc_vs_outputs(struct rogue_vertex_outputs *outputs)
+{
+   size_t next_output = 0;
+
+   for (size_t i = 0; i < outputs->num_output_vars; ++i) {
+      /* Ensure there aren't any gaps (i.e. nothing already allocated). */
+      assert(outputs->base[i] == ~0);
+
+      outputs->base[i] = next_output;
+      next_output += outputs->components[i];
+   }
+
+   return next_output;
+}
+
+/**
+ * \brief Counts the varyings used by the vertex shader.
+ *
+ * \param[in] outputs The vertex shader output data.
+ * \return The number of varyings used.
+ */
+static size_t count_vs_varyings(struct rogue_vertex_outputs *outputs)
+{
+   size_t total = 0;
+
+   /* Index 0 holds the position output, which is not a varying; skip it. */
+   for (size_t i = 1; i < outputs->num_output_vars; ++i)
+      total += outputs->components[i];
+
+   return total;
+}
+
+/**
+ * \brief Reserves space for a vertex shader input.
+ *
+ * \param[in] inputs The vertex input data.
+ * \param[in] i The vertex input index.
+ * \param[in] components The number of components in the input.
+ */
+static void reserve_vs_input(struct rogue_vertex_inputs *inputs,
+                             size_t i,
+                             size_t components)
+{
+   assert(i < ARRAY_SIZE(inputs->base));
+   assert(components >= 1 && components <= 4);
+
+   /* base is assigned later by alloc_vs_inputs(); ~0 marks it unallocated. */
+   inputs->components[i] = components;
+   inputs->base[i] = ~0;
+   ++inputs->num_input_vars;
+}
+
+/**
+ * \brief Reserves space for a vertex shader output.
+ *
+ * \param[in] outputs The vertex output data.
+ * \param[in] i The vertex output index.
+ * \param[in] components The number of components in the output.
+ */
+static void reserve_vs_output(struct rogue_vertex_outputs *outputs,
+                              size_t i,
+                              size_t components)
+{
+   assert(i < ARRAY_SIZE(outputs->base));
+   assert(components >= 1 && components <= 4);
+
+   /* base is assigned later by alloc_vs_outputs(); ~0 marks it unallocated. */
+   outputs->components[i] = components;
+   outputs->base[i] = ~0;
+   ++outputs->num_output_vars;
+}
+
+/**
+ * \brief Collects the vertex shader I/O data to feed-back to the driver.
+ *
+ * \sa #collect_io_data()
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] vs_data Vertex-specific build data.
+ * \param[in] nir NIR vertex shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_io_data_vs(struct rogue_common_build_data *common_data,
+                               struct rogue_vs_build_data *vs_data,
+                               nir_shader *nir)
+{
+   /* Tracks whether gl_Position is written; only consumed by asserts. */
+   ASSERTED bool out_pos_present = false;
+   ASSERTED size_t num_outputs =
+      nir_count_variables_with_modes(nir, nir_var_shader_out);
+
+   /* Process inputs. */
+   nir_foreach_shader_in_variable (var, nir) {
+      size_t components = glsl_get_components(var->type);
+      /* Input slots are indexed relative to the first generic attribute. */
+      size_t i = var->data.location - VERT_ATTRIB_GENERIC0;
+
+      /* Check that inputs are F32. */
+      /* TODO: Support other types. */
+      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+      assert(glsl_type_is_32bit(var->type));
+
+      /* Check input location. */
+      assert(var->data.location >= VERT_ATTRIB_GENERIC0 &&
+             var->data.location <= VERT_ATTRIB_GENERIC15);
+
+      reserve_vs_input(&vs_data->inputs, i, components);
+   }
+
+   /* Feed the vertex-input register usage back to the driver. */
+   vs_data->num_vertex_input_regs = alloc_vs_inputs(&vs_data->inputs);
+   assert(vs_data->num_vertex_input_regs);
+   assert(vs_data->num_vertex_input_regs < ROGUE_MAX_REG_VERTEX_IN);
+
+   /* Process outputs. */
+
+   /* We should always have at least a position variable. */
+   assert(num_outputs > 0 && "Invalid number of vertex shader outputs.");
+
+   nir_foreach_shader_out_variable (var, nir) {
+      size_t components = glsl_get_components(var->type);
+
+      /* Check that outputs are F32. */
+      /* TODO: Support other types. */
+      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+      assert(glsl_type_is_32bit(var->type));
+
+      if (var->data.location == VARYING_SLOT_POS) {
+         /* Output slot 0 is reserved for the vec4 position. */
+         assert(components == 4);
+         out_pos_present = true;
+
+         reserve_vs_output(&vs_data->outputs, 0, components);
+      } else if ((var->data.location >= VARYING_SLOT_VAR0) &&
+                 (var->data.location <= VARYING_SLOT_VAR31)) {
+         /* +1 to skip the position slot. */
+         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
+         reserve_vs_output(&vs_data->outputs, i, components);
+      } else {
+         unreachable("Unsupported vertex output type.");
+      }
+   }
+
+   /* Always need the output position to be present. */
+   assert(out_pos_present);
+
+   /* Feed the vertex-output and varying counts back to the driver. */
+   vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs);
+   assert(vs_data->num_vertex_outputs);
+   assert(vs_data->num_vertex_outputs < ROGUE_MAX_VERTEX_OUTPUTS);
+
+   vs_data->num_varyings = count_vs_varyings(&vs_data->outputs);
+
+   return true;
+}
+
+/**
+ * \brief Allocates the shared registers that will contain the UBOs.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \return The total number of coefficient registers required by the iterators.
+ */
+static size_t alloc_ubos(struct rogue_ubo_data *ubo_data)
+{
+   size_t next_shared = 0;
+
+   for (size_t i = 0; i < ubo_data->num_ubo_entries; ++i) {
+      /* Ensure there aren't any gaps (i.e. nothing already allocated). */
+      assert(ubo_data->dest[i] == ~0);
+
+      ubo_data->dest[i] = next_shared;
+      next_shared += ubo_data->size[i];
+   }
+
+   return next_shared;
+}
+
+/**
+ * \brief Reserves a UBO and calculates its data.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \param[in] desc_set The UBO descriptor set.
+ * \param[in] binding The UBO binding.
+ * \param[in] size The size required by the UBO (in dwords).
+ */
+static void reserve_ubo(struct rogue_ubo_data *ubo_data,
+                        size_t desc_set,
+                        size_t binding,
+                        size_t size)
+{
+   size_t entry = ubo_data->num_ubo_entries;
+   assert(entry < ARRAY_SIZE(ubo_data->desc_set));
+
+   ubo_data->desc_set[entry] = desc_set;
+   ubo_data->binding[entry] = binding;
+   /* dest is filled in later by alloc_ubos(); ~0 marks it unallocated. */
+   ubo_data->dest[entry] = ~0;
+   ubo_data->size[entry] = size;
+   ++ubo_data->num_ubo_entries;
+}
+
+/**
+ * \brief Collects UBO data to feed-back to the driver.
+ *
+ * For each UBO variable, scans the entrypoint for load_ubo intrinsics that
+ * reference it, sizes the UBO from the largest constant load offset, then
+ * allocates the shared registers that will contain the UBO contents.
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] nir NIR shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_ubo_data(struct rogue_common_build_data *common_data,
+                             nir_shader *nir)
+{
+   /* The entrypoint is loop-invariant: look it up once rather than once per
+    * UBO variable.
+    */
+   nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+
+   /* Iterate over each UBO. */
+   nir_foreach_variable_with_modes (var, nir, nir_var_mem_ubo) {
+      size_t desc_set = var->data.driver_location;
+      size_t binding = var->data.binding;
+      size_t ubo_size_regs = 0;
+
+      /* Iterate over each load_ubo that uses this UBO. */
+      nir_foreach_block (block, entry) {
+         nir_foreach_instr (instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            if (intr->intrinsic != nir_intrinsic_load_ubo)
+               continue;
+
+            /* src[0] is the constant (desc_set, binding) pair identifying
+             * the UBO being loaded from.
+             */
+            assert(nir_src_num_components(intr->src[0]) == 2);
+            assert(nir_intr_src_is_const(intr, 0));
+
+            size_t load_desc_set = nir_intr_src_comp_const(intr, 0, 0);
+            size_t load_binding = nir_intr_src_comp_const(intr, 0, 1);
+
+            if (load_desc_set != desc_set || load_binding != binding)
+               continue;
+
+            /* Only single-register loads are supported for now. */
+            ASSERTED size_t size_bytes = nir_intrinsic_range(intr);
+            assert(size_bytes == ROGUE_REG_SIZE_BYTES);
+
+            size_t offset_bytes = nir_intrinsic_range_base(intr);
+            assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
+
+            size_t offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
+
+            /* TODO: Put offsets in a BITSET_DECLARE and check for gaps. */
+
+            /* Find the largest load offset. */
+            ubo_size_regs = MAX2(ubo_size_regs, offset_regs);
+         }
+      }
+
+      /* UBO size = largest offset + 1. */
+      ++ubo_size_regs;
+
+      reserve_ubo(&common_data->ubo_data, desc_set, binding, ubo_size_regs);
+   }
+
+   common_data->shareds = alloc_ubos(&common_data->ubo_data);
+   assert(common_data->shareds < ROGUE_MAX_REG_SHARED);
+
+   return true;
+}
+
+/**
+ * \brief Collects I/O data to feed-back to the driver.
+ *
+ * Collects the inputs/outputs/memory required, and feeds that back to the
+ * driver. Done at this stage rather than at the start of rogue_to_binary, so
+ * that all the I/O of all the shader stages is known before backend
+ * compilation, which would let us do things like cull unused inputs.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir)
+{
+   gl_shader_stage stage = nir->info.stage;
+   struct rogue_common_build_data *common_data = &ctx->common_data[stage];
+
+   /* Collect stage-agnostic data. */
+   if (!collect_ubo_data(common_data, nir))
+      return false;
+
+   /* Collect stage-specific data. */
+   switch (stage) {
+   case MESA_SHADER_FRAGMENT:
+      return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir);
+
+   case MESA_SHADER_VERTEX:
+      return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir);
+
+   default:
+      break;
+   }
+
+   /* Stages other than fragment/vertex are not yet supported. */
+   return false;
+}
+
+/**
+ * \brief Returns the allocated coefficient register index for a component of an
+ * input varying location.
+ *
+ * \param[in] args The allocated iterator argument data.
+ * \param[in] location The input varying location, or ~0 for the W coefficient.
+ * \param[in] component The requested component.
+ * \return The coefficient register index.
+ */
+size_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
+                            gl_varying_slot location,
+                            size_t component)
+{
+   size_t i;
+
+   /* Special case: W coefficient. */
+   if (location == ~0) {
+      /* The W component shouldn't be the only one. */
+      assert(args->num_fpu_iterators > 1);
+      /* W is always allocated at coefficient register 0. */
+      assert(args->destination[0] == 0);
+      return 0;
+   }
+
+   /* Iterator 0 holds W, so generic varyings start at index 1. */
+   i = (location - VARYING_SLOT_VAR0) + 1;
+   assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31);
+   assert(i < args->num_fpu_iterators);
+   assert(component < args->components[i]);
+   /* ~0 marks an unallocated base. */
+   assert(args->base[i] != ~0);
+
+   /* Components are spaced ROGUE_COEFF_ALIGN registers apart. */
+   return args->base[i] + (ROGUE_COEFF_ALIGN * component);
+}
+
+/**
+ * \brief Returns the allocated vertex output index for a component of an input
+ * varying location.
+ *
+ * \param[in] outputs The vertex output data.
+ * \param[in] location The output varying location.
+ * \param[in] component The requested component.
+ * \return The vertex output index.
+ */
+size_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
+                             gl_varying_slot location,
+                             size_t component)
+{
+   size_t i;
+
+   if (location == VARYING_SLOT_POS) {
+      /* Always at location 0. */
+      assert(outputs->base[0] == 0);
+      i = 0;
+   } else if ((location >= VARYING_SLOT_VAR0) &&
+              (location <= VARYING_SLOT_VAR31)) {
+      /* Entry 0 is reserved for the position, so generic varyings
+       * start at index 1.
+       */
+      i = (location - VARYING_SLOT_VAR0) + 1;
+   } else {
+      unreachable("Unsupported vertex output type.");
+   }
+
+   assert(i < outputs->num_output_vars);
+   assert(component < outputs->components[i]);
+   /* ~0 marks an unallocated base. */
+   assert(outputs->base[i] != ~0);
+
+   return outputs->base[i] + component;
+}
+
+/**
+ * \brief Returns the allocated shared register index for a given UBO offset.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \param[in] desc_set The UBO descriptor set.
+ * \param[in] binding The UBO binding.
+ * \param[in] offset_bytes The UBO offset in bytes.
+ * \return The UBO offset shared register index.
+ */
+size_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
+                     size_t desc_set,
+                     size_t binding,
+                     size_t offset_bytes)
+{
+   size_t ubo_index = ~0;
+   size_t offset_regs;
+
+   /* Find UBO located at (desc_set, binding). */
+   for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
+      /* Skip entries that were never allocated shared registers. */
+      if (ubo_data->dest[u] == ~0)
+         continue;
+
+      if (ubo_data->desc_set[u] != desc_set || ubo_data->binding[u] != binding)
+         continue;
+
+      ubo_index = u;
+      break;
+   }
+
+   /* The requested UBO must have been allocated. */
+   assert(ubo_index != ~0);
+
+   /* Offsets must be register-aligned. */
+   assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
+   offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
+
+   return ubo_data->dest[ubo_index] + offset_regs;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_BUILD_DATA_H
+#define ROGUE_BUILD_DATA_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "rogue.h"
+
+/* Max number of I/O varying variables.
+ * Fragment shader: MAX_VARYING + 1 (W coefficient).
+ * Vertex shader: MAX_VARYING + 1 (position slot).
+ */
+#define ROGUE_MAX_IO_VARYING_VARS (MAX_VARYING + 1)
+
+/* VERT_ATTRIB_GENERIC0-15 */
+#define ROGUE_MAX_IO_ATTRIB_VARS 16
+
+/* Max buffers entries that can be used. */
+/* TODO: Currently UBOs are the only supported buffers. */
+#define ROGUE_MAX_BUFFERS 24
+
+struct rogue_compiler;
+struct rogue_shader;
+struct rogue_shader_binary;
+
+/**
+ * \brief UBO data.
+ *
+ * Parallel arrays indexed together; num_ubo_entries gives the valid count.
+ */
+struct rogue_ubo_data {
+   size_t num_ubo_entries;
+   size_t desc_set[ROGUE_MAX_BUFFERS];
+   size_t binding[ROGUE_MAX_BUFFERS];
+   size_t dest[ROGUE_MAX_BUFFERS]; /* Shared register base; ~0 = unallocated. */
+   size_t size[ROGUE_MAX_BUFFERS]; /* UBO size in dwords. */
+};
+
+/**
+ * \brief Per-stage common build data.
+ *
+ * Register usage counts for the register banks shared by all stages.
+ */
+struct rogue_common_build_data {
+   size_t temps;     /* Temporary registers used. */
+   size_t internals; /* Internal registers used. */
+   size_t coeffs;    /* Coefficient registers used. */
+   size_t shareds;   /* Shared registers used (set from UBO allocation). */
+
+   struct rogue_ubo_data ubo_data;
+};
+
+/**
+ * \brief Arguments for the FPU iterator(s)
+ * (produces varyings for the fragment shader).
+ *
+ * Entry 0 is the W coefficient; generic varyings follow from entry 1.
+ */
+struct rogue_iterator_args {
+   uint32_t num_fpu_iterators;
+   uint32_t fpu_iterators[ROGUE_MAX_IO_VARYING_VARS];
+   uint32_t destination[ROGUE_MAX_IO_VARYING_VARS];
+   size_t base[ROGUE_MAX_IO_VARYING_VARS]; /* Coeff reg base; ~0 = unallocated. */
+   size_t components[ROGUE_MAX_IO_VARYING_VARS]; /* Components per varying. */
+};
+
+/**
+ * \brief Vertex input register allocations.
+ */
+struct rogue_vertex_inputs {
+   size_t num_input_vars;
+   size_t base[ROGUE_MAX_IO_ATTRIB_VARS]; /* Vertex input register base. */
+   size_t components[ROGUE_MAX_IO_ATTRIB_VARS]; /* Components per input. */
+};
+
+/**
+ * \brief Vertex output allocations.
+ *
+ * Entry 0 is the position (VARYING_SLOT_POS); generic varyings follow
+ * from entry 1.
+ */
+struct rogue_vertex_outputs {
+   size_t num_output_vars;
+   size_t base[ROGUE_MAX_IO_VARYING_VARS]; /* Output base; ~0 = unallocated. */
+   size_t components[ROGUE_MAX_IO_VARYING_VARS]; /* Components per output. */
+};
+
+/**
+ * \brief Stage-specific build data.
+ *
+ * Only the fragment and vertex stages are currently supported.
+ */
+struct rogue_build_data {
+   struct rogue_fs_build_data {
+      struct rogue_iterator_args iterator_args;
+      enum rogue_msaa_mode msaa_mode;
+      bool phas; /* Indicates the presence of PHAS instruction. */
+   } fs;
+   struct rogue_vs_build_data {
+      struct rogue_vertex_inputs inputs;
+      size_t num_vertex_input_regs; /* Final number of inputs. */
+
+      struct rogue_vertex_outputs outputs;
+      size_t num_vertex_outputs; /* Final number of outputs. */
+
+      size_t num_varyings; /* Final number of varyings. */
+   } vs;
+};
+
+/**
+ * \brief Shared multi-stage build context.
+ */
+struct rogue_build_ctx {
+   struct rogue_compiler *compiler;
+
+   /* Shaders in various stages of compilation, indexed by gl_shader_stage
+    * (up to and including MESA_SHADER_FRAGMENT).
+    */
+   nir_shader *nir[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_shader *rogue[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_shader_binary *binary[MESA_SHADER_FRAGMENT + 1];
+
+   struct rogue_common_build_data common_data[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_build_data stage_data;
+};
+
+PUBLIC
+struct rogue_build_ctx *
+rogue_create_build_context(struct rogue_compiler *compiler);
+
+PUBLIC
+bool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir);
+
+PUBLIC
+size_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
+ gl_varying_slot location,
+ size_t component);
+
+PUBLIC
+size_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
+ gl_varying_slot location,
+ size_t component);
+
+PUBLIC
+size_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
+ size_t desc_set,
+ size_t binding,
+ size_t offset_bytes);
+
+#endif /* ROGUE_BUILD_DATA_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stddef.h>
+
+#include "compiler/glsl_types.h"
+#include "rogue_compiler.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue_compiler.c
+ *
+ * \brief Contains the Rogue compiler interface.
+ */
+
+/**
+ * \brief Creates and sets up a Rogue compiler context.
+ *
+ * Takes a reference on the glsl_type singleton; released in
+ * rogue_compiler_destroy().
+ *
+ * \param[in] dev_info Device info pointer. Stored directly (not copied), so it
+ *                     must outlive the compiler context.
+ * \return A pointer to the new compiler context, or NULL on failure.
+ */
+struct rogue_compiler *
+rogue_compiler_create(const struct pvr_device_info *dev_info)
+{
+   struct rogue_compiler *compiler;
+
+   /* Zero-initialized ralloc allocation; freed with ralloc_free(). */
+   compiler = rzalloc_size(NULL, sizeof(*compiler));
+   if (!compiler)
+      return NULL;
+
+   compiler->dev_info = dev_info;
+
+   /* TODO: Additional compiler setup (allocators? error message output
+    * location?).
+    */
+
+   glsl_type_singleton_init_or_ref();
+
+   return compiler;
+}
+
+/**
+ * \brief Destroys and frees a compiler context.
+ *
+ * Releases the glsl_type singleton reference taken in rogue_compiler_create().
+ *
+ * \param[in] compiler The compiler context.
+ */
+void rogue_compiler_destroy(struct rogue_compiler *compiler)
+{
+   glsl_type_singleton_decref();
+
+   ralloc_free(compiler);
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_COMPILER_H
+#define ROGUE_COMPILER_H
+
+#include "util/macros.h"
+
+struct pvr_device_info;
+
+/**
+ * \brief Compiler context.
+ */
+struct rogue_compiler {
+   const struct pvr_device_info *dev_info; /* Target device info; not owned. */
+};
+
+PUBLIC
+struct rogue_compiler *
+rogue_compiler_create(const struct pvr_device_info *dev_info);
+
+PUBLIC
+void rogue_compiler_destroy(struct rogue_compiler *compiler);
+
+#endif /* ROGUE_COMPILER_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "rogue_constreg.h"
+#include "util/macros.h"
+
+/**
+ * \file rogue_constreg.c
+ *
+ * \brief Contains functions to find and allocate constant register values.
+ */
+
+/**
+ * \brief Mapping of constant register values and their indices.
+ */
+struct rogue_constreg {
+ uint32_t value;
+ size_t index;
+};
+
+#define CONSTREG(VALUE, INDEX) \
+ { \
+ .value = (VALUE), .index = (INDEX), \
+ }
+
+/**
+ * \brief Constant register values (sorted for bsearch).
+ */
+static const struct rogue_constreg const_regs[] = {
+ CONSTREG(0x00000000U, 0U), /* 0 (INT32) / 0.0 (Float) */
+ CONSTREG(0x00000001U, 1U), /* 1 (INT32) */
+ CONSTREG(0x00000002U, 2U), /* 2 (INT32) */
+ CONSTREG(0x00000003U, 3U), /* 3 (INT32) */
+ CONSTREG(0x00000004U, 4U), /* 4 (INT32) */
+ CONSTREG(0x00000005U, 5U), /* 5 (INT32) */
+ CONSTREG(0x00000006U, 6U), /* 6 (INT32) */
+ CONSTREG(0x00000007U, 7U), /* 7 (INT32) */
+ CONSTREG(0x00000008U, 8U), /* 8 (INT32) */
+ CONSTREG(0x00000009U, 9U), /* 9 (INT32) */
+ CONSTREG(0x0000000aU, 10U), /* 10 (INT32) */
+ CONSTREG(0x0000000bU, 11U), /* 11 (INT32) */
+ CONSTREG(0x0000000cU, 12U), /* 12 (INT32) */
+ CONSTREG(0x0000000dU, 13U), /* 13 (INT32) */
+ CONSTREG(0x0000000eU, 14U), /* 14 (INT32) */
+ CONSTREG(0x0000000fU, 15U), /* 15 (INT32) */
+ CONSTREG(0x00000010U, 16U), /* 16 (INT32) */
+ CONSTREG(0x00000011U, 17U), /* 17 (INT32) */
+ CONSTREG(0x00000012U, 18U), /* 18 (INT32) */
+ CONSTREG(0x00000013U, 19U), /* 19 (INT32) */
+ CONSTREG(0x00000014U, 20U), /* 20 (INT32) */
+ CONSTREG(0x00000015U, 21U), /* 21 (INT32) */
+ CONSTREG(0x00000016U, 22U), /* 22 (INT32) */
+ CONSTREG(0x00000017U, 23U), /* 23 (INT32) */
+ CONSTREG(0x00000018U, 24U), /* 24 (INT32) */
+ CONSTREG(0x00000019U, 25U), /* 25 (INT32) */
+ CONSTREG(0x0000001aU, 26U), /* 26 (INT32) */
+ CONSTREG(0x0000001bU, 27U), /* 27 (INT32) */
+ CONSTREG(0x0000001cU, 28U), /* 28 (INT32) */
+ CONSTREG(0x0000001dU, 29U), /* 29 (INT32) */
+ CONSTREG(0x0000001eU, 30U), /* 30 (INT32) */
+ CONSTREG(0x0000001fU, 31U), /* 31 (INT32) */
+ CONSTREG(0x0000007fU, 147U), /* 127 (INT32) */
+
+ CONSTREG(0x37800000U, 134U), /* 1.0f/65536f */
+ CONSTREG(0x38000000U, 135U), /* 1.0f/32768f */
+ CONSTREG(0x38800000U, 88U), /* float(2^-14) */
+ CONSTREG(0x39000000U, 87U), /* float(2^-13) */
+ CONSTREG(0x39800000U, 86U), /* float(2^-12) */
+ CONSTREG(0x3a000000U, 85U), /* float(2^-11) */
+ CONSTREG(0x3a800000U, 84U), /* float(2^-10) */
+ CONSTREG(0x3b000000U, 83U), /* float(2^-9) */
+ CONSTREG(0x3b4d2e1cU, 136U), /* 0.0031308f */
+ CONSTREG(0x3b800000U, 82U), /* float(2^-8) */
+ CONSTREG(0x3c000000U, 81U), /* float(2^-7) */
+ CONSTREG(0x3c800000U, 80U), /* float(2^-6) */
+ CONSTREG(0x3d000000U, 79U), /* float(2^-5) */
+ CONSTREG(0x3d25aee6U, 156U), /* 0.04045f */
+ CONSTREG(0x3d6147aeU, 140U), /* 0.055f */
+ CONSTREG(0x3d800000U, 78U), /* float(2^-4) */
+ CONSTREG(0x3d9e8391U, 157U), /* 1.0f/12.92f */
+ CONSTREG(0x3e000000U, 77U), /* float(2^-3) */
+ CONSTREG(0x3e2aaaabU, 153U), /* 1/6 */
+ CONSTREG(0x3e800000U, 76U), /* float(2^-2) */
+ CONSTREG(0x3e9a209bU, 145U), /* Log_10(2) */
+ CONSTREG(0x3ea2f983U, 128U), /* Float 1/PI */
+ CONSTREG(0x3eaaaaabU, 152U), /* 1/3 */
+ CONSTREG(0x3ebc5ab2U, 90U), /* 1/e */
+ CONSTREG(0x3ed55555U, 138U), /* 1.0f/2.4f */
+ CONSTREG(0x3f000000U, 75U), /* float(2^-1) */
+ CONSTREG(0x3f22f983U, 129U), /* Float 2/PI */
+ CONSTREG(0x3f317218U, 146U), /* Log_e(2) */
+ CONSTREG(0x3f3504f3U, 92U), /* Float 1/SQRT(2) */
+ CONSTREG(0x3f490fdbU, 93U), /* Float PI/4 */
+ CONSTREG(0x3f72a76fU, 158U), /* 1.0f/1.055f */
+ CONSTREG(0x3f800000U, 64U), /* 1.0f */
+ CONSTREG(0x3f860a92U, 151U), /* Pi/3 */
+ CONSTREG(0x3f870a3dU, 139U), /* 1.055f */
+ CONSTREG(0x3fa2f983U, 130U), /* Float 4/PI */
+ CONSTREG(0x3fb504f3U, 91U), /* Float SQRT(2) */
+ CONSTREG(0x3fb8aa3bU, 155U), /* Log_2(e) */
+ CONSTREG(0x3fc90fdbU, 94U), /* Float PI/2 */
+ CONSTREG(0x40000000U, 65U), /* float(2^1) */
+ CONSTREG(0x4019999aU, 159U), /* 2.4f */
+ CONSTREG(0x402df854U, 89U), /* e */
+ CONSTREG(0x40490fdbU, 95U), /* Float PI */
+ CONSTREG(0x40549a78U, 154U), /* Log_2(10) */
+ CONSTREG(0x40800000U, 66U), /* float(2^2) */
+ CONSTREG(0x40c90fdbU, 131U), /* Float 2*PI */
+ CONSTREG(0x41000000U, 67U), /* float(2^3) */
+ CONSTREG(0x41490fdbU, 132U), /* Float 4*PI */
+ CONSTREG(0x414eb852U, 137U), /* 12.92f */
+ CONSTREG(0x41800000U, 68U), /* float(2^4) */
+ CONSTREG(0x41c90fdbU, 133U), /* Float 8*PI */
+ CONSTREG(0x42000000U, 69U), /* float(2^5) */
+ CONSTREG(0x42800000U, 70U), /* float(2^6) */
+ CONSTREG(0x43000000U, 71U), /* float(2^7) */
+ CONSTREG(0x43800000U, 72U), /* float(2^8) */
+ CONSTREG(0x44000000U, 73U), /* float(2^9) */
+ CONSTREG(0x44800000U, 74U), /* float(2^10) */
+ CONSTREG(0x4b000000U, 149U), /* 2^23 */
+ CONSTREG(0x4b800000U, 150U), /* 2^24 */
+ CONSTREG(0x7f7fffffU, 148U), /* FLT_MAX */
+ CONSTREG(0x7f800000U, 142U), /* Infinity */
+ CONSTREG(0x7fff7fffU, 144U), /* ARGB1555 mask */
+ CONSTREG(0x80000000U, 141U), /* -0.0f */
+ CONSTREG(0xffffffffU, 143U), /* -1 */
+};
+
+#undef CONSTREG
+
+/**
+ * \brief Comparison function for bsearch() to support struct rogue_constreg.
+ *
+ * Orders entries by ascending .value, matching the sort order of const_regs.
+ *
+ * \param[in] lhs The left hand side of the comparison.
+ * \param[in] rhs The right hand side of the comparison.
+ * \return 0 if (lhs == rhs), -1 if (lhs < rhs), 1 if (lhs > rhs).
+ */
+static int constreg_cmp(const void *lhs, const void *rhs)
+{
+   const struct rogue_constreg *l = lhs;
+   const struct rogue_constreg *r = rhs;
+
+   if (l->value < r->value)
+      return -1;
+   else if (l->value > r->value)
+      return 1;
+
+   return 0;
+}
+
+/**
+ * \brief Determines whether a given integer value exists in a constant
+ * register.
+ *
+ * Performs a binary search over the (value-sorted) const_regs table.
+ *
+ * \param[in] value The value required.
+ * \return The index of the constant register containing the value, or
+ * ROGUE_NO_CONST_REG if the value is not found.
+ */
+size_t rogue_constreg_lookup(uint32_t value)
+{
+   /* Only .value is inspected by the comparison function. */
+   struct rogue_constreg constreg_target = {
+      .value = value,
+   };
+   const struct rogue_constreg *constreg;
+
+   constreg = bsearch(&constreg_target,
+                      const_regs,
+                      ARRAY_SIZE(const_regs),
+                      sizeof(struct rogue_constreg),
+                      constreg_cmp);
+   if (!constreg)
+      return ROGUE_NO_CONST_REG;
+
+   return constreg->index;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_CONSTREGS_H
+#define ROGUE_CONSTREGS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "util/macros.h"
+#include "util/u_math.h"
+
+#define ROGUE_NO_CONST_REG SIZE_MAX
+
+PUBLIC
+size_t rogue_constreg_lookup(uint32_t value);
+
+/**
+ * \brief Determines whether a given floating point value exists in a constant
+ * register.
+ *
+ * The lookup matches the exact bit pattern (via fui()), so e.g. -0.0f and
+ * 0.0f are treated as distinct values.
+ *
+ * \param[in] value The value required.
+ * \return The index of the constant register containing the value, or
+ * ROGUE_NO_CONST_REG if the value is not found.
+ */
+static inline size_t rogue_constreg_lookup_float(float value)
+{
+   return rogue_constreg_lookup(fui(value));
+}
+
+#endif /* ROGUE_CONSTREGS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "rogue_dump.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+
+/**
+ * \file rogue_dump.c
+ *
+ * \brief Contains functions to dump Rogue data structures into a textual
+ * format.
+ */
+
+/* Textual prefix for each operand type (register bank, "#", etc.). */
+static const char *const rogue_operand_string[ROGUE_OPERAND_TYPE_COUNT] = {
+   [ROGUE_OPERAND_TYPE_REG_TEMP] = "r",
+   [ROGUE_OPERAND_TYPE_REG_COEFF] = "cf",
+   [ROGUE_OPERAND_TYPE_REG_CONST] = "c",
+   [ROGUE_OPERAND_TYPE_REG_SHARED] = "sh",
+   [ROGUE_OPERAND_TYPE_REG_PIXEL_OUT] = "po",
+   [ROGUE_OPERAND_TYPE_REG_VERTEX_IN] = "vi",
+   [ROGUE_OPERAND_TYPE_REG_INTERNAL] = "i",
+   [ROGUE_OPERAND_TYPE_IMMEDIATE] = "#",
+   [ROGUE_OPERAND_TYPE_DRC] = "drc",
+   [ROGUE_OPERAND_TYPE_VREG] = "V",
+};
+
+/* Opcode mnemonics, indexed by opcode. */
+static const char *const rogue_opcode_string[ROGUE_OP_COUNT] = {
+   [ROGUE_OP_NOP] = "nop",
+   [ROGUE_OP_END_FRAG] = "end.frag",
+   [ROGUE_OP_END_VERT] = "end.vert",
+   [ROGUE_OP_WDF] = "wdf",
+   [ROGUE_OP_PIX_ITER_W] = "pixiter.w",
+   [ROGUE_OP_MAX] = "max",
+   [ROGUE_OP_MIN] = "min",
+   [ROGUE_OP_PACK_U8888] = "pack.u8888",
+   [ROGUE_OP_MOV] = "mov",
+   [ROGUE_OP_MOV_IMM] = "mov.imm",
+   [ROGUE_OP_FMA] = "fma",
+   [ROGUE_OP_MUL] = "mul",
+   [ROGUE_OP_VTXOUT] = "vtxout",
+};
+
+/* Instruction flag suffixes (printed as ".<flag>"), indexed by flag. */
+static const char *const rogue_instr_flag_string[ROGUE_INSTR_FLAG_COUNT] = {
+   [ROGUE_INSTR_FLAG_SAT] = "sat",
+   [ROGUE_INSTR_FLAG_LP] = "lp",
+   [ROGUE_INSTR_FLAG_OLCHK] = "olchk",
+};
+
+/* Vector component selector names. */
+static const char rogue_vector_string[4] = {
+   'x',
+   'y',
+   'z',
+   'w',
+};
+
+/**
+ * \brief Dumps an operand as text to a file pointer.
+ *
+ * \param[in] operand The operand.
+ * \param[in] fp The file pointer.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_dump_operand(const struct rogue_operand *operand, FILE *fp)
+{
+   ASSERT_OPERAND_RANGE(operand->type);
+
+   /* Type-specific prefix, e.g. register bank name or "#" for immediates. */
+   fprintf(fp, "%s", rogue_operand_string[operand->type]);
+
+   if (operand->type == ROGUE_OPERAND_TYPE_IMMEDIATE)
+      fprintf(fp, "%" PRIu64, operand->immediate.value);
+   else if (operand->type == ROGUE_OPERAND_TYPE_DRC)
+      fprintf(fp, "%zu", operand->drc.number);
+   else if (rogue_check_bitset(rogue_onehot(operand->type), ROGUE_MASK_ANY_REG))
+      fprintf(fp, "%zu", operand->reg.number);
+   else if (operand->type == ROGUE_OPERAND_TYPE_VREG) {
+      fprintf(fp, "%zu", operand->vreg.number);
+      /* Virtual registers may carry a component selector (.x/.y/.z/.w). */
+      if (operand->vreg.is_vector)
+         fprintf(fp, ".%c", rogue_vector_string[operand->vreg.component]);
+   }
+
+   return true;
+}
+
+/**
+ * \brief Dumps an instruction as text to a file pointer.
+ *
+ * Output form: "<opcode>[.<flag>...] [op0[, op1...]];"
+ *
+ * \param[in] instr The instruction.
+ * \param[in] fp The file pointer.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_dump_instr(const struct rogue_instr *instr, FILE *fp)
+{
+   uint64_t flags = 0U;
+
+   ASSERT_OPCODE_RANGE(instr->opcode);
+
+   flags = instr->flags;
+
+   fprintf(fp, "%s", rogue_opcode_string[instr->opcode]);
+
+   /* Iterate over each flag bit and print its string form. */
+   while (flags) {
+      uint64_t flag = u_bit_scan64(&flags);
+      ASSERT_INSTR_FLAG_RANGE(flag);
+      fprintf(fp, ".%s", rogue_instr_flag_string[flag]);
+   }
+
+   if (instr->num_operands)
+      fprintf(fp, " ");
+
+   /* Dump each operand, comma-separated. */
+   for (size_t u = 0U; u < instr->num_operands; ++u) {
+      CHECKF(rogue_dump_operand(&instr->operands[u], fp),
+             "Failed to dump operand.");
+      if (u < (instr->num_operands - 1))
+         fprintf(fp, ", ");
+   }
+
+   fprintf(fp, ";");
+
+   return true;
+}
+
+/**
+ * \brief Dumps a shader as text to a file pointer.
+ *
+ * \param[in] shader The shader.
+ * \param[in] fp The file pointer.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_dump_shader(const struct rogue_shader *shader, FILE *fp)
+{
+   /* Dump the shader stage as a comment header. */
+   fprintf(fp, "# %s shader\n", _mesa_shader_stage_to_string(shader->stage));
+
+   /* Dump each instruction, one per line. */
+   foreach_instr (instr, &shader->instr_list) {
+      CHECKF(rogue_dump_instr(instr, fp), "Failed to dump instruction.");
+      fprintf(fp, "\n");
+   }
+   fprintf(fp, "\n");
+
+   return true;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_DUMP_H
+#define ROGUE_DUMP_H
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "util/macros.h"
+
+PUBLIC
+bool rogue_dump_operand(const struct rogue_operand *operand, FILE *fp);
+
+PUBLIC
+bool rogue_dump_instr(const struct rogue_instr *instr, FILE *fp);
+
+PUBLIC
+bool rogue_dump_shader(const struct rogue_shader *shader, FILE *fp);
+
+#endif /* ROGUE_DUMP_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "rogue_encode.h"
+#include "rogue_encoders.h"
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+#include "util/macros.h"
+
+/**
+ * \brief Returns the register bank encoding for a register operand.
+ *
+ * \param[in] operand The register operand.
+ * \return The register bank number used in the instruction encoding.
+ */
+static size_t rogue_encode_reg_bank(const struct rogue_operand *operand)
+{
+   switch (operand->type) {
+   /* Internal, pixel-out and constant registers share bank 0. */
+   case ROGUE_OPERAND_TYPE_REG_INTERNAL:
+   case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT:
+   case ROGUE_OPERAND_TYPE_REG_CONST:
+      return 0;
+   case ROGUE_OPERAND_TYPE_REG_TEMP:
+      return 1;
+   case ROGUE_OPERAND_TYPE_REG_VERTEX_IN:
+      return 2;
+   case ROGUE_OPERAND_TYPE_REG_COEFF:
+      return 3;
+   case ROGUE_OPERAND_TYPE_REG_SHARED:
+      return 4;
+   default:
+      break;
+   }
+
+   unreachable("Unimplemented register bank.");
+}
+
/**
 * \brief Field mapping type.
 */
enum rogue_map_type {
   /* Source is a bit from the instruction's flag bitmask. */
   ROGUE_MAP_TYPE_INSTR_FLAG = 0,
   /* Source is a flag on an individual operand (not yet supported by
    * rogue_encode_instr()).
    */
   ROGUE_MAP_TYPE_OPERAND_FLAG,
   /* Source is an operand value (register, immediate or DRC). */
   ROGUE_MAP_TYPE_OPERAND,

   ROGUE_MAP_TYPE_COUNT,
};
+
/**
 * \brief Field mapping rule description.
 *
 * Describes how a single source (an instruction flag or an operand) is
 * encoded and scattered into an instruction's byte mask.
 */
struct rogue_field_mapping {
   /* Type of mapping being performed. */
   enum rogue_map_type type;

   /* Index of the source operand/flag being mapped. */
   size_t index;

   /* List of ranges to perform mapping. */
   struct rogue_rangelist rangelist;

   /* Function used to encode the input into the value to be mapped
    * (NULL for plain boolean flag mappings).
    */
   field_encoder_t encoder_fn;
};
+
/**
 * \brief Instruction encoding rule description.
 *
 * "bytes" is copied verbatim as the base instruction mask and then
 * patched via "mappings" when an instruction is encoded.
 */
struct rogue_instr_encoding {
   /* Number of bytes making up the base mask. */
   size_t num_bytes;
   /* Base mask bytes. */
   uint8_t *bytes;

   /* Number of field mappings for this instruction. */
   size_t num_mappings;
   /* Field mappings. */
   struct rogue_field_mapping *mappings;
};
+
/**
 * \brief Base instruction encodings and field mappings, indexed by opcode.
 *
 * Each entry provides the base byte mask for the instruction; flag and
 * operand values are scattered into it at the bit positions listed in
 * each mapping's rangelist (see rogue_encode_instr()). The per-range
 * comments name the hardware field and bit span being filled.
 */
static const
struct rogue_instr_encoding instr_encodings[ROGUE_OP_COUNT] = {
   [ROGUE_OP_NOP] = {
      .num_bytes = 8,
      .bytes = (uint8_t []) { 0x04, 0x80, 0x6e, 0x00, 0xf2, 0xff, 0xff, 0xff },
   },

   [ROGUE_OP_END_FRAG] = {
      .num_bytes = 8,
      .bytes = (uint8_t []) { 0x04, 0x80, 0xee, 0x00, 0xf2, 0xff, 0xff, 0xff },
   },

   [ROGUE_OP_END_VERT] = {
      .num_bytes = 8,
      .bytes = (uint8_t []) { 0x44, 0xa0, 0x80, 0x05, 0x00, 0x00, 0x00, 0xff },
   },

   [ROGUE_OP_WDF] = {
      .num_bytes = 8,
      .bytes = (uint8_t []) { 0x04, 0x80, 0x6a, 0xff, 0xf2, 0xff, 0xff, 0xff },
      .num_mappings = 1,
      .mappings = (struct rogue_field_mapping []) {
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 47, .num = 1, },
               },
            },
            .encoder_fn = &rogue_encoder_drc,
         },
      },
   },

   [ROGUE_OP_PIX_ITER_W] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x48, 0x20, 0xb0, 0x01, 0x80, 0x40, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0xff, 0xf1, 0xff },
      .num_mappings = 6,
      .mappings = (struct rogue_field_mapping []) {
         /* Instruction flag mappings. */
         {
            .type = ROGUE_MAP_TYPE_INSTR_FLAG,
            .index = ROGUE_INSTR_FLAG_SAT,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 100, .num = 1, },
               },
            },
            .encoder_fn = NULL,
         },
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 43, .num = 2, }, /* SB3(2..1) */
                  { .start = 54, .num = 1, }, /* SB3(0) */
                  { .start = 34, .num = 3, }, /* S3(10..8) */
                  { .start = 41, .num = 2, }, /* S3(7..6) */
                  { .start = 53, .num = 6, }, /* S3(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 59, .num = 1, },
               },
            },
            .encoder_fn = &rogue_encoder_drc,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 2,
            .rangelist = {
               .num_ranges = 6,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 59, .num = 1, }, /* SB0(2) */
                  { .start = 76, .num = 1, }, /* SB0(1) */
                  { .start = 94, .num = 1, }, /* SB0(0) */
                  { .start = 57, .num = 1, }, /* S0(7) */
                  { .start = 74, .num = 1, }, /* S0(6) */
                  { .start = 93, .num = 6, }, /* S0(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_8,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 3,
            .rangelist = {
               .num_ranges = 4,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 63, .num = 1, }, /* SB2(2) */
                  { .start = 71, .num = 2, }, /* SB2(1..0) */
                  { .start = 62, .num = 2, }, /* S2(7..6) */
                  { .start = 69, .num = 6, }, /* S2(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_8,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 4,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 99, .num = 4, },
               },
            },
            .encoder_fn = &rogue_encoder_ls_1_16,
         },
      },
   },

   [ROGUE_OP_MAX] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x68, 0x42, 0xd0, 0x3c, 0xfa, 0x10, 0x87, 0x80, 0xc0, 0x80, 0x10, 0x00, 0x32, 0x80, 0x00, 0xff },
      .num_mappings = 3,
      .mappings = (struct rogue_field_mapping []) {
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 11, .num = 2, }, /* DBn(2..1) */
                  { .start = 22, .num = 1, }, /* DBn(0) */
                  { .start = 14, .num = 3, }, /* Dn(10..8) */
                  { .start = 9, .num = 2, }, /* Dn(7..6) */
                  { .start = 21, .num = 6, }, /* Dn(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 7,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 43, .num = 1, }, /* SB0(2) */
                  { .start = 52, .num = 1, }, /* SB0(1) */
                  { .start = 70, .num = 1, }, /* SB0(0) */
                  { .start = 47, .num = 3, }, /* S0(10..8) */
                  { .start = 41, .num = 1, }, /* S0(7) */
                  { .start = 50, .num = 1, }, /* S0(6) */
                  { .start = 69, .num = 6, }, /* S0(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 2,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 51, .num = 1, }, /* SB1(1) */
                  { .start = 61, .num = 1, }, /* SB1(0) */
                  { .start = 40, .num = 1, }, /* S1(7) */
                  { .start = 49, .num = 2, }, /* S1(6..5) */
                  { .start = 60, .num = 5, }, /* S1(4..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_2_8,
         },
      },
   },

   [ROGUE_OP_MIN] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x68, 0x42, 0xd0, 0x3c, 0xf0, 0x11, 0x87, 0x80, 0xc0, 0x80, 0x10, 0x00, 0x32, 0x80, 0x00, 0xff },
      .num_mappings = 3,
      .mappings = (struct rogue_field_mapping []) {
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 11, .num = 2, }, /* DBn(2..1) */
                  { .start = 22, .num = 1, }, /* DBn(0) */
                  { .start = 14, .num = 3, }, /* Dn(10..8) */
                  { .start = 9, .num = 2, }, /* Dn(7..6) */
                  { .start = 21, .num = 6, }, /* Dn(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 7,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 43, .num = 1, }, /* SB0(2) */
                  { .start = 52, .num = 1, }, /* SB0(1) */
                  { .start = 70, .num = 1, }, /* SB0(0) */
                  { .start = 47, .num = 3, }, /* S0(10..8) */
                  { .start = 41, .num = 1, }, /* S0(7) */
                  { .start = 50, .num = 1, }, /* S0(6) */
                  { .start = 69, .num = 6, }, /* S0(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 2,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 51, .num = 1, }, /* SB1(1) */
                  { .start = 61, .num = 1, }, /* SB1(0) */
                  { .start = 40, .num = 1, }, /* S1(7) */
                  { .start = 49, .num = 2, }, /* S1(6..5) */
                  { .start = 60, .num = 5, }, /* S1(4..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_2_8,
         },
      },
   },

   [ROGUE_OP_PACK_U8888] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x58, 0x92, 0x06, 0x9c, 0x20, 0x80, 0x00, 0x00, 0x00, 0x2c, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff },
      .num_mappings = 2,
      .mappings = (struct rogue_field_mapping []) {
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 35, .num = 2, }, /* DBn(2..1) */
                  { .start = 46, .num = 1, }, /* DBn(0) */
                  { .start = 38, .num = 3, }, /* Dn(10..8) */
                  { .start = 33, .num = 2, }, /* Dn(7..6) */
                  { .start = 45, .num = 6, }, /* Dn(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 75, .num = 2, }, /* SB0(2..1) */
                  { .start = 86, .num = 1, }, /* SB0(0) */
                  { .start = 66, .num = 3, }, /* S0(10..8) */
                  { .start = 73, .num = 2, }, /* S0(7..6) */
                  { .start = 85, .num = 6, }, /* S0(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
      },
   },

   [ROGUE_OP_MOV] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x48, 0x42, 0xd0, 0x3f, 0x87, 0x80, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff },
      .num_mappings = 3,
      .mappings = (struct rogue_field_mapping []) {
         /* Instruction flag mappings. */
         {
            .type = ROGUE_MAP_TYPE_INSTR_FLAG,
            .index = ROGUE_INSTR_FLAG_OLCHK,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 115, .num = 1, },
               },
            },
            .encoder_fn = NULL,
         },
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 35, .num = 2, }, /* DBn(2..1) */
                  { .start = 46, .num = 1, }, /* DBn(0) */
                  { .start = 38, .num = 3, }, /* Dn(10..8) */
                  { .start = 33, .num = 2, }, /* Dn(7..6) */
                  { .start = 45, .num = 6, }, /* Dn(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 75, .num = 2, }, /* SB0(2..1) */
                  { .start = 86, .num = 1, }, /* SB0(0) */
                  { .start = 66, .num = 3, }, /* S0(10..8) */
                  { .start = 73, .num = 2, }, /* S0(7..6) */
                  { .start = 85, .num = 6, }, /* S0(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
      },
   },

   [ROGUE_OP_MOV_IMM] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x88, 0x92, 0x40, 0x91, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff },
      .num_mappings = 2,
      .mappings = (struct rogue_field_mapping []) {
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 35, .num = 2, }, /* DBn(2..1) */
                  { .start = 46, .num = 1, }, /* DBn(0) */
                  { .start = 38, .num = 3, }, /* Dn(10..8) */
                  { .start = 33, .num = 2, }, /* Dn(7..6) */
                  { .start = 45, .num = 6, }, /* Dn(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 4,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 71, .num = 8, }, /* imm(31:24) */
                  { .start = 79, .num = 8, }, /* imm(23:16) */
                  { .start = 87, .num = 8, }, /* imm(15:8) */
                  { .start = 95, .num = 8, }, /* imm(7:0) */
               },
            },
            .encoder_fn = &rogue_encoder_imm,
         },
      },
   },

   [ROGUE_OP_FMA] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x28, 0x02, 0xd0, 0x00, 0x80, 0x40, 0x80, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xff, 0xf1, 0xff },
      .num_mappings = 6,
      .mappings = (struct rogue_field_mapping []) {
         /* Instruction flag mappings. */
         {
            .type = ROGUE_MAP_TYPE_INSTR_FLAG,
            .index = ROGUE_INSTR_FLAG_SAT,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 104, .num = 1, },
               },
            },
            .encoder_fn = NULL,
         },
         {
            .type = ROGUE_MAP_TYPE_INSTR_FLAG,
            .index = ROGUE_INSTR_FLAG_LP,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 100, .num = 1, },
               },
            },
            .encoder_fn = NULL,
         },
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 27, .num = 2, }, /* DBn(2..1) */
                  { .start = 38, .num = 1, }, /* DBn(0) */
                  { .start = 30, .num = 3, }, /* Dn(10..8) */
                  { .start = 25, .num = 2, }, /* Dn(7..6) */
                  { .start = 37, .num = 6, }, /* Dn(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 6,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 59, .num = 1, }, /* SB0(2) */
                  { .start = 76, .num = 1, }, /* SB0(1) */
                  { .start = 94, .num = 1, }, /* SB0(0) */
                  { .start = 57, .num = 1, }, /* S0(7) */
                  { .start = 74, .num = 1, }, /* S0(6) */
                  { .start = 93, .num = 6, }, /* S0(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_8,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 2,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 75, .num = 1, }, /* SB1(1) */
                  { .start = 85, .num = 1, }, /* SB1(0) */
                  { .start = 56, .num = 1, }, /* S1(7) */
                  { .start = 73, .num = 2, }, /* S1(6..5) */
                  { .start = 84, .num = 5, }, /* S1(4..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_2_8,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 3,
            .rangelist = {
               .num_ranges = 4,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 63, .num = 1, }, /* SB2(2) */
                  { .start = 71, .num = 2, }, /* SB2(1..0) */
                  { .start = 62, .num = 2, }, /* S2(7..6) */
                  { .start = 69, .num = 6, }, /* S2(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_8,
         },
      },
   },

   [ROGUE_OP_MUL] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x28, 0x02, 0x40, 0x80, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0xff, 0xf2, 0xff, 0xff, 0xff },
      .num_mappings = 5,
      .mappings = (struct rogue_field_mapping []) {
         /* Instruction flag mappings. */
         {
            .type = ROGUE_MAP_TYPE_INSTR_FLAG,
            .index = ROGUE_INSTR_FLAG_SAT,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 108, .num = 1, },
               },
            },
            .encoder_fn = NULL,
         },
         {
            .type = ROGUE_MAP_TYPE_INSTR_FLAG,
            .index = ROGUE_INSTR_FLAG_LP,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 109, .num = 1, },
               },
            },
            .encoder_fn = NULL,
         },
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 43, .num = 2, }, /* DBn(2..1) */
                  { .start = 54, .num = 1, }, /* DBn(0) */
                  { .start = 46, .num = 3, }, /* Dn(10..8) */
                  { .start = 41, .num = 2, }, /* Dn(7..6) */
                  { .start = 53, .num = 6, }, /* Dn(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 7,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 75, .num = 1, }, /* SB0(2) */
                  { .start = 84, .num = 1, }, /* SB0(1) */
                  { .start = 102, .num = 1, }, /* SB0(0) */
                  { .start = 79, .num = 3, }, /* S0(10..8) */
                  { .start = 73, .num = 1, }, /* S0(7) */
                  { .start = 82, .num = 1, }, /* S0(6) */
                  { .start = 101, .num = 6, }, /* S0(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 2,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 83, .num = 1, }, /* SB1(1) */
                  { .start = 93, .num = 1, }, /* SB1(0) */
                  { .start = 72, .num = 1, }, /* S1(7) */
                  { .start = 81, .num = 2, }, /* S1(6..5) */
                  { .start = 92, .num = 5, }, /* S1(4..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_2_8,
         },
      },
   },

   [ROGUE_OP_VTXOUT] = {
      .num_bytes = 16,
      .bytes = (uint8_t []) { 0x48, 0x20, 0x08, 0x00, 0x80, 0x00, 0x00, 0x00, 0x30, 0xff, 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff },
      .num_mappings = 2,
      .mappings = (struct rogue_field_mapping []) {
         /* Operand mappings. */
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 0,
            .rangelist = {
               .num_ranges = 1,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 103, .num = 8, }, /* Immediate address. */
               },
            },
            .encoder_fn = &rogue_encoder_imm,
         },
         {
            .type = ROGUE_MAP_TYPE_OPERAND,
            .index = 1,
            .rangelist = {
               .num_ranges = 5,
               .ranges = (struct rogue_bitrange []) {
                  { .start = 83, .num = 2, }, /* SB0(2..1) */
                  { .start = 94, .num = 1, }, /* SB0(0) */
                  { .start = 74, .num = 3, }, /* S0(10..8) */
                  { .start = 81, .num = 2, }, /* S0(7..6) */
                  { .start = 93, .num = 6, }, /* S0(5..0) */
               },
            },
            .encoder_fn = &rogue_encoder_reg_3_11,
         },
      },
   },
};
+
+/**
+ * \brief Applies a boolean flag encoding onto an instruction mask.
+ *
+ * \param[in] set Whether to set/unset the flag.
+ * \param[in] mapping The field mapping to apply.
+ * \param[in] instr_size The size of the instruction mask in bytes.
+ * \param[in] instr_bytes The instruction mask.
+ * \return true if encoding was successful.
+ */
+static bool rogue_encode_flag(bool set,
+ const struct rogue_field_mapping *mapping,
+ size_t instr_size,
+ uint8_t instr_bytes[instr_size])
+{
+ return rogue_distribute_value((uint64_t)set,
+ &mapping->rangelist,
+ instr_size,
+ instr_bytes);
+}
+
/**
 * \brief Applies an operand encoding onto an instruction mask.
 *
 * The operand is first converted into a raw value by the mapping's encoder
 * function, then scattered into "instr_bytes" at the bit positions listed
 * in the mapping's rangelist.
 *
 * \param[in] operand The operand to apply.
 * \param[in] mapping The field mapping to apply.
 * \param[in] instr_size The size of the instruction mask in bytes.
 * \param[in,out] instr_bytes The instruction mask being updated.
 * \return true if encoding was successful.
 */
static bool rogue_encode_operand(const struct rogue_operand *operand,
                                 const struct rogue_field_mapping *mapping,
                                 size_t instr_size,
                                 uint8_t instr_bytes[instr_size])
{
   uint64_t value = 0U;

   switch (operand->type) {
   /* Pixel-output registers are addressed at an offset within their bank. */
   case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT:
      CHECKF(
         mapping->encoder_fn(&value,
                             2,
                             rogue_encode_reg_bank(operand),
                             operand->reg.number + ROGUE_PIXEL_OUT_REG_OFFSET),
         "Failed to encode pixel output register operand.");
      break;
   /* Internal registers are likewise addressed at an offset. */
   case ROGUE_OPERAND_TYPE_REG_INTERNAL:
      CHECKF(
         mapping->encoder_fn(&value,
                             2,
                             rogue_encode_reg_bank(operand),
                             operand->reg.number + ROGUE_INTERNAL_REG_OFFSET),
         "Failed to encode internal register operand.");
      break;
   /* Remaining register types encode (bank, number) directly. */
   case ROGUE_OPERAND_TYPE_REG_TEMP:
   case ROGUE_OPERAND_TYPE_REG_COEFF:
   case ROGUE_OPERAND_TYPE_REG_CONST:
   case ROGUE_OPERAND_TYPE_REG_SHARED:
   case ROGUE_OPERAND_TYPE_REG_VERTEX_IN:
      CHECKF(mapping->encoder_fn(&value,
                                 2,
                                 rogue_encode_reg_bank(operand),
                                 operand->reg.number),
             "Failed to encode register operand.");
      break;

   case ROGUE_OPERAND_TYPE_IMMEDIATE:
      CHECKF(mapping->encoder_fn(&value, 1, operand->immediate.value),
             "Failed to encode immediate operand.");
      break;

   case ROGUE_OPERAND_TYPE_DRC:
      CHECKF(mapping->encoder_fn(&value, 1, (uint64_t)operand->drc.number),
             "Failed to encode DRC operand.");
      break;

   /* Any other operand type cannot be encoded. */
   default:
      return false;
   }

   /* Scatter the encoded value into the instruction bytes. */
   CHECKF(rogue_distribute_value(value,
                                 &mapping->rangelist,
                                 instr_size,
                                 instr_bytes),
          "Failed to distribute value.");

   return true;
}
+
/**
 * \brief Applies operand and flag encodings to the base instruction bytes, then
 * writes the result to file pointer "fp".
 *
 * The base byte mask for the opcode is copied from instr_encodings[] and
 * each field mapping patches flag/operand bits into it before the bytes
 * are written out.
 *
 * \param[in] instr The instruction to be encoded.
 * \param[in] fp The file pointer.
 * \return true if encoding was successful.
 */
bool rogue_encode_instr(const struct rogue_instr *instr, FILE *fp)
{
   const struct rogue_instr_encoding *instr_encoding;
   size_t instr_size;
   uint8_t instr_bytes[ROGUE_MAX_INSTR_BYTES];

   ASSERT_OPCODE_RANGE(instr->opcode);

   instr_encoding = &instr_encodings[instr->opcode];

   /* Set up base instruction bytes. */
   instr_size = instr_encoding->num_bytes;
   assert(instr_size <= ARRAY_SIZE(instr_bytes));
   memcpy(instr_bytes, instr_encoding->bytes, instr_size);

   /* Encode the operands and flags. */
   for (size_t u = 0U; u < instr_encoding->num_mappings; ++u) {
      const struct rogue_field_mapping *mapping = &instr_encoding->mappings[u];

      switch (mapping->type) {
      case ROGUE_MAP_TYPE_INSTR_FLAG: {
         /* Test the one-hot flag bit for this mapping in the instruction's
          * flag bitmask.
          */
         uint64_t flag = rogue_onehot(mapping->index);
         CHECKF(rogue_encode_flag(!!(instr->flags & flag),
                                  mapping,
                                  instr_size,
                                  instr_bytes),
                "Failed to encode instruction flag.");
         break;
      }

      /* Operand flag mappings are not supported yet. */
      case ROGUE_MAP_TYPE_OPERAND_FLAG:
         return false;

      case ROGUE_MAP_TYPE_OPERAND: {
         size_t operand_index = mapping->index;
         CHECKF(rogue_encode_operand(&instr->operands[operand_index],
                                     mapping,
                                     instr_size,
                                     instr_bytes),
                "Failed to encode instruction operand.");
         break;
      }

      default:
         return false;
      }
   }

   CHECKF(fwrite(instr_bytes, 1, instr_size, fp) == instr_size,
          "Failed to write encoded instruction bytes.");
   /* Flush so rogue_encode_shader's ftell() sees an up-to-date position. */
   fflush(fp);

   return true;
}
+
+/**
+ * \brief Encodes each instruction in "shader", writing the output to "fp".
+ *
+ * \param[in] shader The shader to be encoded.
+ * \param[in] fp The file pointer.
+ * \return true if encoding was successful.
+ */
+bool rogue_encode_shader(const struct rogue_shader *shader, FILE *fp)
+{
+ long bytes_written;
+
+ /* Encode each instruction. */
+ foreach_instr (instr, &shader->instr_list)
+ CHECKF(rogue_encode_instr(instr, fp), "Failed to encode instruction.");
+
+ /* Pad end of shader if required. */
+ bytes_written = ftell(fp);
+ if (bytes_written <= 0)
+ return false;
+
+ /* FIXME: Figure out the define for alignment of 16. */
+ for (size_t u = 0; u < (bytes_written % 16); ++u)
+ fputc(0xff, fp);
+
+ return true;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef ROGUE_ENCODE_H
#define ROGUE_ENCODE_H

#include <stdbool.h>
#include <stdio.h>

#include "util/macros.h"

struct rogue_instr;
struct rogue_shader;

/* Encodes a single instruction, writing its bytes to "fp"; returns true on
 * success.
 */
PUBLIC
bool rogue_encode_instr(const struct rogue_instr *instr, FILE *fp);

/* Encodes every instruction in "shader" to "fp", padding the output to a
 * 16-byte boundary; returns true on success.
 */
PUBLIC
bool rogue_encode_shader(const struct rogue_shader *shader, FILE *fp);

#endif /* ROGUE_ENCODE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_encoders.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+
/**
 * \brief Passes the single input value through to "value" unchanged.
 *
 * \param[in] value Pointer to the destination value.
 * \param[in] inputs Number of inputs provided; must be 1.
 * \param[in] ... Input value(s).
 * \return true if encoding was successful.
 */
bool rogue_encoder_pass(uint64_t *value, size_t inputs, ...)
{
   va_list ap;
   uint64_t input;

   assert(inputs == 1);

   va_start(ap, inputs);
   input = va_arg(ap, uint64_t);
   va_end(ap);

   *value = input;

   return true;
}
+
/**
 * \brief Encoder for DRC values.
 *
 * Defined as a GNU "alias" of #rogue_encoder_pass(): the DRC number is
 * passed through unchanged.
 *
 * \sa #rogue_encoder_pass()
 *
 * \param[in] value Pointer to the destination value.
 * \param[in] inputs Number of inputs provided.
 * \param[in] ... Input value(s).
 * \return true if encoding was successful.
 */
bool rogue_encoder_drc(uint64_t *value, size_t inputs, ...)
   __attribute__((alias("rogue_encoder_pass")));

/**
 * \brief Encoder for immediate values.
 *
 * Defined as a GNU "alias" of #rogue_encoder_pass(): the immediate is
 * passed through unchanged.
 *
 * \sa #rogue_encoder_pass()
 *
 * \param[in] value Pointer to the destination value.
 * \param[in] inputs Number of inputs provided.
 * \param[in] ... Input value(s).
 * \return true if encoding was successful.
 */
bool rogue_encoder_imm(uint64_t *value, size_t inputs, ...)
   __attribute__((alias("rogue_encoder_pass")));
+
/**
 * \brief Encodes input ranges {1..15 -> 1-15} and {16 -> 0}.
 *
 * The input must be in the range 1-16; values 1-15 are passed through
 * unchanged and 16 is represented by 0.
 *
 * \param[in] value Pointer to the destination value.
 * \param[in] inputs Number of inputs provided; must be 1.
 * \param[in] ... Input value(s).
 * \return true if encoding was successful, false if the input was out of range.
 */
bool rogue_encoder_ls_1_16(uint64_t *value, size_t inputs, ...)
{
   va_list ap;
   uint64_t raw;

   assert(inputs == 1);

   va_start(ap, inputs);
   raw = va_arg(ap, uint64_t);
   va_end(ap);

   /* Only 1-16 are representable. */
   if (raw < 1 || raw > 16) {
      *value = UINT64_MAX;
      return false;
   }

   *value = (raw == 16) ? 0 : raw;

   return true;
}
+
/**
 * \brief Encodes a register bank/number pair into a single value.
 *
 * The register number occupies the low "num_bits" bits and the bank
 * number sits directly above it.
 *
 * \param[in] value Pointer to the destination value.
 * \param[in] bank_bits The number of bits used to represent the register bank.
 * \param[in] bank The register bank.
 * \param[in] num_bits The number of bits used to represent the register number.
 * \param[in] num The register number.
 * \return true if encoding was successful.
 */
static bool rogue_encoder_reg(uint64_t *value,
                              size_t bank_bits,
                              size_t bank,
                              size_t num_bits,
                              size_t num)
{
   /* Verify "num" fits in "num_bits" and "bank" fits in "bank_bits". */
   assert(util_last_bit64(num) <= num_bits);
   assert(util_last_bit64(bank) <= bank_bits);

   *value = ((uint64_t)bank << num_bits) | num;

   return true;
}
+
/**
 * \brief Macro to define the rogue_encoder_reg variants.
 *
 * Each variant unpacks (bank, num) from its varargs and defers to
 * rogue_encoder_reg() with the corresponding bit widths.
 */
#define ROGUE_ENCODER_REG_VARIANT(bank_bits, num_bits)               \
   bool rogue_encoder_reg_##bank_bits##_##num_bits(uint64_t *value,  \
                                                   size_t inputs,    \
                                                   ...)              \
   {                                                                 \
      va_list args;                                                  \
      size_t bank;                                                   \
      size_t num;                                                    \
      assert(inputs == 2);                                           \
      va_start(args, inputs);                                        \
      bank = va_arg(args, size_t);                                   \
      num = va_arg(args, size_t);                                    \
      va_end(args);                                                  \
      return rogue_encoder_reg(value, bank_bits, bank, num_bits, num); \
   }

/* Variants used by the instruction field mappings in rogue_encode.c. */
ROGUE_ENCODER_REG_VARIANT(2, 8)
ROGUE_ENCODER_REG_VARIANT(3, 8)
ROGUE_ENCODER_REG_VARIANT(3, 11)

#undef ROGUE_ENCODER_REG_VARIANT
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef ROGUE_ENCODERS_H
#define ROGUE_ENCODERS_H

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "util/macros.h"

/* Encoder function: converts "inputs" varargs into *value.
 * Returns false if input was invalid. */
typedef bool (*field_encoder_t)(uint64_t *value, size_t inputs, ...);

/* Passes a single input through unchanged. */
bool rogue_encoder_pass(uint64_t *value, size_t inputs, ...);
/* DRC-number encoder (alias of rogue_encoder_pass). */
bool rogue_encoder_drc(uint64_t *value, size_t inputs, ...);
/* Immediate-value encoder (alias of rogue_encoder_pass). */
bool rogue_encoder_imm(uint64_t *value, size_t inputs, ...);
/* Encodes 1..15 as-is and 16 as 0; fails for inputs outside 1-16. */
bool rogue_encoder_ls_1_16(uint64_t *value, size_t inputs, ...);

/**
 * \brief Macro to declare the rogue_encoder_reg variants.
 */
#define ROGUE_ENCODER_REG_VARIANT(bank_bits, num_bits)              \
   bool rogue_encoder_reg_##bank_bits##_##num_bits(uint64_t *value, \
                                                   size_t inputs,   \
                                                   ...);
ROGUE_ENCODER_REG_VARIANT(2, 8)
ROGUE_ENCODER_REG_VARIANT(3, 8)
ROGUE_ENCODER_REG_VARIANT(3, 11)
#undef ROGUE_ENCODER_REG_VARIANT

#endif /* ROGUE_ENCODERS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_util.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue_instr.c
+ *
+ * \brief Contains functions to manipulate Rogue instructions.
+ */
+
+/* clang-format off */
+
/* Number of operands each instruction takes, indexed by opcode; must match
 * the operand field mappings in rogue_encode.c.
 */
static const size_t instr_operand_count[ROGUE_OP_COUNT] = {
   [ROGUE_OP_NOP] = 0,
   [ROGUE_OP_END_FRAG] = 0,
   [ROGUE_OP_END_VERT] = 0,
   [ROGUE_OP_WDF] = 1,
   [ROGUE_OP_PIX_ITER_W] = 5,
   [ROGUE_OP_MAX] = 3,
   [ROGUE_OP_MIN] = 3,
   [ROGUE_OP_PACK_U8888] = 2,
   [ROGUE_OP_MOV] = 2,
   [ROGUE_OP_MOV_IMM] = 2,
   [ROGUE_OP_FMA] = 4,
   [ROGUE_OP_MUL] = 3,
   [ROGUE_OP_VTXOUT] = 2,
};
+
+/* clang-format on */
+
/**
 * \brief Returns the number of operands an instruction takes.
 *
 * \param[in] opcode The instruction opcode.
 * \return The number of operands.
 */
static inline size_t rogue_instr_num_operands(enum rogue_opcode opcode)
{
   ASSERT_OPCODE_RANGE(opcode);

   /* instr_operand_count is indexed directly by opcode. */
   return instr_operand_count[opcode];
}
+
+/**
+ * \brief Allocates and initializes a new Rogue instruction.
+ *
+ * The instruction is ralloc-parented to \p mem_ctx and its operand
+ * array (if any) is parented to the instruction itself, so freeing
+ * the instruction frees the operands too.
+ *
+ * \param[in] mem_ctx The memory context for the instruction.
+ * \param[in] opcode The instruction opcode.
+ * \return A rogue_instr* if successful, or NULL if unsuccessful.
+ */
+struct rogue_instr *rogue_instr_create(void *mem_ctx, enum rogue_opcode opcode)
+{
+   ASSERT_OPCODE_RANGE(opcode);
+
+   struct rogue_instr *instr = rzalloc_size(mem_ctx, sizeof(*instr));
+   if (!instr)
+      return NULL;
+
+   instr->opcode = opcode;
+   instr->num_operands = rogue_instr_num_operands(opcode);
+
+   /* Operand-less instructions keep operands == NULL. */
+   if (instr->num_operands > 0) {
+      instr->operands = rzalloc_array_size(instr,
+                                           sizeof(*instr->operands),
+                                           instr->num_operands);
+      if (!instr->operands) {
+         ralloc_free(instr);
+         return NULL;
+      }
+   }
+
+   return instr;
+}
+
+/**
+ * \brief Sets a Rogue instruction flag.
+ *
+ * Previously-set flags are preserved: "flags" is documented as a mask
+ * of #rogue_instr_flag values, so setting one flag must not clear the
+ * others.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] flag The flag to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_flag(struct rogue_instr *instr, enum rogue_instr_flag flag)
+{
+   /* OR rather than assign: plain "=" would silently discard flags set
+    * by earlier calls.
+    */
+   instr->flags |= ROH(flag);
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to an immediate value.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] value The value to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_imm(struct rogue_instr *instr,
+                                 size_t index,
+                                 uint64_t value)
+{
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+
+   struct rogue_operand *operand = &instr->operands[index];
+
+   operand->type = ROGUE_OPERAND_TYPE_IMMEDIATE;
+   operand->immediate.value = value;
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to a DRC number.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] number The DRC number to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_drc(struct rogue_instr *instr,
+                                 size_t index,
+                                 size_t number)
+{
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+
+   struct rogue_operand *operand = &instr->operands[index];
+
+   operand->type = ROGUE_OPERAND_TYPE_DRC;
+   operand->drc.number = number;
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to a register.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] type The register type to set (must be a register type).
+ * \param[in] number The register number to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_reg(struct rogue_instr *instr,
+                                 size_t index,
+                                 enum rogue_operand_type type,
+                                 size_t number)
+{
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+   ASSERT_OPERAND_REG(type);
+
+   struct rogue_operand *operand = &instr->operands[index];
+
+   operand->type = type;
+   operand->reg.number = number;
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to a (scalar) virtual register.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] number The register number to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_vreg(struct rogue_instr *instr,
+                                  size_t index,
+                                  size_t number)
+{
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+
+   struct rogue_operand *operand = &instr->operands[index];
+
+   operand->type = ROGUE_OPERAND_TYPE_VREG;
+   operand->vreg.number = number;
+   operand->vreg.is_vector = false;
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to a virtual register
+ * that is a vector type.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] component The vector component.
+ * \param[in] number The register number to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_vreg_vec(struct rogue_instr *instr,
+                                      size_t index,
+                                      size_t component,
+                                      size_t number)
+{
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+
+   struct rogue_operand *operand = &instr->operands[index];
+
+   operand->type = ROGUE_OPERAND_TYPE_VREG;
+   operand->vreg.number = number;
+   operand->vreg.is_vector = true;
+   operand->vreg.component = component;
+
+   return true;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_INSTR_H
+#define ROGUE_INSTR_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_operand.h"
+#include "util/list.h"
+
+/**
+ * \brief Instruction opcodes.
+ */
+enum rogue_opcode {
+   ROGUE_OP_NOP = 0, /**< No-operation. */
+   ROGUE_OP_END_FRAG, /**< Fragment shader end. */
+   ROGUE_OP_END_VERT, /**< Vertex shader end. */
+   ROGUE_OP_WDF, /**< Write data fence. */
+
+   ROGUE_OP_PIX_ITER_W, /**< Pixel iteration with coefficients. */
+
+   ROGUE_OP_MAX, /**< Returns the largest out of two floats. */
+   ROGUE_OP_MIN, /**< Returns the smallest out of two floats. */
+
+   ROGUE_OP_PACK_U8888, /**< Scales the four input floats:
+                         * [0.0f, 1.0f] -> [0, 255] and packs them
+                         * into a 32-bit unsigned integer.
+                         */
+
+   ROGUE_OP_MOV, /**< Register move instruction. */
+   ROGUE_OP_MOV_IMM, /**< Move immediate instruction. */
+
+   ROGUE_OP_FMA, /**< Fused-multiply-add (float). */
+   ROGUE_OP_MUL, /**< Multiply (float). */
+
+   ROGUE_OP_VTXOUT, /**< Writes the input register
+                     * to the given vertex output index.
+                     */
+
+   ROGUE_OP_COUNT,
+};
+
+/**
+ * \brief Instruction flags.
+ *
+ * These are bit positions, converted into one-hot masks via ROH()
+ * before being stored in rogue_instr::flags.
+ */
+enum rogue_instr_flag {
+   ROGUE_INSTR_FLAG_SAT = 0, /**< Saturate values to 0.0 ... 1.0. */
+   ROGUE_INSTR_FLAG_LP, /**< Low-precision modifier. */
+   ROGUE_INSTR_FLAG_OLCHK, /**< Overlap check (pixel write). */
+
+   ROGUE_INSTR_FLAG_COUNT,
+};
+
+/**
+ * \brief Instruction description.
+ */
+struct rogue_instr {
+   enum rogue_opcode opcode;
+
+   size_t num_operands; /**< Length of the operands array. */
+   struct rogue_operand *operands;
+
+   uint64_t flags; /**< A mask of #rogue_instr_flag values. */
+
+   struct list_head node; /**< Linked list node. */
+};
+
+struct rogue_instr *rogue_instr_create(void *mem_ctx, enum rogue_opcode opcode);
+
+bool rogue_instr_set_flag(struct rogue_instr *instr,
+ enum rogue_instr_flag flag);
+
+bool rogue_instr_set_operand_imm(struct rogue_instr *instr,
+ size_t index,
+ uint64_t value);
+bool rogue_instr_set_operand_drc(struct rogue_instr *instr,
+ size_t index,
+ size_t number);
+bool rogue_instr_set_operand_reg(struct rogue_instr *instr,
+ size_t index,
+ enum rogue_operand_type type,
+ size_t number);
+bool rogue_instr_set_operand_vreg(struct rogue_instr *instr,
+ size_t index,
+ size_t number);
+bool rogue_instr_set_operand_vreg_vec(struct rogue_instr *instr,
+ size_t index,
+ size_t component,
+ size_t number);
+#endif /* ROGUE_INSTR_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/spirv/nir_spirv.h"
+#include "nir/nir.h"
+#include "nir/nir_schedule.h"
+#include "rogue_nir.h"
+#include "rogue_operand.h"
+
+/**
+ * \file rogue_nir.c
+ *
+ * \brief Contains NIR-specific functions.
+ */
+
+/**
+ * \brief SPIR-V to NIR compilation options.
+ *
+ * Also consumed by rogue_nir_passes() when lowering explicit UBO I/O,
+ * so the address format here must match the lowering there.
+ */
+static const struct spirv_to_nir_options spirv_options = {
+   .environment = NIR_SPIRV_VULKAN,
+
+   /* Buffer address: (descriptor_set, binding), offset. */
+   .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
+};
+
+/**
+ * \brief NIR shader compiler options.
+ */
+static const nir_shader_compiler_options nir_options = {
+   .lower_fsat = true,
+   .fuse_ffma32 = true,
+};
+
+/**
+ * \brief Returns the SPIR-V to NIR compilation options.
+ *
+ * \param[in] compiler The Rogue compiler (currently unused).
+ * \return The SPIR-V to NIR options.
+ */
+const struct spirv_to_nir_options *
+rogue_get_spirv_options(const struct rogue_compiler *compiler)
+{
+   return &spirv_options;
+}
+
+/**
+ * \brief Returns the NIR shader compiler options.
+ *
+ * \param[in] compiler The Rogue compiler (currently unused).
+ * \return The NIR compiler options.
+ */
+const nir_shader_compiler_options *
+rogue_get_compiler_options(const struct rogue_compiler *compiler)
+{
+   return &nir_options;
+}
+
+/**
+ * \brief Type-size callback for nir_lower_io: counts the attribute
+ * slots occupied by \p type (\p bindless is ignored).
+ */
+static int rogue_glsl_type_size(const struct glsl_type *type, bool bindless)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
+/**
+ * \brief Applies optimizations and passes required to lower the NIR shader into
+ * a form suitable for lowering to Rogue IR.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir The NIR shader to lower.
+ * \param[in] stage Shader stage.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_nir_passes(struct rogue_build_ctx *ctx,
+                      nir_shader *nir,
+                      gl_shader_stage stage)
+{
+   bool progress;
+
+   nir_validate_shader(nir, "after spirv_to_nir");
+
+   /* Splitting. */
+   NIR_PASS_V(nir, nir_split_var_copies);
+   NIR_PASS_V(nir, nir_split_per_member_structs);
+
+   /* Ensure fs outputs are in the [0.0f...1.0f] range. */
+   NIR_PASS_V(nir, nir_lower_clamp_color_outputs);
+
+   /* Replace references to I/O variables with intrinsics. */
+   NIR_PASS_V(nir,
+              nir_lower_io,
+              nir_var_shader_in | nir_var_shader_out,
+              rogue_glsl_type_size,
+              (nir_lower_io_options)0);
+
+   /* Load inputs to scalars (single registers later). */
+   NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in);
+
+   /* Optimize GL access qualifiers. */
+   const nir_opt_access_options opt_access_options = {
+      .is_vulkan = true,
+      .infer_non_readable = true,
+   };
+   NIR_PASS_V(nir, nir_opt_access, &opt_access_options);
+
+   /* Apply PFO code to the fragment shader output. */
+   if (nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS_V(nir, rogue_nir_pfo);
+
+   /* Load outputs to scalars (single registers later). */
+   NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out);
+
+   /* Lower ALU operations to scalars. */
+   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
+
+   /* Algebraic opts. */
+   do {
+      progress = false;
+
+      NIR_PASS(progress, nir, nir_copy_prop);
+      NIR_PASS(progress, nir, nir_opt_cse);
+      NIR_PASS(progress, nir, nir_opt_algebraic);
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+      NIR_PASS(progress, nir, nir_opt_dce);
+      /* NOTE(review): NIR_PASS_V does not feed "progress", so GCM alone
+       * cannot keep this loop iterating — confirm that is intentional.
+       */
+      NIR_PASS_V(nir, nir_opt_gcm, false);
+   } while (progress);
+
+   /* Additional I/O lowering. */
+   NIR_PASS_V(nir,
+              nir_lower_explicit_io,
+              nir_var_mem_ubo,
+              spirv_options.ubo_addr_format);
+   NIR_PASS_V(nir, rogue_nir_lower_io, NULL);
+
+   /* Late algebraic opts. */
+   do {
+      progress = false;
+
+      NIR_PASS(progress, nir, nir_opt_algebraic_late);
+      /* NOTE(review): the clean-up passes below run via NIR_PASS_V and do
+       * not affect "progress"; the loop repeats only while
+       * nir_opt_algebraic_late makes progress.
+       */
+      NIR_PASS_V(nir, nir_opt_constant_folding);
+      NIR_PASS_V(nir, nir_copy_prop);
+      NIR_PASS_V(nir, nir_opt_dce);
+      NIR_PASS_V(nir, nir_opt_cse);
+   } while (progress);
+
+   /* Replace SSA constant references with a register that loads the value. */
+   NIR_PASS_V(nir, rogue_nir_constreg);
+   /* Remove unused constant registers. */
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   /* Move loads to just before they're needed. */
+   NIR_PASS_V(nir, nir_opt_move, nir_move_load_ubo | nir_move_load_input);
+
+   /* Convert vecNs to movs so we can sequentially allocate them later. */
+   NIR_PASS_V(nir, nir_lower_vec_to_movs, NULL, NULL);
+
+   /* Out of SSA pass. */
+   NIR_PASS_V(nir, nir_convert_from_ssa, false);
+
+   /* TODO: Re-enable scheduling after register pressure tweaks. */
+#if 0
+   /* Instruction scheduling. */
+   struct nir_schedule_options schedule_options = {
+      .threshold = ROGUE_MAX_REG_TEMP / 2,
+   };
+   NIR_PASS_V(nir, nir_schedule, &schedule_options);
+#endif
+
+   /* Assign I/O locations. */
+   nir_assign_io_var_locations(nir,
+                               nir_var_shader_in,
+                               &nir->num_inputs,
+                               nir->info.stage);
+   nir_assign_io_var_locations(nir,
+                               nir_var_shader_out,
+                               &nir->num_outputs,
+                               nir->info.stage);
+
+   /* Gather info into nir shader struct. */
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   /* Clean-up after passes. */
+   nir_sweep(nir);
+
+   nir_validate_shader(nir, "after passes");
+
+   return true;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_NIR_H
+#define ROGUE_NIR_H
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "util/macros.h"
+
+struct rogue_build_ctx;
+struct rogue_compiler;
+struct spirv_to_nir_options;
+
+PUBLIC
+const struct spirv_to_nir_options *
+rogue_get_spirv_options(const struct rogue_compiler *compiler);
+
+PUBLIC
+const nir_shader_compiler_options *
+rogue_get_compiler_options(const struct rogue_compiler *compiler);
+
+bool rogue_nir_passes(struct rogue_build_ctx *ctx,
+ nir_shader *nir,
+ gl_shader_stage stage);
+
+/* Custom passes. */
+void rogue_nir_pfo(nir_shader *shader);
+void rogue_nir_constreg(nir_shader *shader);
+bool rogue_nir_lower_io(nir_shader *shader, void *layout);
+
+#endif /* ROGUE_NIR_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_NIR_HELPERS_H
+#define ROGUE_NIR_HELPERS_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "nir/nir.h"
+#include "util/bitscan.h"
+
+/**
+ * \file rogue_nir_helpers.h
+ *
+ * \brief Contains various NIR helper functions.
+ */
+
+/** \brief Returns the register index of an ALU instruction's
+ *  (non-SSA) destination.
+ */
+static inline unsigned nir_alu_dest_regindex(const nir_alu_instr *alu)
+{
+   assert(!alu->dest.dest.is_ssa);
+
+   return alu->dest.dest.reg.reg->index;
+}
+
+/** \brief Returns the single component written by an ALU instruction;
+ *  exactly one write-mask bit must be set.
+ */
+static inline unsigned nir_alu_dest_comp(const nir_alu_instr *alu)
+{
+   assert(!alu->dest.dest.is_ssa);
+   assert(util_is_power_of_two_nonzero(alu->dest.write_mask));
+
+   return ffs(alu->dest.write_mask) - 1;
+}
+
+/** \brief Returns the register index of ALU source \p src (non-SSA). */
+static inline unsigned nir_alu_src_regindex(const nir_alu_instr *alu,
+                                            size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+   assert(!alu->src[src].src.is_ssa);
+
+   return alu->src[src].src.reg.reg->index;
+}
+
+/** \brief Returns ALU source \p src as a 32-bit constant.
+ *
+ * NOTE(review): nir_src_as_const_value() returns NULL for a non-constant
+ * SSA source and would be dereferenced here — callers should check
+ * nir_alu_src_is_const() first.
+ */
+static inline uint32_t nir_alu_src_const(const nir_alu_instr *alu, size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+   assert(alu->src[src].src.is_ssa);
+
+   nir_const_value *const_value = nir_src_as_const_value(alu->src[src].src);
+
+   return nir_const_value_as_uint(*const_value, 32);
+}
+
+/** \brief Returns whether ALU source \p src is an SSA load_const value. */
+static inline bool nir_alu_src_is_const(const nir_alu_instr *alu, size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+
+   /* Register (non-SSA) sources can never be constants. */
+   if (!alu->src[src].src.is_ssa)
+      return false;
+
+   assert(alu->src[src].src.ssa->parent_instr);
+
+   return (alu->src[src].src.ssa->parent_instr->type ==
+           nir_instr_type_load_const);
+}
+
+/** \brief Returns the register index of an intrinsic's (non-SSA)
+ *  destination.
+ */
+static inline unsigned nir_intr_dest_regindex(const nir_intrinsic_instr *intr)
+{
+   assert(!intr->dest.is_ssa);
+
+   return intr->dest.reg.reg->index;
+}
+
+/** \brief Returns the register index of intrinsic source \p src (non-SSA). */
+static inline unsigned nir_intr_src_regindex(const nir_intrinsic_instr *intr,
+                                             size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(!intr->src[src].is_ssa);
+
+   return intr->src[src].reg.reg->index;
+}
+
+/** \brief Returns intrinsic source \p src as a 32-bit constant.
+ *
+ * NOTE(review): nir_src_as_const_value() returns NULL for a non-constant
+ * SSA source and would be dereferenced here — callers should check
+ * nir_intr_src_is_const() first.
+ */
+static inline uint32_t nir_intr_src_const(const nir_intrinsic_instr *intr,
+                                          size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(intr->src[src].is_ssa);
+
+   nir_const_value *const_value = nir_src_as_const_value(intr->src[src]);
+
+   return nir_const_value_as_uint(*const_value, 32);
+}
+
+/** \brief Returns component \p comp of intrinsic source \p src as a
+ *  32-bit constant.
+ */
+static inline uint32_t nir_intr_src_comp_const(const nir_intrinsic_instr *intr,
+                                               size_t src,
+                                               size_t comp)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(intr->src[src].is_ssa);
+   assert(comp < nir_src_num_components(intr->src[src]));
+
+   return nir_src_comp_as_uint(intr->src[src], comp);
+}
+
+/** \brief Returns whether intrinsic source \p src is an SSA load_const
+ *  value.
+ */
+static inline bool nir_intr_src_is_const(const nir_intrinsic_instr *intr,
+                                         size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+
+   /* Register (non-SSA) sources can never be constants. */
+   if (!intr->src[src].is_ssa)
+      return false;
+
+   assert(intr->src[src].ssa->parent_instr);
+
+   return (intr->src[src].ssa->parent_instr->type == nir_instr_type_load_const);
+}
+
+/** \brief Counts the shader variables matching the given mode mask. */
+static inline size_t nir_count_variables_with_modes(const nir_shader *nir,
+                                                    nir_variable_mode mode)
+{
+   size_t count = 0;
+
+   nir_foreach_variable_with_modes (var, nir, mode)
+      ++count;
+
+   return count;
+}
+
+#endif /* ROGUE_NIR_HELPERS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rogue_operand.h"
+
+/**
+ * \file rogue_operand.c
+ *
+ * \brief Contains functions to manipulate Rogue instruction operands.
+ */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_OPERAND_H
+#define ROGUE_OPERAND_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_util.h"
+#include "util/macros.h"
+
+/* Register-related defines. */
+
+/* Total max number of registers per class
+ * (instances > ROGUE_MAX_REG_INDEX addressable via indexing only).
+ */
+#define ROGUE_MAX_REG_TEMP 248
+#define ROGUE_MAX_REG_COEFF 4096
+#define ROGUE_MAX_REG_CONST 240
+#define ROGUE_MAX_REG_SHARED 4096
+#define ROGUE_MAX_REG_PIXEL_OUT 8
+#define ROGUE_MAX_REG_VERTEX_IN 248
+#define ROGUE_MAX_REG_INTERNAL 8
+
+/* Maximum register index via offset encoding. */
+#define ROGUE_MAX_REG_INDEX 256
+
+/* Pixel-out register offset. */
+#define ROGUE_PIXEL_OUT_REG_OFFSET 32
+
+/* Internal register offset. */
+#define ROGUE_INTERNAL_REG_OFFSET 36
+
+/* Coefficient registers are typically used in groups of 4. */
+#define ROGUE_COEFF_ALIGN 4
+
+/* Defines for other operand types. */
+
+/* Available dependent read counters. */
+#define ROGUE_NUM_DRCS 2
+
+/* Maximum number of vertex outputs. */
+#define ROGUE_MAX_VERTEX_OUTPUTS 256
+
+/* All components of an emulated vec4 register group. */
+#define ROGUE_COMPONENT_ALL (~0)
+
+/**
+ * \brief Operand types.
+ */
+enum rogue_operand_type {
+   /* Register operands. */
+   ROGUE_OPERAND_TYPE_REG_TEMP = 0, /**< Temporary register. */
+   ROGUE_OPERAND_TYPE_REG_COEFF, /**< Coefficient register. */
+   ROGUE_OPERAND_TYPE_REG_CONST, /**< Constant register. */
+   ROGUE_OPERAND_TYPE_REG_SHARED, /**< Shared register. */
+   ROGUE_OPERAND_TYPE_REG_PIXEL_OUT, /**< Pixel output register. */
+   ROGUE_OPERAND_TYPE_REG_VERTEX_IN, /**< Vertex input register. */
+   ROGUE_OPERAND_TYPE_REG_INTERNAL, /**< Internal register. */
+
+   ROGUE_OPERAND_TYPE_REG_MAX = ROGUE_OPERAND_TYPE_REG_INTERNAL,
+
+   ROGUE_OPERAND_TYPE_IMMEDIATE, /**< Immediate value. */
+
+   ROGUE_OPERAND_TYPE_DRC, /**< Dependent read counter. */
+
+   ROGUE_OPERAND_TYPE_VREG, /**< Virtual register (pre-regalloc). */
+
+   ROGUE_OPERAND_TYPE_COUNT,
+};
+
+/* clang-format off */
+
+#define ROGUE_NUM_REG_TYPES (ROGUE_OPERAND_TYPE_REG_MAX + 1)
+
+/**
+ * \brief A bitmask for any register operand type.
+ *
+ * Fully parenthesized so uses like "x & ROGUE_MASK_ANY_REG" group as
+ * intended ("&" binds tighter than "|").
+ */
+#define ROGUE_MASK_ANY_REG \
+   (ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | \
+    ROH(ROGUE_OPERAND_TYPE_REG_COEFF) | \
+    ROH(ROGUE_OPERAND_TYPE_REG_CONST) | \
+    ROH(ROGUE_OPERAND_TYPE_REG_PIXEL_OUT) | \
+    ROH(ROGUE_OPERAND_TYPE_REG_VERTEX_IN) | \
+    ROH(ROGUE_OPERAND_TYPE_REG_SHARED) | \
+    ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL))
+
+/* clang-format on */
+
+/**
+ * \brief Operand description.
+ */
+struct rogue_operand {
+   enum rogue_operand_type type;
+
+   union {
+      struct {
+         uint64_t value; /**< Immediate value. */
+      } immediate;
+
+      struct {
+         size_t number; /**< Dependent read counter number. */
+      } drc;
+
+      struct {
+         size_t number; /**< Register number. */
+      } reg;
+
+      struct {
+         size_t number; /**< Virtual register number. */
+         bool is_vector; /**< Whether this is an emulated vector register. */
+         size_t component; /**< Vector component (meaningful when is_vector). */
+      } vreg;
+   };
+};
+
+/**
+ * \brief Register access flags.
+ */
+enum rogue_register_access {
+   ROGUE_REG_ACCESS_READ = BITFIELD_BIT(0U), /**< Read-only. */
+   ROGUE_REG_ACCESS_WRITE = BITFIELD_BIT(1U), /**< Write-only. */
+   ROGUE_REG_ACCESS_RW = ROGUE_REG_ACCESS_READ |
+                         ROGUE_REG_ACCESS_WRITE, /**< Read/write. */
+};
+
+/**
+ * \brief Register modifier flags.
+ */
+enum rogue_register_modifier {
+   ROGUE_REG_MOD_NONE = 0U,
+   ROGUE_REG_MOD_IDX = BITFIELD_BIT(0U), /**< Index modifier. */
+   ROGUE_REG_MOD_DIM = BITFIELD_BIT(1U), /**< Dimension modifier. */
+   ROGUE_REG_MOD_ALL = ROGUE_REG_MOD_IDX | ROGUE_REG_MOD_DIM,
+};
+
+#endif /* ROGUE_OPERAND_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_operand.h"
+#include "rogue_regalloc.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "util/hash_table.h"
+#include "util/list.h"
+#include "util/ralloc.h"
+#include "util/register_allocate.h"
+#include "util/u_dynarray.h"
+
+/**
+ * \file rogue_regalloc.c
+ *
+ * \brief Contains register allocation helper functions.
+ */
+
+/**
+ * \brief Sets up the register data with the classes to be used for allocation.
+ *
+ * \param[in] data The register data array.
+ */
+static void
+rogue_reg_data_init(struct rogue_reg_data data[static ROGUE_REG_CLASS_COUNT])
+{
+   struct rogue_reg_data *temp = &data[ROGUE_REG_CLASS_TEMP];
+   struct rogue_reg_data *vec4 = &data[ROGUE_REG_CLASS_VEC4];
+
+   /* Scalar temporaries: allocated one register at a time. */
+   temp->type = ROGUE_OPERAND_TYPE_REG_TEMP;
+   temp->count = ROGUE_MAX_REG_TEMP;
+   temp->stride = 1;
+
+   /* Emulated vec4s: backed by the internal register bank,
+    * allocated in groups of 4.
+    */
+   vec4->type = ROGUE_OPERAND_TYPE_REG_INTERNAL;
+   vec4->count = ROGUE_MAX_REG_INTERNAL;
+   vec4->stride = 4;
+}
+
+/**
+ * \brief Initializes the Rogue register allocation context.
+ *
+ * The register set is ralloc-parented to the returned context, so a
+ * single ralloc_free() on it (or its parent) releases everything.
+ *
+ * \param[in] mem_ctx The memory context for the ra context.
+ * \return A rogue_ra * if successful, or NULL if unsuccessful.
+ */
+struct rogue_ra *rogue_ra_init(void *mem_ctx)
+{
+   struct rogue_ra *ra;
+   size_t total_regs = 0;
+
+   ra = rzalloc_size(mem_ctx, sizeof(*ra));
+   if (!ra)
+      return NULL;
+
+   /* Initialize the register class data. */
+   rogue_reg_data_init(ra->reg_data);
+
+   /* Count up the register classes and set up their offsets.
+    *
+    * The physical register numbers are sequential, even if the
+    * registers are from different banks, so keeping track of
+    * the offset means we can get the true physical register
+    * number back after allocation.
+    */
+   for (size_t u = 0; u < ARRAY_SIZE(ra->reg_data); ++u) {
+      ra->reg_data[u].offset = total_regs;
+      total_regs += ra->reg_data[u].count;
+   }
+
+   /* Create a register set for allocation. */
+   ra->regs = ra_alloc_reg_set(ra, total_regs, true);
+   if (!ra->regs) {
+      ralloc_free(ra);
+      return NULL;
+   }
+
+   /* Create the register class for the temps. */
+   ra->reg_data[ROGUE_REG_CLASS_TEMP].class =
+      ra_alloc_contig_reg_class(ra->regs, 1);
+
+   /* Create the register class for vec4 registers
+    * (using the internal register bank).
+    */
+   ra->reg_data[ROGUE_REG_CLASS_VEC4].class =
+      ra_alloc_contig_reg_class(ra->regs, 4);
+
+   /* Populate the register classes, stepping by each class's stride. */
+   for (size_t u = 0; u < ARRAY_SIZE(ra->reg_data); ++u) {
+      struct rogue_reg_data *reg_data = &ra->reg_data[u];
+      size_t offset = reg_data->offset;
+      size_t end = reg_data->offset + reg_data->count;
+      size_t stride = reg_data->stride;
+
+      for (size_t r = offset; r < end; r += stride)
+         ra_class_add_reg(reg_data->class, r);
+   }
+
+   /* Finalize the set (no early conflicts passed along for now). */
+   ra_set_finalize(ra->regs, NULL);
+
+   return ra;
+}
+
+/**
+ * \brief The range for which a (virtual) register is live, and its references.
+ */
+struct live_range {
+   size_t start; /**< Instruction index of the first use. */
+   size_t end; /**< Instruction index of the last use. */
+   enum rogue_reg_class class; /**< Register class (scalar temp vs. vec4). */
+   struct util_dynarray operand_refs; /**< rogue_operand* references to
+                                       * rewrite after allocation.
+                                       */
+};
+
+/**
+ * \brief Performs register allocation.
+ *
+ * \param[in] instr_list A linked list of instructions with virtual registers to
+ * be allocated.
+ * \param[in] ra The register allocation context.
+ */
+bool rogue_ra_alloc(struct list_head *instr_list,
+                    struct rogue_ra *ra,
+                    size_t *temps_used,
+                    size_t *internals_used)
+{
+   /* Highest virtual register number seen. Used for
+    * ra_alloc_interference_graph() as it doesn't like having gaps
+    * (e.g. with v0, v2 count = 3 rather than 2).
+    */
+   size_t max_vreg = 0;
+
+   /* Maps virtual register numbers to their live ranges. */
+   struct hash_table *reg_ht =
+      _mesa_hash_table_create(ra, _mesa_hash_uint, _mesa_key_uint_equal);
+   if (!reg_ht)
+      return false;
+
+   /* Calculate live ranges for virtual registers. */
+   size_t ip = 0U; /* "Instruction pointer". */
+   foreach_instr (instr, instr_list) {
+      for (size_t u = 0U; u < instr->num_operands; ++u) {
+         struct hash_entry *entry;
+         struct live_range *range;
+
+         if (instr->operands[u].type != ROGUE_OPERAND_TYPE_VREG)
+            continue;
+
+         entry =
+            _mesa_hash_table_search(reg_ht, &instr->operands[u].vreg.number);
+         if (!entry) {
+            /* First use of this virtual register: initialize live range. */
+            range = rzalloc_size(reg_ht, sizeof(*range));
+            if (!range) {
+               _mesa_hash_table_destroy(reg_ht, NULL);
+               return false;
+            }
+
+            range->start = ip;
+            range->end = ip;
+            range->class = instr->operands[u].vreg.is_vector
+                              ? ROGUE_REG_CLASS_VEC4
+                              : ROGUE_REG_CLASS_TEMP;
+
+            entry = _mesa_hash_table_insert(reg_ht,
+                                            &instr->operands[u].vreg.number,
+                                            range);
+            if (!entry) {
+               _mesa_hash_table_destroy(reg_ht, NULL);
+               return false;
+            }
+
+            max_vreg = MAX2(max_vreg, instr->operands[u].vreg.number);
+
+            util_dynarray_init(&range->operand_refs, range);
+         } else {
+            /* Subsequent uses: update live range end. */
+            range = entry->data;
+            range->end = MAX2(range->end, ip);
+            /* A vreg must not switch between scalar and vector use. */
+            assert(range->class == (instr->operands[u].vreg.is_vector
+                                       ? ROGUE_REG_CLASS_VEC4
+                                       : ROGUE_REG_CLASS_TEMP));
+         }
+
+         /* Save a reference to the operand so it can be rewritten with the
+          * allocated physical register later on.
+          */
+         util_dynarray_append(&range->operand_refs,
+                              struct rogue_operand *,
+                              &instr->operands[u]);
+      }
+      ++ip;
+   }
+
+   /* Initialize the interference graph. */
+   struct ra_graph *g = ra_alloc_interference_graph(ra->regs, max_vreg + 1);
+
+   /* Set each virtual register to the appropriate class. */
+   hash_table_foreach (reg_ht, entry) {
+      const uint32_t *vreg = entry->key;
+      struct live_range *range = entry->data;
+      struct ra_class *class = ra->reg_data[range->class].class;
+
+      ra_set_node_class(g, *vreg, class);
+      /* TODO: ra_set_node_spill_cost(g, *vreg, cost); */
+   }
+
+   /* Build interference graph from overlapping live ranges. */
+   hash_table_foreach (reg_ht, entry_first) {
+      const uint32_t *vreg_first = entry_first->key;
+      struct live_range *range_first = entry_first->data;
+
+      hash_table_foreach (reg_ht, entry_second) {
+         const uint32_t *vreg_second = entry_second->key;
+         struct live_range *range_second = entry_second->data;
+
+         if (*vreg_first == *vreg_second)
+            continue;
+
+         /* If the live ranges overlap, those register nodes interfere. */
+         if (!(range_first->start >= range_second->end ||
+               range_second->start >= range_first->end)) {
+            ra_add_node_interference(g, *vreg_first, *vreg_second);
+         }
+      }
+   }
+
+   /* Add node interferences such that the same register can't be used for
+    * both an instruction's source and destination.
+    */
+   foreach_instr (instr, instr_list) {
+      for (size_t u = 0U; u < instr->num_operands; ++u) {
+         if (instr->operands[u].type != ROGUE_OPERAND_TYPE_VREG)
+            continue;
+
+         /* Operand 0 (if it exists and is virtual) is always
+          * the destination register.
+          */
+         if (u > 0 && instr->operands[0].type == ROGUE_OPERAND_TYPE_VREG)
+            ra_add_node_interference(g,
+                                     instr->operands[0].vreg.number,
+                                     instr->operands[u].vreg.number);
+      }
+   }
+
+   /* Perform register allocation. */
+   /* TODO: Spilling support. */
+   /* N.B. Don't call ra_allocate() inside assert(): the call would be
+    * compiled out in NDEBUG builds.
+    */
+   bool allocated = ra_allocate(g);
+   assert(allocated);
+   if (!allocated) {
+      ralloc_free(g);
+      _mesa_hash_table_destroy(reg_ht, NULL);
+      return false;
+   }
+
+   /* Replace virtual registers with allocated physical registers.
+    * N.B. This is a destructive process as it overwrites the hash table key!
+    */
+   hash_table_foreach (reg_ht, entry) {
+      uint32_t vreg = *(uint32_t *)entry->key;
+      unsigned phy_reg = ra_get_node_reg(g, vreg);
+      struct live_range *range = entry->data;
+
+      struct rogue_reg_data *reg_data = &ra->reg_data[range->class];
+      enum rogue_operand_type type = reg_data->type;
+      size_t reg_offset = reg_data->offset;
+      size_t *num_used = &reg_data->num_used;
+
+      util_dynarray_foreach (&range->operand_refs,
+                             struct rogue_operand *,
+                             operand_ptr) {
+         size_t num = phy_reg - reg_offset;
+         struct rogue_operand *operand = *operand_ptr;
+
+         assert(operand->type == ROGUE_OPERAND_TYPE_VREG);
+         assert(operand->vreg.number == vreg);
+
+         /* Index the component of emulated vec4 registers. */
+         if (operand->vreg.is_vector &&
+             operand->vreg.component != ROGUE_COMPONENT_ALL)
+            num += operand->vreg.component;
+
+         operand->type = type;
+         operand->reg.number = num;
+
+         /* Registers used = max register number + 1. Tracking the count
+          * directly (rather than the max number with a post-increment)
+          * also covers the case where only register 0 is used.
+          */
+         *num_used = MAX2(*num_used, operand->reg.number + 1);
+      }
+
+      util_dynarray_fini(&range->operand_refs);
+      _mesa_hash_table_remove(reg_ht, entry);
+   }
+
+   /* Pass back the registers used. */
+   if (temps_used)
+      *temps_used = ra->reg_data[ROGUE_REG_CLASS_TEMP].num_used;
+
+   if (internals_used)
+      *internals_used = ra->reg_data[ROGUE_REG_CLASS_VEC4].num_used;
+
+   ralloc_free(g);
+
+   _mesa_hash_table_destroy(reg_ht, NULL);
+
+   return true;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_REGALLOC_H
+#define ROGUE_REGALLOC_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "util/list.h"
+
+/**
+ * \brief Register classes used for allocation.
+ */
+enum rogue_reg_class {
+   ROGUE_REG_CLASS_TEMP, /**< Scalar virtual registers. */
+   ROGUE_REG_CLASS_VEC4, /**< Emulated vec4 virtual registers (internals). */
+
+   ROGUE_REG_CLASS_COUNT,
+};
+
+/**
+ * \brief Register data for each class.
+ *
+ * NOTE(review): "enum rogue_operand_type" is used by value here but this
+ * header does not include rogue_operand.h — includers must provide it first;
+ * confirm whether the include should be added.
+ */
+struct rogue_reg_data {
+   enum rogue_operand_type type; /**< Physical operand type that vregs of this
+                                  *   class are rewritten to after allocation.
+                                  */
+   size_t count;  /* Presumably the number of registers in this class — set in
+                   * rogue_ra_init() (not shown); TODO confirm.
+                   */
+   size_t stride; /* Presumably the allocation stride (e.g. 4 for vec4) — set
+                   * in rogue_ra_init() (not shown); TODO confirm.
+                   */
+
+   size_t offset; /**< Subtracted from the allocated physical register number
+                   *   to produce the class-relative register number.
+                   */
+   struct ra_class *class; /**< Mesa RA class passed to ra_set_node_class(). */
+   size_t num_used; /**< Number of registers of this class used. */
+};
+
+/**
+ * \brief Register allocation context.
+ */
+struct rogue_ra {
+   struct ra_regs *regs; /**< Mesa register-allocator register set. */
+
+   struct rogue_reg_data reg_data[ROGUE_REG_CLASS_COUNT];
+};
+
+struct rogue_ra *rogue_ra_init(void *mem_ctx);
+bool rogue_ra_alloc(struct list_head *instr_list,
+ struct rogue_ra *ra,
+ size_t *temps_used,
+ size_t *internals_used);
+
+#endif /* ROGUE_REGALLOC_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "rogue_shader.h"
+#include "rogue_instr.h"
+#include "rogue_regalloc.h"
+#include "rogue_util.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue_shader.c
+ *
+ * \brief Contains functions to manipulate Rogue shaders.
+ */
+
+/**
+ * \brief Counts how many times an instruction is used in a shader.
+ *
+ * \param[in] shader The shader containing instructions to count.
+ * \param[in] opcode The opcode of the instruction to be counted.
+ * \return The number of instructions whose opcode matches "opcode".
+ */
+size_t rogue_shader_instr_count_type(const struct rogue_shader *shader,
+                                     enum rogue_opcode opcode)
+{
+   ASSERT_OPCODE_RANGE(opcode);
+
+   size_t num_matches = 0U;
+
+   foreach_instr (instr, &shader->instr_list) {
+      if (instr->opcode == opcode)
+         ++num_matches;
+   }
+
+   return num_matches;
+}
+
+/**
+ * \brief Allocates and sets up a Rogue shader.
+ *
+ * The shader is ralloc-parented to "ctx" and is freed along with it.
+ *
+ * \param[in] ctx The compiler build context.
+ * \param[in] stage The shader stage.
+ * \return A rogue_shader* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader *rogue_shader_create(struct rogue_build_ctx *ctx,
+                                         gl_shader_stage stage)
+{
+   if (!ctx)
+      return NULL;
+
+   struct rogue_shader *shader = rzalloc_size(ctx, sizeof(*shader));
+   if (!shader)
+      return NULL;
+
+   shader->stage = stage;
+   shader->ctx = ctx;
+
+   list_inithead(&shader->instr_list);
+
+   /* Set up the register allocation context. */
+   shader->ra = rogue_ra_init(shader);
+   if (!shader->ra) {
+      ralloc_free(shader);
+      return NULL;
+   }
+
+   return shader;
+}
+
+/**
+ * \brief Creates an instruction and appends it to a Rogue shader.
+ *
+ * \param[in] shader The shader.
+ * \param[in] opcode The instruction opcode.
+ * \return A rogue_instr* if successful, or NULL if unsuccessful.
+ */
+struct rogue_instr *rogue_shader_insert(struct rogue_shader *shader,
+                                        enum rogue_opcode opcode)
+{
+   struct rogue_instr *instr = rogue_instr_create(shader, opcode);
+
+   /* Only append on successful creation; NULL is propagated to the caller. */
+   if (instr)
+      list_addtail(&instr->node, &shader->instr_list);
+
+   return instr;
+}
+
+/**
+ * \brief Acquires a free DRC for use by the shader.
+ *
+ * \param[in] shader The shader.
+ * \return The acquired DRC index, or SIZE_MAX if both DRCs are in use.
+ */
+size_t rogue_acquire_drc(struct rogue_shader *shader)
+{
+   /* If both DRCs are in use, we have a problem. */
+   if (shader->drc_used[0] && shader->drc_used[1])
+      return SIZE_MAX;
+
+   /* Prefer DRC 0; fall back to DRC 1. */
+   size_t drc = shader->drc_used[0] ? 1 : 0;
+   shader->drc_used[drc] = true;
+
+   return drc;
+}
+
+/**
+ * \brief Releases a DRC previously acquired with rogue_acquire_drc().
+ *
+ * \param[in] shader The shader.
+ * \param[in] drc The DRC index to release; must currently be in use.
+ */
+void rogue_release_drc(struct rogue_shader *shader, size_t drc)
+{
+   assert(drc < ROGUE_NUM_DRCS);
+   assert(shader->drc_used[drc]);
+
+   shader->drc_used[drc] = false;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_SHADER_H
+#define ROGUE_SHADER_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "compiler/shader_enums.h"
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_util.h"
+#include "util/list.h"
+#include "util/macros.h"
+
+struct rogue_build_ctx;
+struct rogue_ra;
+
+/**
+ * \brief Shader description.
+ */
+struct rogue_shader {
+   gl_shader_stage stage; /**< Shader stage. */
+
+   struct list_head instr_list; /**< Instructions linked list. */
+
+   struct rogue_build_ctx *ctx; /**< Compiler build context (ralloc parent). */
+   struct rogue_ra *ra; /**< Register allocation context. */
+
+   bool drc_used[ROGUE_NUM_DRCS]; /**< Tracks which DRCs are in use; see
+                                   *   rogue_acquire_drc()/rogue_release_drc().
+                                   */
+};
+
+/* Shader instruction list iterators and helpers. */
+
+/** Iterates over each instruction in the list, in order. */
+#define foreach_instr(__instr, __list) \
+   list_for_each_entry (struct rogue_instr, __instr, __list, node)
+/** Iterates over each instruction in the list, in reverse order. */
+#define foreach_instr_rev(__instr, __list) \
+   list_for_each_entry_rev (struct rogue_instr, __instr, __list, node)
+/** In-order iteration that tolerates removal of the current instruction. */
+#define foreach_instr_safe(__instr, __list) \
+   list_for_each_entry_safe (struct rogue_instr, __instr, __list, node)
+
+/** Returns the first instruction in the list. */
+#define instr_first_entry(__list) \
+   list_first_entry(__list, struct rogue_instr, node)
+/** Returns the last instruction in the list. */
+#define instr_last_entry(__list) \
+   list_last_entry(__list, struct rogue_instr, node)
+
+size_t rogue_shader_instr_count_type(const struct rogue_shader *shader,
+ enum rogue_opcode opcode);
+
+PUBLIC
+struct rogue_shader *rogue_shader_create(struct rogue_build_ctx *ctx,
+ gl_shader_stage stage);
+
+PUBLIC
+struct rogue_instr *rogue_shader_insert(struct rogue_shader *shader,
+ enum rogue_opcode opcode);
+
+size_t rogue_acquire_drc(struct rogue_shader *shader);
+void rogue_release_drc(struct rogue_shader *shader, size_t drc);
+
+#endif /* ROGUE_SHADER_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_util.h"
+#include "util/macros.h"
+
+/**
+ * \file rogue_util.c
+ *
+ * \brief Contains compiler utility and helper functions.
+ */
+
+/**
+ * \brief Splits and distributes value "source" across "dest_bytes" according to
+ * the ranges specified (from MSB to LSB).
+ *
+ * Bits are consumed from "source" most-significant-first and OR'd into the
+ * destination, so the caller must pre-zero the target bits of "dest_bytes".
+ *
+ * \param[in] source The source value to be distributed.
+ * \param[in] rangelist The rangelist describing how to distribute "source".
+ * \param[in] dest_size The size of the destination in bytes.
+ * \param[in] dest_bytes The destination byte array.
+ * \return true on success.
+ *
+ * NOTE(review): the function currently always returns true — invalid inputs
+ * are caught by the assert below (debug builds only); confirm whether a
+ * false-returning path is intended.
+ */
+bool rogue_distribute_value(uint64_t source,
+                            const struct rogue_rangelist *rangelist,
+                            size_t dest_size,
+                            uint8_t dest_bytes[dest_size])
+{
+   size_t total_bits_left = 0U;
+
+   /* Check that "value" is actually representable in "total_bits" bits. */
+   total_bits_left = rogue_rangelist_bits(rangelist);
+   assert(util_last_bit64(source) <= total_bits_left &&
+          "Value cannot be represented.");
+
+   /* Iterate over each range. */
+   for (size_t u = 0U; u < rangelist->num_ranges; ++u) {
+      struct rogue_bitrange *range = &rangelist->ranges[u];
+
+      /* Current destination bit (walks downward from the range's MSB). */
+      size_t dest_bit = range->start;
+      /* Bits of this range still to be filled. */
+      size_t bits_left = range->num;
+      size_t bytes_covered = rogue_bytes_spilled(range) + 1;
+      size_t base_byte = rogue_byte_index(range, dest_size);
+
+      /* Iterate over each byte covered by the current range. */
+      for (size_t b = 0U; b < bytes_covered; ++b) {
+         /* Bits placeable before crossing into the next byte. */
+         size_t max_bits = rogue_max_bits(dest_bit);
+         size_t bits_to_place = MIN2(bits_left, max_bits);
+         /* Position of the field's MSB within the destination byte. */
+         size_t dest_byte_bit = dest_bit % 8;
+         /* MSB of the not-yet-consumed portion of "source". */
+         size_t source_bit = total_bits_left - 1;
+
+         /* Mask and shuffle the source value so that it'll fit into the
+          * correct place in the destination byte:
+          */
+
+         /* Extract bits. */
+         uint64_t value_masked =
+            (source & BITMASK64_N(source_bit, bits_to_place));
+         /* Shift all the way right. */
+         value_masked >>= (1 + source_bit - bits_to_place);
+         /* Shift left to the correct position. */
+         value_masked <<= (1 + dest_byte_bit - bits_to_place);
+         /* Place value into byte. */
+         dest_bytes[base_byte + b] |= (value_masked & 0xff);
+
+         dest_bit -= max_bits;
+         bits_left -= bits_to_place;
+         total_bits_left -= bits_to_place;
+      }
+   }
+
+   return true;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_UTIL_H
+#define ROGUE_UTIL_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "util/bitscan.h"
+#include "util/log.h"
+#include "util/macros.h"
+
+/* Input validation helpers. */
+
+/**
+ * \brief Returns false (from the enclosing function) if "expr" is not
+ * asserted.
+ *
+ * \param[in] expr The expression to check.
+ */
+#define CHECK(expr)        \
+   do {                    \
+      if (!(expr))         \
+         return false;     \
+   } while (0)
+
+/**
+ * \brief Returns false (from the enclosing function) if "expr" is not
+ * asserted, and logs the provided error message.
+ *
+ * \param[in] expr The expression to check.
+ * \param[in] fmt The error message to print.
+ * \param[in] ... The printf-style variable arguments.
+ */
+#define CHECKF(expr, fmt, ...)                                  \
+   do {                                                         \
+      if (!(expr)) {                                            \
+         mesa_log(MESA_LOG_ERROR, "ROGUE", fmt, ##__VA_ARGS__); \
+         return false;                                          \
+      }                                                         \
+   } while (0)
+
+/**
+ * \brief Asserts if "opcode" is invalid.
+ *
+ * \param[in] opcode The opcode to check.
+ */
+#define ASSERT_OPCODE_RANGE(opcode) assert((opcode) < ROGUE_OP_COUNT)
+
+/**
+ * \brief Asserts if "operand" is invalid.
+ *
+ * \param[in] operand The operand to check.
+ */
+#define ASSERT_OPERAND_RANGE(operand) \
+   assert((operand) < ROGUE_OPERAND_TYPE_COUNT)
+
+/**
+ * \brief Asserts if "operand" is not a register.
+ *
+ * \param[in] operand The operand to check.
+ */
+#define ASSERT_OPERAND_REG(operand) \
+   assert((operand) <= ROGUE_OPERAND_TYPE_REG_MAX)
+
+/**
+ * \brief Asserts if "flag" is invalid.
+ *
+ * \param[in] flag The flag to check.
+ */
+#define ASSERT_INSTR_FLAG_RANGE(flag) assert((flag) < ROGUE_INSTR_FLAG_COUNT)
+
+/**
+ * \brief Asserts if operand index "index" is out of range.
+ *
+ * \param[in] instr The target instruction.
+ * \param[in] index The operand index to check.
+ */
+#define ASSERT_INSTR_OPERAND_INDEX(instr, index) \
+   assert((index) < (instr)->num_operands)
+
+/**
+ * \brief Asserts if "stage" is invalid.
+ *
+ * \param[in] stage The stage to check.
+ */
+#define ASSERT_SHADER_STAGE_RANGE(stage) assert((stage) < MESA_SHADER_STAGES)
+
+/**
+ * \brief Creates an "n"-bit mask whose most-significant bit is bit "b"
+ * (i.e. covering bits [b - n + 1, b]).
+ *
+ * NOTE(review): requires 1 <= n <= 64 and b >= n - 1; n == 0 would shift
+ * by 64, which is undefined behavior — confirm callers guarantee this.
+ *
+ * \param[in] b The most-significant bit of the mask.
+ * \param[in] n The number of bits in the mask.
+ */
+#define BITMASK64_N(b, n) (((~0ULL) << (64 - (n))) >> (63 - (b)))
+
+/**
+ * \brief Compile-time rogue_onehot.
+ *
+ * \sa #rogue_onehot()
+ */
+#define ROH(OFFSET) BITFIELD64_BIT(OFFSET)
+
+/* TODO: Consider integrating the following into src/util/{macros,bitscan}.h */
+
+/**
+ * \brief Converts a one-hot encoding to an offset encoding.
+ *
+ * E.g. 0b10000 -> 4
+ *
+ * \param[in] onehot The one-hot encoding; exactly one bit must be set.
+ * \return The offset encoding.
+ */
+static inline uint64_t rogue_offset(uint64_t onehot)
+{
+   /* Exactly one bit set: the MSB position is the offset. */
+   assert(util_bitcount64(onehot) == 1);
+
+   return util_last_bit64(onehot) - 1;
+}
+
+/**
+ * \brief Converts an offset encoding to a one-hot encoding.
+ *
+ * E.g. 0 -> 0b1
+ *
+ * \param[in] offset The offset encoding; must be < 64.
+ * \return The one-hot encoding.
+ */
+static inline uint64_t rogue_onehot(uint64_t offset)
+{
+   assert(offset < 64ULL);
+
+   return BITFIELD64_BIT(offset);
+}
+
+/**
+ * \brief Checks whether an input bitfield contains only a valid bitset.
+ *
+ * E.g. rogue_check_bitset(0b00001100, 0b00001111) -> true
+ *      rogue_check_bitset(0b00001100, 0b00000111) -> false
+ *
+ * \param[in] input The input bitfield.
+ * \param[in] valid_bits The valid bitset.
+ * \return true if "input" contains only "valid_bits", false otherwise.
+ */
+static inline bool rogue_check_bitset(uint64_t input, uint64_t valid_bits)
+{
+   /* True iff no bit outside the valid set is present. */
+   return (input & ~valid_bits) == 0;
+}
+
+/**
+ * \brief Describes a downward range of bits within an arbitrarily-sized
+ * sequence.
+ *
+ * E.g. for start = 7 and num = 3:
+ *
+ * 76543210
+ * abcdefgh
+ *
+ * the bit range would be: abc.
+ */
+struct rogue_bitrange {
+   size_t start; /**< Most-significant bit of the range. */
+   size_t num; /**< Number of bits, extending downward from "start". */
+};
+
+/**
+ * \brief Describes a collection of bit-ranges within an arbitrarily-sized
+ * sequence that are meaningful together.
+ *
+ * E.g. an 8-bit value that is encoded within a larger value:
+ * 8-bit value: abcdefgh
+ * Parent value: 010ab0cdef0010gh
+ *
+ */
+struct rogue_rangelist {
+   size_t num_ranges; /**< Number of entries in "ranges". */
+   struct rogue_bitrange *ranges; /**< Ranges, ordered from MSB to LSB. */
+};
+
+/**
+ * \brief Counts the total number of bits described in a rangelist.
+ *
+ * \param[in] rangelist The input rangelist.
+ * \return The total number of bits.
+ */
+static inline size_t
+rogue_rangelist_bits(const struct rogue_rangelist *rangelist)
+{
+   size_t num_bits = 0U;
+
+   /* Sum the width of every range in the list. */
+   for (size_t r = 0U; r < rangelist->num_ranges; ++r)
+      num_bits += rangelist->ranges[r].num;
+
+   return num_bits;
+}
+
+/**
+ * \brief Returns the byte offset of the bitrange moving left from the LSB.
+ *
+ * E.g. a range starting at bit 7 lies in byte 0; at bit 8, byte 1.
+ *
+ * \param[in] bitrange The input bit-range.
+ * \return The byte offset.
+ */
+static inline size_t rogue_byte_num(const struct rogue_bitrange *bitrange)
+{
+   /* Make sure there are enough bits. */
+   assert(bitrange->num <= (bitrange->start + 1));
+
+   return bitrange->start / 8;
+}
+
+/**
+ * \brief Returns the array-indexable byte offset of a bit-range if the sequence
+ * it represents were to be stored in an byte-array containing "num_bytes"
+ * bytes.
+ *
+ * E.g. uint8_t array[2] is a sequence of 16 bits:
+ * bit(0) is located in array[1].
+ * bit(15) is located in array[0].
+ *
+ * For uint8_t array[4]:
+ * bit(0) is located in array[3].
+ * bit(15) is located in array[2].
+ *
+ * \param[in] bitrange The input bit-range.
+ * \param[in] num_bytes The number of bytes that are used to contain the
+ * bit-range.
+ * \return The byte offset.
+ */
+static inline size_t rogue_byte_index(const struct rogue_bitrange *bitrange,
+                                      size_t num_bytes)
+{
+   /* Make sure there are enough bits. */
+   assert(bitrange->num <= (bitrange->start + 1));
+
+   /* Mirror the LSB-relative byte number into an array index. */
+   return num_bytes - rogue_byte_num(bitrange) - 1;
+}
+
+/**
+ * \brief Returns the bit offset of a bit-range if the sequence it represents is
+ * being accessed in a byte-wise manner.
+ *
+ * E.g. bit 17 has a bit offset of 1.
+ *
+ * \param[in] bitrange The input bit-range.
+ * \return The bit offset (0-7) within its byte.
+ */
+static inline size_t rogue_bit_offset(const struct rogue_bitrange *bitrange)
+{
+   /* Make sure there are enough bits. */
+   assert(bitrange->num <= (bitrange->start + 1));
+
+   return bitrange->start % 8;
+}
+
+/**
+ * \brief Returns the number of additional bytes that the bit-range spills into
+ * (excluding its "starting" byte).
+ *
+ * \param[in] bitrange The input bit-range.
+ * \return The number of bytes spilled.
+ */
+static inline size_t rogue_bytes_spilled(const struct rogue_bitrange *bitrange)
+{
+   /* Make sure there are enough bits. */
+   assert(bitrange->num <= (bitrange->start + 1));
+
+   /* Whole extra bytes below the starting byte, plus one more if the
+    * remaining bits don't fit within the starting byte's offset.
+    */
+   return ((bitrange->num - 1) / 8) +
+          ((bitrange->num % 8) > (rogue_bit_offset(bitrange) + 1));
+}
+
+/**
+ * \brief For a given bit offset, returns the maximum number of bits (including
+ * itself) that are accessible before spilling into the following byte.
+ *
+ * E.g. When trying to insert an 8-bit value offset of 13, a maximum of 6 bits
+ * can be placed; the last 2 bits will need to go into the next byte.
+ *
+ * 8-bit value: abcdefgh
+ *
+ *  array[0] array[1]
+ * 15      8 7      0
+ *  iiiiiiii jjjjjjjj
+ *    ^
+ *    abcdef gh
+ *
+ * \param[in] offset The bit offset.
+ * \return The maximum number of accessible bits.
+ */
+static inline size_t rogue_max_bits(size_t offset)
+{
+   return (offset % 8) + 1;
+}
+
+bool rogue_distribute_value(uint64_t source,
+ const struct rogue_rangelist *rangelist,
+ size_t dest_size,
+ uint8_t dest_bytes[dest_size]);
+
+#endif /* ROGUE_UTIL_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * \file rogue_validate.c
+ *
+ * \brief Contains rules and functions for validating Rogue data structures.
+ */
+
+#include <stdbool.h>
+
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "rogue_validate.h"
+#include "util/list.h"
+#include "util/macros.h"
+
+/**
+ * \brief Register operand rules.
+ *
+ * Each entry specifies the access mode, the exclusive upper bound on
+ * register numbers, and the permitted modifiers for one register type.
+ */
+#define REG_RULE(OPERAND, ACCESS, MAX, MODIFIERS)  \
+   [ROGUE_OPERAND_TYPE_REG_##OPERAND] = {          \
+      .access = ROGUE_REG_ACCESS_##ACCESS,         \
+      .max = MAX,                                  \
+      .modifiers = ROGUE_REG_MOD_##MODIFIERS,      \
+   }
+
+/* TODO: Support register indexing > ROGUE_MAX_REG_TEMP. */
+static const struct rogue_register_rule reg_rules[ROGUE_NUM_REG_TYPES] = {
+   REG_RULE(TEMP, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_TEMP), ALL),
+   REG_RULE(COEFF, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_COEFF), ALL),
+   REG_RULE(CONST, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_CONST), NONE),
+   REG_RULE(SHARED, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_SHARED), ALL),
+   REG_RULE(PIXEL_OUT,
+            RW,
+            MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_PIXEL_OUT),
+            NONE),
+   REG_RULE(VERTEX_IN,
+            RW,
+            MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_VERTEX_IN),
+            ALL),
+   REG_RULE(INTERNAL,
+            RW,
+            MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_INTERNAL),
+            NONE),
+};
+#undef REG_RULE
+
+/**
+ * \brief Instruction rules.
+ *
+ * Per operand: ".mask" is a one-hot set of permitted operand types,
+ * ".min"/".max" bound immediate values, and ".align" constrains register
+ * number alignment; -1 means "unconstrained" (see rogue_validate_instr()).
+ */
+/* TODO: Common up register classes to prevent long lines. */
+static const struct rogue_instr_rule instr_rules[ROGUE_OP_COUNT] = {
+   [ROGUE_OP_NOP] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, },
+   [ROGUE_OP_END_FRAG] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, },
+   [ROGUE_OP_END_VERT] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, },
+   [ROGUE_OP_WDF] = { .flags = 0,
+      .num_operands = 1, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_DRC), .min = -1, .max = -1, .align = -1, },
+      },
+   },
+   [ROGUE_OP_PIX_ITER_W] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT),
+      .num_operands = 5, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_DRC), .min = -1, .max = -1, .align = -1, },
+         [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_COEFF), .min = -1, .max = -1, .align = ROGUE_COEFF_ALIGN, },
+         [3] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_COEFF), .min = -1, .max = -1, .align = ROGUE_COEFF_ALIGN, },
+         [4] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 1, .max = 16, .align = -1, },
+      },
+   },
+   [ROGUE_OP_MAX] = { .flags = 0,
+      .num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+      },
+   },
+   [ROGUE_OP_MIN] = { .flags = 0,
+      .num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL), .min = -1, .max = -1, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+      },
+   },
+   /* TODO: Add representation for 4 sequential registers. */
+   [ROGUE_OP_PACK_U8888] = { .flags = 0,
+      .num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL), .min = -1, .max = -1, .align = -1, },
+      },
+   },
+   [ROGUE_OP_MOV] = { .flags = ROH(ROGUE_INSTR_FLAG_OLCHK),
+      .num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL) | ROH(ROGUE_OPERAND_TYPE_REG_PIXEL_OUT), .min = -1, .max = -1, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_SHARED) | ROH(ROGUE_OPERAND_TYPE_REG_VERTEX_IN), .min = -1, .max = -1, .align = -1, },
+      },
+   },
+   [ROGUE_OP_MOV_IMM] = { .flags = 0,
+      .num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 0, .max = UINT32_MAX, .align = -1, },
+      },
+   },
+   [ROGUE_OP_FMA] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT) | ROH(ROGUE_INSTR_FLAG_LP),
+      .num_operands = 4, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [3] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+      },
+   },
+   [ROGUE_OP_MUL] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT) | ROH(ROGUE_INSTR_FLAG_LP),
+      .num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+         [2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+      },
+   },
+   [ROGUE_OP_VTXOUT] = { .flags = 0,
+      .num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){
+         [0] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 0, .max = ROGUE_MAX_VERTEX_OUTPUTS, .align = -1, },
+         [1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
+      },
+   },
+};
+
+/**
+ * \brief Validates an operand.
+ *
+ * \param[in] operand The operand.
+ * \return true if valid, otherwise false (also false for operand types with
+ * no validation rule, e.g. unallocated virtual registers).
+ */
+bool rogue_validate_operand(const struct rogue_operand *operand)
+{
+   ASSERT_OPERAND_RANGE(operand->type);
+
+   switch (operand->type) {
+   /* Immediates have no further constraints here; ranges are checked
+    * per-instruction in rogue_validate_instr().
+    */
+   case ROGUE_OPERAND_TYPE_IMMEDIATE:
+      return true;
+
+   case ROGUE_OPERAND_TYPE_DRC:
+      CHECKF(operand->drc.number < ROGUE_NUM_DRCS,
+             "Invalid DRC number '%zu'.",
+             operand->drc.number);
+      return true;
+
+   /* Register operands: the number must be below the per-type bound
+    * ("max" is an exclusive upper limit here).
+    */
+   case ROGUE_OPERAND_TYPE_REG_TEMP:
+   case ROGUE_OPERAND_TYPE_REG_COEFF:
+   case ROGUE_OPERAND_TYPE_REG_CONST:
+   case ROGUE_OPERAND_TYPE_REG_SHARED:
+   case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT:
+   case ROGUE_OPERAND_TYPE_REG_VERTEX_IN:
+   case ROGUE_OPERAND_TYPE_REG_INTERNAL:
+      CHECKF(operand->reg.number < reg_rules[operand->type].max,
+             "Register number '%zu' out of range.",
+             operand->reg.number);
+      return true;
+
+   default:
+      break;
+   }
+
+   return false;
+}
+
+/**
+ * \brief Validates an instruction.
+ *
+ * Checks the instruction's flags, operand count, and each operand's type,
+ * immediate range, and register alignment against instr_rules, then
+ * validates each operand individually. CHECK/CHECKF return false from this
+ * function on the first failed check (CHECKF also logs an error).
+ *
+ * \param[in] instr The instruction.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_validate_instr(const struct rogue_instr *instr)
+{
+   const struct rogue_instr_rule *rule;
+
+   ASSERT_OPCODE_RANGE(instr->opcode);
+
+   rule = &instr_rules[instr->opcode];
+
+   /* Validate flags: only flags permitted by the rule may be set. */
+   CHECKF(rogue_check_bitset(instr->flags, rule->flags),
+          "Invalid instruction flags specified.");
+
+   /* Validate number of operands. */
+   CHECKF(instr->num_operands == rule->num_operands,
+          "Invalid number of operands specified.");
+
+   /* An operand array must be present when operands are expected. */
+   CHECK(!rule->num_operands || instr->operands);
+   for (size_t u = 0U; u < instr->num_operands; ++u) {
+      /* Validate operand types. */
+      CHECKF(rogue_check_bitset(rogue_onehot(instr->operands[u].type),
+                                rule->operand_rules[u].mask),
+             "Invalid type for operand %zu.",
+             u);
+
+      /* Validate immediate ranges (min/max of -1 mean unconstrained). */
+      if (rogue_check_bitset(rogue_onehot(instr->operands[u].type),
+                             ROH(ROGUE_OPERAND_TYPE_IMMEDIATE)) &&
+          rule->operand_rules[u].min != -1 &&
+          rule->operand_rules[u].max != -1) {
+         CHECKF(
+            instr->operands[u].immediate.value >= rule->operand_rules[u].min &&
+               instr->operands[u].immediate.value <= rule->operand_rules[u].max,
+            "Immediate value out of range for operand %zu.",
+            u);
+      }
+
+      /* Validate register alignment (align of -1 means unconstrained). */
+      if (rogue_check_bitset(rogue_onehot(instr->operands[u].type),
+                             ROGUE_MASK_ANY_REG) &&
+          rule->operand_rules[u].align != -1) {
+         CHECKF(!(instr->operands[u].reg.number % rule->operand_rules[u].align),
+                "Invalid register alignment in operand %zu.",
+                u);
+      }
+
+      /* Validate each operand. */
+      CHECKF(rogue_validate_operand(&instr->operands[u]),
+             "Failed to validate operand.");
+   }
+
+   return true;
+}
+
+/**
+ * \brief Validates a shader.
+ *
+ * A shader must be non-empty, belong to a supported stage (vertex or
+ * fragment -- any other stage fails validation), contain exactly one
+ * stage-terminating instruction which must also be the last instruction,
+ * and every instruction must pass rogue_validate_instr().
+ *
+ * \param[in] shader The shader.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_validate_shader(const struct rogue_shader *shader)
+{
+   CHECK(!list_is_empty(&shader->instr_list));
+   ASSERT_SHADER_STAGE_RANGE(shader->stage);
+
+   /* Shader stage-specific validation. */
+   switch (shader->stage) {
+   case MESA_SHADER_VERTEX:
+      /* Make sure there is (only) one end vertex shader instruction. */
+      CHECKF(rogue_shader_instr_count_type(shader, ROGUE_OP_END_VERT) == 1,
+             "Shader must contain a single end.vert instruction.");
+
+      /* Make sure the end vertex shader instruction is the last one. */
+      CHECKF(instr_last_entry(&shader->instr_list)->opcode == ROGUE_OP_END_VERT,
+             "end.vert not last instruction.");
+      break;
+
+   case MESA_SHADER_FRAGMENT:
+      /* Make sure there is (only) one end fragment shader instruction. */
+      CHECKF(rogue_shader_instr_count_type(shader, ROGUE_OP_END_FRAG) == 1,
+             "Shader must contain a single end.frag instruction.");
+
+      /* Make sure the end fragment shader instruction is the last one. */
+      CHECKF(instr_last_entry(&shader->instr_list)->opcode == ROGUE_OP_END_FRAG,
+             "end.frag not last instruction.");
+      break;
+
+   default:
+      /* Unsupported shader stage. */
+      return false;
+   }
+
+   /* Validate each instruction. */
+   foreach_instr (instr, &shader->instr_list)
+      CHECKF(rogue_validate_instr(instr), "Failed to validate instruction.");
+
+   return true;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_VALIDATE_H
+#define ROGUE_VALIDATE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "util/macros.h"
+
+/**
+ * \brief Register rule description.
+ */
+struct rogue_register_rule {
+   enum rogue_register_access access; /**< Permitted access kind. */
+   size_t max; /**< Exclusive upper bound on the register number. */
+   enum rogue_register_modifier modifiers; /**< Permitted modifiers. */
+};
+
+/**
+ * \brief Instruction operand rule description.
+ */
+struct rogue_instr_operand_rule {
+   uint64_t mask; /**< One-hot mask of permitted operand types. */
+   ssize_t min; /**< Minimum immediate value (-1 = unconstrained). */
+   ssize_t max; /**< Maximum immediate value (-1 = unconstrained). */
+   ssize_t align; /**< Required register alignment (-1 = unconstrained). */
+};
+
+/**
+ * \brief Instruction rule description.
+ */
+struct rogue_instr_rule {
+   uint64_t flags; /**< A mask of #rogue_instr_flag values. */
+   size_t num_operands; /**< Expected operand count. */
+   struct rogue_instr_operand_rule *operand_rules; /**< Per-operand rules. */
+};
+
+PUBLIC
+bool rogue_validate_operand(const struct rogue_operand *operand);
+
+PUBLIC
+bool rogue_validate_instr(const struct rogue_instr *instr);
+
+PUBLIC
+bool rogue_validate_shader(const struct rogue_shader *shader);
+
+#endif /* ROGUE_VALIDATE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "rogue.h"
+#include "rogue_build_data.h"
+#include "rogue_compiler.h"
+#include "rogue_dump.h"
+#include "util/os_file.h"
+#include "util/ralloc.h"
+
+#include <getopt.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Number of hex columns to dump before starting a new line. */
+#define ARRAY_DUMP_COLS 16
+
+/**
+ * \file compiler.c
+ *
+ * \brief Rogue offline compiler.
+ */
+
+/* Long-option table for getopt_long(); terminated by the all-zero entry. */
+static const struct option cmdline_opts[] = {
+   /* Arguments. */
+   { "stage", required_argument, NULL, 's' },
+   { "file", required_argument, NULL, 'f' },
+   { "entry", required_argument, NULL, 'e' },
+
+   /* Options. */
+   { "help", no_argument, NULL, 'h' },
+   { "out", required_argument, NULL, 'o' },
+
+   { "dump-c-array", no_argument, NULL, 'c' },
+   { "dump-rogue", no_argument, NULL, 'r' },
+   { "dump-nir", no_argument, NULL, 'n' },
+
+   { NULL, 0, NULL, 0 },
+};
+
+/* Parsed command-line state; string members alias argv (not owned). */
+struct compiler_opts {
+   gl_shader_stage stage; /* Shader stage to compile. */
+   char *file; /* Input SPIR-V filename (required). */
+   char *entry; /* Entry-point name (defaults to "main"). */
+   char *out_file; /* Output filename (defaults to "out.bin"). */
+   bool dump_c_array; /* Dump binary as a C byte array. */
+   bool dump_rogue; /* Dump Rogue assembly. */
+   bool dump_nir; /* Dump NIR. */
+};
+
+/**
+ * \brief Prints command-line usage to stdout.
+ *
+ * \param[in] argv0 The program name (argv[0]), echoed in the usage line.
+ */
+static void usage(const char *argv0)
+{
+   /* clang-format off */
+   printf("Rogue offline compiler.\n");
+   printf("Usage: %s -s <stage> -f <file> [-e <entry>] [-o <file>] [-c] [-r] [-n] [-h]\n", argv0);
+   printf("\n");
+
+   printf("Required arguments:\n");
+   printf("\t-s, --stage <stage> Shader stage (supported options: frag, vert).\n");
+   printf("\t-f, --file <file> Shader SPIR-V filename.\n");
+   printf("\n");
+
+   printf("Options:\n");
+   printf("\t-h, --help Prints this help message.\n");
+   printf("\t-e, --entry <entry> Overrides the shader entry-point name (default: 'main').\n");
+   printf("\t-o, --out <file> Overrides the output filename (default: 'out.bin').\n");
+   printf("\n");
+
+   printf("\t-c, --dump-c-array Print the shader binary as a C byte array.\n");
+   printf("\t-r, --dump-rogue Prints the shader Rogue assembly.\n");
+   printf("\t-n, --dump-nir Prints the shader NIR.\n");
+   printf("\n");
+   /* clang-format on */
+}
+
+/**
+ * \brief Parses command-line arguments into \p opts.
+ *
+ * For arguments that may be repeated (-e/-f/-o/-s) the first occurrence wins
+ * and later ones are silently ignored. Defaults for the output filename and
+ * entry-point name are applied after parsing. On any error (or -h) the usage
+ * text is printed.
+ *
+ * \param[in] argc Argument count.
+ * \param[in] argv Argument vector; string options alias into it.
+ * \param[out] opts Parsed options (caller pre-initializes, see main()).
+ * \return true on success, false on bad/missing arguments or -h.
+ */
+static bool parse_cmdline(int argc, char *argv[], struct compiler_opts *opts)
+{
+   int opt;
+   int longindex;
+
+   while (
+      (opt =
+          getopt_long(argc, argv, "crnhs:f:e:o:", cmdline_opts, &longindex)) !=
+      -1) {
+      switch (opt) {
+      case 'c':
+         opts->dump_c_array = true;
+         break;
+
+      case 'e':
+         /* Duplicate occurrence: keep the first value. */
+         if (opts->entry)
+            continue;
+
+         opts->entry = optarg;
+         break;
+
+      case 'f':
+         if (opts->file)
+            continue;
+
+         opts->file = optarg;
+         break;
+
+      case 'n':
+         opts->dump_nir = true;
+         break;
+
+      case 'o':
+         if (opts->out_file)
+            continue;
+
+         opts->out_file = optarg;
+         break;
+
+      case 'r':
+         opts->dump_rogue = true;
+         break;
+
+      case 's':
+         if (opts->stage != MESA_SHADER_NONE)
+            continue;
+
+         if (!strcmp(optarg, "frag"))
+            opts->stage = MESA_SHADER_FRAGMENT;
+         else if (!strcmp(optarg, "vert"))
+            opts->stage = MESA_SHADER_VERTEX;
+         else {
+            fprintf(stderr, "Invalid stage \"%s\".\n", optarg);
+            usage(argv[0]);
+            return false;
+         }
+
+         break;
+
+      case 'h':
+      default:
+         usage(argv[0]);
+         return false;
+      }
+   }
+
+   if (opts->stage == MESA_SHADER_NONE || !opts->file) {
+      fprintf(stderr,
+              "%s: --stage and --file are required arguments.\n",
+              argv[0]);
+      usage(argv[0]);
+      return false;
+   }
+
+   /* Apply defaults for optional arguments. */
+   if (!opts->out_file)
+      opts->out_file = "out.bin";
+
+   if (!opts->entry)
+      opts->entry = "main";
+
+   return true;
+}
+
+/**
+ * \brief Offline compiler entry point.
+ *
+ * Pipeline: SPIR-V file -> NIR -> Rogue -> binary, with optional dumps at
+ * each stage, then the binary is written to the output file. Resources are
+ * released via the goto-cleanup chain at the bottom on every error path.
+ *
+ * \return 0 on success, 1 on failure.
+ */
+int main(int argc, char *argv[])
+{
+   /* Command-line options. */
+   /* N.B. MESA_SHADER_NONE != 0 */
+   /* The trailing positional 0 zero-initializes the member after .stage;
+    * all remaining members are implicitly zeroed.
+    */
+   struct compiler_opts opts = { .stage = MESA_SHADER_NONE, 0 };
+
+   /* Input file data. */
+   char *input_data;
+   size_t input_size;
+
+   /* Compiler context. */
+   struct rogue_compiler *compiler;
+
+   /* Multi-stage build context. */
+   struct rogue_build_ctx *ctx;
+
+   /* Output file. */
+   FILE *fp;
+   size_t bytes_written;
+
+   /* Parse command-line options. */
+   if (!parse_cmdline(argc, argv, &opts))
+      return 1;
+
+   /* Load SPIR-V input file. */
+   input_data = os_read_file(opts.file, &input_size);
+   if (!input_data) {
+      fprintf(stderr, "Failed to read file \"%s\".\n", opts.file);
+      return 1;
+   }
+
+   /* Create compiler context. */
+   compiler = rogue_compiler_create(NULL);
+   if (!compiler) {
+      fprintf(stderr, "Failed to set up compiler context.\n");
+      goto err_free_input;
+   }
+
+   /* Build context is ralloc'd; freeing it releases per-stage IR too. */
+   ctx = rogue_create_build_context(compiler);
+   if (!ctx) {
+      fprintf(stderr, "Failed to set up build context.\n");
+      goto err_destroy_compiler;
+   }
+
+   /* SPIR-V -> NIR. */
+   ctx->nir[opts.stage] = rogue_spirv_to_nir(ctx,
+                                             opts.stage,
+                                             opts.entry,
+                                             input_size / sizeof(uint32_t),
+                                             (uint32_t *)input_data,
+                                             0,
+                                             NULL);
+   if (!ctx->nir[opts.stage]) {
+      fprintf(stderr, "Failed to translate SPIR-V input to NIR.\n");
+      goto err_free_build_context;
+   }
+
+   /* Dump NIR shader. */
+   if (opts.dump_nir)
+      nir_print_shader(ctx->nir[opts.stage], stdout);
+
+   /* NIR -> Rogue. */
+   ctx->rogue[opts.stage] = rogue_nir_to_rogue(ctx, ctx->nir[opts.stage]);
+   if (!ctx->rogue[opts.stage]) {
+      fprintf(stderr, "Failed to translate NIR input to Rogue.\n");
+      goto err_free_build_context;
+   }
+
+   /* Dump Rogue shader. */
+   if (opts.dump_rogue)
+      rogue_dump_shader(ctx->rogue[opts.stage], stdout);
+
+   /* Rogue -> Binary. */
+   ctx->binary[opts.stage] = rogue_to_binary(ctx, ctx->rogue[opts.stage]);
+   if (!ctx->binary[opts.stage]) {
+      fprintf(stderr, "Failed to translate Rogue to binary.\n");
+      goto err_free_build_context;
+   }
+
+   /* Dump binary as a C array. */
+   if (opts.dump_c_array) {
+      printf("uint8_t shader_bytes[%zu] = {", ctx->binary[opts.stage]->size);
+      for (size_t u = 0U; u < ctx->binary[opts.stage]->size; ++u) {
+         /* Start a new row every ARRAY_DUMP_COLS bytes. */
+         if (!(u % ARRAY_DUMP_COLS))
+            printf("\n\t");
+
+         printf("0x%02x, ", ctx->binary[opts.stage]->data[u]);
+      }
+      printf("\n};\n");
+   }
+
+   /* Write shader binary to disk. */
+   fp = fopen(opts.out_file, "wb");
+   if (!fp) {
+      fprintf(stderr, "Failed to open output file \"%s\".\n", opts.out_file);
+      goto err_free_build_context;
+   }
+
+   bytes_written = fwrite(ctx->binary[opts.stage]->data,
+                          1,
+                          ctx->binary[opts.stage]->size,
+                          fp);
+   if (bytes_written != ctx->binary[opts.stage]->size) {
+      fprintf(
+         stderr,
+         "Failed to write to output file \"%s\" (%zu bytes of %zu written).\n",
+         opts.out_file,
+         bytes_written,
+         ctx->binary[opts.stage]->size);
+      goto err_close_outfile;
+   }
+
+   /* Clean up. */
+   fclose(fp);
+   ralloc_free(ctx);
+   rogue_compiler_destroy(compiler);
+   free(input_data);
+
+   return 0;
+
+err_close_outfile:
+   fclose(fp);
+err_free_build_context:
+   ralloc_free(ctx);
+err_destroy_compiler:
+   rogue_compiler_destroy(compiler);
+err_free_input:
+   free(input_data);
+
+   return 1;
+}
--- /dev/null
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+subdir('pds')
+
+# Generate the Vulkan entrypoint tables (pvr_entrypoints.[ch]) from the
+# Vulkan API XML using the common Mesa generator script.
+pvr_entrypoints = custom_target(
+  'pvr_entrypoints',
+  input : [vk_entrypoints_gen, vk_api_xml],
+  output : ['pvr_entrypoints.h', 'pvr_entrypoints.c'],
+  command : [
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
+    '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'pvr',
+  ],
+  depend_files : vk_entrypoints_gen_depend_files,
+)
+
+# Core driver sources; the DRM winsys backend is always built, and the
+# Services backend sources are appended below when enabled.
+pvr_files = files(
+  'winsys/powervr/pvr_drm.c',
+  'winsys/pvr_winsys.c',
+  'winsys/pvr_winsys_helper.c',
+  'pvr_blit.c',
+  'pvr_bo.c',
+  'pvr_cmd_buffer.c',
+  'pvr_csb.c',
+  'pvr_descriptor_set.c',
+  'pvr_device.c',
+  'pvr_formats.c',
+  'pvr_hw_pass.c',
+  'pvr_image.c',
+  'pvr_job_common.c',
+  'pvr_job_compute.c',
+  'pvr_job_context.c',
+  'pvr_job_render.c',
+  'pvr_pass.c',
+  'pvr_pipeline.c',
+  'pvr_pipeline_cache.c',
+  'pvr_query.c',
+  'pvr_queue.c',
+  'pvr_shader.c',
+  'pvr_tex_state.c',
+  'pvr_wsi.c',
+)
+
+pvr_includes = [
+  include_directories('usc/programs'),
+  include_directories('winsys'),
+  libpowervr_pds_includes,
+]
+
+pvr_deps = [
+  dep_csbgen,
+  dep_libdrm,
+  dep_valgrind,
+  idep_vulkan_runtime,
+  idep_vulkan_util,
+  idep_vulkan_wsi,
+]
+
+pvr_flags = [
+  no_override_init_args,
+]
+
+# Optional winsys backend for the PowerVR Services (pvrsrvkm) kernel driver,
+# selected at configure time with -Dimagination-srv=true.
+if with_imagination_srv
+  pvr_files += files(
+    'winsys/pvrsrvkm/pvr_srv.c',
+    'winsys/pvrsrvkm/pvr_srv_bo.c',
+    'winsys/pvrsrvkm/pvr_srv_bridge.c',
+    'winsys/pvrsrvkm/pvr_srv_job_compute.c',
+    'winsys/pvrsrvkm/pvr_srv_job_render.c',
+    'winsys/pvrsrvkm/pvr_srv_syncobj.c',
+  )
+  pvr_flags += '-DPVR_SUPPORT_SERVICES_DRIVER'
+endif
+
+# The Vulkan ICD shared object itself.
+libvulkan_powervr_mesa = shared_library(
+  'vulkan_powervr_mesa',
+  [pvr_files, pvr_entrypoints],
+  include_directories : [
+    pvr_includes,
+    inc_gallium_aux,
+    inc_imagination,
+    inc_include,
+    inc_src,
+    inc_mesa,
+    inc_gallium,
+    inc_compiler,
+  ],
+  link_with : [
+    libpowervr_common,
+    libpowervr_pds,
+    libpowervr_rogue,
+    libvulkan_wsi,
+  ],
+  dependencies : [
+    pvr_deps,
+    idep_nir,
+  ],
+  c_args : pvr_flags,
+  link_args : [
+    ld_args_build_id,
+    ld_args_bsymbolic,
+    ld_args_gc_sections
+  ],
+  gnu_symbol_visibility : 'hidden',
+  install : true,
+)
+
+# Check that the library exports exactly the expected ICD symbols.
+if with_symbols_check
+  test(
+    'pvr symbols check',
+    symbols_check,
+    args : [
+      '--lib', libvulkan_powervr_mesa,
+      '--symbols-file', vulkan_icd_symbols,
+      symbols_check_args,
+    ],
+    suite : ['imagination'],
+  )
+endif
+
+# Installed ICD manifest pointing at the installed library path.
+powervr_mesa_icd = custom_target(
+  'powervr_mesa_icd',
+  input : [vk_icd_gen, vk_api_xml],
+  output : 'powervr_mesa_icd.@0@.json'.format(host_machine.cpu()),
+  command : [
+    prog_python, '@INPUT0@',
+    '--api-version', '1.0', '--xml', '@INPUT1@',
+    '--lib-path', join_paths(get_option('prefix'), get_option('libdir'),
+                             'libvulkan_powervr_mesa.so'),
+    '--out', '@OUTPUT@',
+  ],
+  build_by_default : true,
+  install_dir : with_vulkan_icd_dir,
+  install : true,
+)
+
+# Development ICD manifest pointing at the build tree, for `meson devenv`
+# (which requires Meson >= 0.58).
+if meson.version().version_compare('>= 0.58')
+  _dev_icdname = 'powervr_mesa_devenv_icd.@0@.json'.format(host_machine.cpu())
+  custom_target(
+    'powervr_mesa_devenv_icd',
+    input : [vk_icd_gen, vk_api_xml],
+    output : _dev_icdname,
+    command : [
+      prog_python, '@INPUT0@',
+      '--api-version', '1.0', '--xml', '@INPUT1@',
+      '--lib-path', meson.current_build_dir() / 'libvulkan_powervr_mesa.so',
+      '--out', '@OUTPUT@',
+    ],
+    build_by_default : true,
+  )
+
+  devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
+endif
--- /dev/null
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+# Static helper library for building PDS (Programmable Data Sequencer)
+# programs, linked into the Vulkan driver.
+libpowervr_pds_files = files(
+  'pvr_pds.c',
+  'pvr_pds_disasm.c',
+  'pvr_pds_printer.c',
+  'pvr_xgl_pds.c',
+)
+
+# Exported so the parent meson.build can add these to the driver's includes.
+libpowervr_pds_includes = include_directories(
+  '..',
+  '.',
+  'pvr_pds_programs',
+)
+
+libpowervr_pds = static_library(
+  'pvr_pds',
+  [libpowervr_pds_files],
+  include_directories : [
+    libpowervr_pds_includes,
+    inc_include,
+    inc_src,
+    inc_imagination,
+  ],
+  c_args : [
+    no_override_init_args,
+  ],
+  gnu_symbol_visibility : 'hidden',
+  # PIC so it can be linked into the shared Vulkan driver library.
+  pic : true,
+)
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "pvr_device_info.h"
+#include "pvr_pds.h"
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "pvr_rogue_pds_encode.h"
+#include "util/log.h"
+#include "util/macros.h"
+
+#define H32(X) (uint32_t)((((X) >> 32U) & 0xFFFFFFFFUL))
+#define L32(X) (uint32_t)(((X)&0xFFFFFFFFUL))
+
+/*****************************************************************************
+ Macro definitions
+*****************************************************************************/
+
+#define PVR_PDS_DWORD_SHIFT 2
+
+#define PVR_PDS_CONSTANTS_BLOCK_BASE 0
+#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
+#define PVR_PDS_TEMPS_BLOCK_BASE 128
+#define PVR_PDS_TEMPS_BLOCK_SIZE 32
+
+#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
+#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK
+
+/* Map PDS temp registers to the CDM values they contain Work-group IDs are only
+ * available in the coefficient sync task.
+ */
+#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
+#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
+#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
+/* Local IDs are available in every task. */
+#define PVR_PDS_CDM_LOCAL_ID_X 0
+#define PVR_PDS_CDM_LOCAL_ID_YZ 1
+
+#define PVR_PDS_DOUTW_LOWER32 0x0
+#define PVR_PDS_DOUTW_UPPER32 0x1
+#define PVR_PDS_DOUTW_LOWER64 0x2
+#define PVR_PDS_DOUTW_LOWER128 0x3
+#define PVR_PDS_DOUTW_MAXMASK 0x4
+
+#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
+#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U)
+
+/*****************************************************************************
+ Static variables
+*****************************************************************************/
+
+/* Maps a PVR_PDS_DOUTW_* mask selector to the DOUTW src1 BSIZE field
+ * encoding. NOTE(review): both LOWER64 and LOWER128 map to ALL64 -- TODO
+ * confirm 128-bit writes are intentionally encoded as ALL64.
+ */
+static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
+   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
+   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
+   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
+   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
+};
+
+/* If has_slc_mcu_cache_control is enabled use cache_control_const[0], else use
+ * cache_control_const[1]. Row [1] is all-zero: no cache-mode bits are set on
+ * devices without SLC/MCU cache controls. Indexed [feature][cached].
+ */
+static const uint32_t cache_control_const[2][2] = {
+   { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
+     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
+   { 0, 0 }
+};
+
+/*****************************************************************************
+ Function definitions
+*****************************************************************************/
+
+/**
+ * Packs the src0 field of a PDS LD (DMA load) instruction.
+ *
+ * \param dest Destination register index (masked to the REGS64TP range).
+ * \param count8 Transfer count (masked to the COUNT8 range; presumably in
+ *               8-byte units -- TODO confirm against the ISA reference).
+ * \param src_add Source address (masked to the SRCADD range).
+ * \param cached Whether to use cached mode.
+ * \param dev_info Device info, used to query SLC/MCU cache control support.
+ * \return The encoded src0 value.
+ */
+uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
+                                uint64_t count8,
+                                uint64_t src_add,
+                                bool cached,
+                                const struct pvr_device_info *dev_info)
+{
+   uint64_t encoded = 0;
+
+   /* SLC cache mode bits only exist on devices with SLC/MCU cache controls. */
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+      encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
+                         : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
+   }
+
+   encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
+               << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
+   encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
+               << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
+   encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
+                      : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
+   encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
+
+   return encoded;
+}
+
+/**
+ * Packs the src0 field of a PDS ST (DMA store) instruction.
+ *
+ * \param src Source register index (masked to the REGS32TP range).
+ * \param count4 Transfer count (masked to the COUNT4 range; presumably in
+ *               4-byte units -- TODO confirm against the ISA reference).
+ * \param dst_add Destination address (masked to the DSTADD range).
+ * \param write_through Use write-through (vs. write-back) cache mode.
+ * \param dev_info Device info, used to query SLC/MCU cache control support.
+ * \return The encoded src0 value.
+ */
+uint64_t pvr_pds_encode_st_src0(uint64_t src,
+                                uint64_t count4,
+                                uint64_t dst_add,
+                                bool write_through,
+                                const struct pvr_device_info *dev_info)
+{
+   uint64_t encoded = 0;
+
+   /* Query the feature via PVR_HAS_FEATURE() rather than reading the feature
+    * bit directly, for consistency with pvr_pds_encode_ld_src0() and the rest
+    * of this file.
+    */
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+      encoded |= (write_through
+                     ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
+                     : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
+   }
+
+   encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
+               << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
+   encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
+               << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
+   encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
+                             : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
+   encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+               << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
+
+   return encoded;
+}
+
+/**
+ * Packs the src1 field of a DOUTW (constant write) dispatch.
+ *
+ * \param dest Destination offset; must be 4-dword aligned for 128-bit masks,
+ *             2-dword aligned for 64-bit masks (asserted below).
+ * \param dword_mask One of the PVR_PDS_DOUTW_* mask selectors.
+ * \param flags Additional src1 flag bits, OR'd in verbatim.
+ * \param cached Whether to request cached mode (only has effect on devices
+ *               with SLC/MCU cache controls, see cache_control_const).
+ * \param dev_info Device info for feature queries.
+ * \return The encoded src1 value.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_encode_doutw_src1(uint32_t dest,
+                          uint32_t dword_mask,
+                          uint32_t flags,
+                          bool cached,
+                          const struct pvr_device_info *dev_info)
+{
+   /* Enforce destination alignment matching the write width. */
+   assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
+          ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
+          (dword_mask < PVR_PDS_DOUTW_LOWER64));
+
+   uint32_t encoded =
+      (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
+
+   encoded |= dword_mask_const[dword_mask];
+
+   encoded |= flags;
+
+   encoded |=
+      cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
+                                                                           : 1]
+                         [cached ? 1 : 0];
+   return encoded;
+}
+
+/* Encode a DOUTW dispatch: thin wrapper over pvr_pds_inst_encode_dout()
+ * selecting the DOUTW destination.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
+                                                     uint32_t end,
+                                                     uint32_t src1,
+                                                     uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc, end, src1, src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
+}
+
+/* Encode a DOUTU (USC task dispatch); src1 is unused and passed as zero. */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc, end, 0, src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
+}
+
+/* Encode a DOUTC dispatch; both source fields are unused and passed as
+ * zero.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
+                                                        uint32_t end)
+{
+   return pvr_pds_inst_encode_dout(cc, end, 0, 0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
+}
+
+/* Encode a DOUTD (DMA) dispatch: thin wrapper over
+ * pvr_pds_inst_encode_dout() selecting the DOUTD destination.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src1,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc, end, src1, src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
+}
+
+/* Encode a DOUTI dispatch; src1 is unused and passed as zero. */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc, end, 0, src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
+}
+
+/* Encode a DOUTV dispatch: thin wrapper over pvr_pds_inst_encode_dout()
+ * selecting the DOUTV destination.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutv(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src1,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc, end, src1, src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTV);
+}
+
+/**
+ * Encodes a PDS branch with a signed relative address.
+ *
+ * \param srcc Source condition selector.
+ * \param neg Negate-condition flag.
+ * \param setc Set-condition selector.
+ * \param relative_address Signed branch offset (units not established here --
+ *        TODO confirm whether instructions or dwords).
+ * \return The encoded instruction word.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
+                                                 uint32_t neg,
+                                                 uint32_t setc,
+                                                 int32_t relative_address)
+{
+   /* Address should be signed but API only allows unsigned value. */
+   return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
+}
+
+/**
+ * Gets the next constant address and moves the next constant pointer along.
+ *
+ * Even-sized requests are aligned up to a 64-bit (even-index) boundary;
+ * odd-sized requests are packed with no alignment.
+ *
+ * \param next_constant Pointer to the next constant address.
+ * \param num_constants The number of constants required.
+ * \param count Accumulator for the number of constants allocated, including
+ *              any alignment padding (added to, not overwritten).
+ * \return The address of the next constant.
+ */
+static uint32_t pvr_pds_get_constants(uint32_t *next_constant,
+                                      uint32_t num_constants,
+                                      uint32_t *count)
+{
+   uint32_t constant;
+
+   /* Work out starting constant number. For even number of constants, start on
+    * a 64-bit boundary.
+    */
+   if (num_constants & 1)
+      constant = *next_constant;
+   else
+      constant = (*next_constant + 1) & ~1;
+
+   /* Update the count with the number of constants actually allocated. */
+   *count += constant + num_constants - *next_constant;
+
+   /* Move the next constant pointer. */
+   *next_constant = constant + num_constants;
+
+   /* Must not exceed the constants block capacity. */
+   assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
+
+   return constant;
+}
+
+/**
+ * Gets the next temp address and moves the next temp pointer along.
+ *
+ * \param next_temp Pointer to the next temp address.
+ * \param num_temps The number of temps required.
+ * \param count The number of temps allocated.
+ * \return The address of the next temp.
+ */
+static uint32_t
+pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
+{
+   /* Even-sized requests start on a 64-bit (even-register) boundary; odd
+    * requests are packed with no alignment.
+    */
+   const uint32_t temp = (num_temps & 1) ? *next_temp
+                                         : ((*next_temp + 1) & ~1U);
+
+   /* Account for both the request and any alignment padding introduced. */
+   *count += (temp - *next_temp) + num_temps;
+
+   /* Advance the allocation cursor past this request. */
+   *next_temp = temp + num_temps;
+
+   assert((temp + num_temps) <=
+          (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
+
+   return temp;
+}
+
+/**
+ * Write a 32-bit constant indexed by the long range.
+ *
+ * \param data_block Pointer to data block to write to.
+ * \param index Index within the data to write to.
+ * \param dword0 The 32-bit constant to write.
+ */
+static void
+pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
+{
+   /* Check range. */
+   assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
+                    PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));
+
+   data_block[index + 0] = dword0;
+
+   PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
+}
+
+/**
+ * Write a 64-bit constant indexed by the long range.
+ *
+ * \param data_block Pointer to data block to write to.
+ * \param index Index within the data to write to.
+ * \param dword0 Lower half of the 64 bit constant.
+ * \param dword1 Upper half of the 64 bit constant.
+ */
+static void pvr_pds_write_constant64(uint32_t *data_block,
+                                     uint32_t index,
+                                     uint32_t dword0,
+                                     uint32_t dword1)
+{
+   /* Has to be on 64 bit boundary. */
+   assert((index & 1) == 0);
+
+   /* Check range. */
+   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
+                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
+
+   data_block[index + 0] = dword0;
+   data_block[index + 1] = dword1;
+
+   /* dword0 is the low half and dword1 the high half (matching the layout
+    * written by pvr_pds_write_wide_constant()), so compose the debug value
+    * with dword1 in the upper 32 bits.
+    */
+   PVR_PDS_PRINT_DATA("WriteConstant64",
+                      ((uint64_t)dword1 << 32) | (uint64_t)dword0,
+                      index);
+}
+
+/**
+ * Write a 64-bit constant from a single wide word indexed by the long-range
+ * number.
+ *
+ * The low 32 bits are stored at \p index and the high 32 bits at
+ * \p index + 1 (little-endian dword order).
+ *
+ * \param data_block Pointer to data block to write to.
+ * \param index Index within the data to write to.
+ * \param word The 64-bit constant to write.
+ */
+
+static void
+pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
+{
+   /* Has to be on 64 bit boundary. */
+   assert((index & 1) == 0);
+
+   /* Check range. */
+   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
+                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
+
+   data_block[index + 0] = L32(word);
+   data_block[index + 1] = H32(word);
+
+   PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
+}
+
+/**
+ * Write a 64-bit DMA source address into the data block, applying the SLC
+ * cached-mode flag on devices with SLC/MCU cache controls.
+ *
+ * \param data_block Pointer to data block to write to.
+ * \param index Index within the data to write to (must be 64-bit aligned).
+ * \param address The DMA address to write.
+ * \param coherent NOTE(review): currently unused in this function -- confirm
+ *        whether it should affect the cache mode.
+ * \param dev_info Device info for feature queries.
+ */
+static void pvr_pds_write_dma_address(uint32_t *data_block,
+                                      uint32_t index,
+                                      uint64_t address,
+                                      bool coherent,
+                                      const struct pvr_device_info *dev_info)
+{
+   /* Has to be on 64 bit boundary. */
+   assert((index & 1) == 0);
+
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
+      address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
+
+   /* Check range. */
+   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
+                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
+
+   data_block[index + 0] = L32(address);
+   data_block[index + 1] = H32(address);
+
+   PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
+}
+
+/**
+ * External API to append a 64-bit constant to an existing data segment
+ * allocation.
+ *
+ * \param constants Pointer to start of data segment.
+ * \param constant_value Value to write to constant.
+ * \param data_size In/out: current data segment size in constants; grown by
+ *                  the allocation (including any 64-bit alignment padding).
+ * \returns The (64-bit aligned) index of the appended constant.
+ */
+uint32_t pvr_pds_append_constant64(uint32_t *constants,
+                                   uint64_t constant_value,
+                                   uint32_t *data_size)
+{
+   /* Calculate next constant from current data size. */
+   uint32_t next_constant = *data_size;
+   uint32_t constant = pvr_pds_get_constants(&next_constant, 2, data_size);
+
+   /* Set the value. */
+   pvr_pds_write_wide_constant(constants, constant, constant_value);
+
+   return constant;
+}
+
+/** Zero-initialize a pixel shader secondary-attribute program descriptor. */
+void pvr_pds_pixel_shader_sa_initialize(
+   struct pvr_pds_pixel_shader_sa_program *program)
+{
+   /* memset (not struct assignment) so padding bytes are also cleared. */
+   memset(program, 0, sizeof(*program));
+}
+
+/**
+ * Encode a DMA burst.
+ *
+ * Simplified for MS2: the burst is always emitted as a single DMA transfer.
+ *
+ * \param dma_control DMA control words.
+ * \param dma_address DMA address.
+ * \param dest_offset Destination offset in the attribute.
+ * \param dma_size The size of the DMA in words.
+ * \param src_address Source address for the burst.
+ * \param dev_info PVR device info structure.
+ * \returns The number of DMA transfers required (currently always 1).
+ */
+
+uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
+                                  uint64_t *dma_address,
+                                  uint32_t dest_offset,
+                                  uint32_t dma_size,
+                                  uint64_t src_address,
+                                  const struct pvr_device_info *dev_info)
+{
+   /* Build the single SRC1 control word: burst size, attribute offset and
+    * cached common-store destination.
+    */
+   dma_control[0] =
+      (dma_size << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT) |
+      (dest_offset << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT) |
+      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
+      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
+
+   dma_address[0] = src_address;
+
+   /* Cores with SLC MCU cache controls carry the SLC mode in SRC0. */
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
+      dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
+
+   /* Forced to a single DMA kick. */
+   return 1;
+}
+
+/* FIXME: use the csbgen interface and pvr_csb_pack.
+ * FIXME: use bool for phase_rate_change.
+ */
+/**
+ * Sets up the USC control words for a DOUTU.
+ *
+ * \param usc_task_control USC task control structure to be setup.
+ * \param execution_address USC execution virtual address.
+ * \param usc_temps Number of USC temps.
+ * \param sample_rate Sample rate for the DOUTU.
+ * \param phase_rate_change Phase rate change for the DOUTU.
+ */
+void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
+                         uint64_t execution_address,
+                         uint32_t usc_temps,
+                         uint32_t sample_rate,
+                         uint32_t phase_rate_change)
+{
+   /* Accumulate the full SRC0 word locally, then assign once at the end. */
+   uint64_t doutu_src0 = UINT64_C(0);
+
+   /* Encode the USC execution virtual address. */
+   pvr_set_usc_execution_address64(&doutu_src0, execution_address);
+
+   if (usc_temps > 0) {
+      /* Temps are allocated in blocks of 4 dwords. */
+      uint32_t temp_blocks =
+         DIV_ROUND_UP(usc_temps,
+                      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
+
+      /* Check for losing temps due to too many requested. */
+      assert((temp_blocks & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
+             temp_blocks);
+
+      doutu_src0 |=
+         ((uint64_t)(temp_blocks &
+                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
+         << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
+   }
+
+   if (sample_rate > 0) {
+      doutu_src0 |=
+         ((uint64_t)sample_rate)
+         << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
+   }
+
+   if (phase_rate_change != 0)
+      doutu_src0 |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
+
+   usc_task_control->src0 = doutu_src0;
+}
+
+/**
+ * Generates the PDS pixel event program.
+ *
+ * Always computes data_size/code_size into \p program; depending on
+ * \p gen_mode it additionally emits the data segment or the code segment
+ * into \p buffer.
+ *
+ * \param program Pointer to the PDS pixel event program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Generate either a data segment or code segment.
+ * \param dev_info PVR device info structure.
+ * \returns Pointer to just beyond the buffer for the program, or NULL when
+ *          neither segment was requested (sizes-only generation).
+ */
+uint32_t *
+pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
+                             uint32_t *restrict buffer,
+                             enum pvr_pds_generate_mode gen_mode,
+                             const struct pvr_device_info *dev_info)
+{
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   /* 'constants' aliases 'buffer': in data-segment mode we write constants
+    * through it while 'buffer' itself is not advanced.
+    */
+   uint32_t *constants = buffer;
+
+   uint32_t data_size = 0;
+
+   /* Copy the DMA control words and USC task control words to constants, then
+    * arrange them so that the 64-bit words are together followed by the 32-bit
+    * words.
+    */
+   uint32_t control_constant =
+      pvr_pds_get_constants(&next_constant, 2, &data_size);
+   uint32_t emit_constant =
+      pvr_pds_get_constants(&next_constant,
+                            (2 * program->num_emit_word_pairs),
+                            &data_size);
+
+   uint32_t control_word_constant =
+      pvr_pds_get_constants(&next_constant,
+                            program->num_emit_word_pairs,
+                            &data_size);
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Src0 for DOUTU (64-bit). */
+      pvr_pds_write_wide_constant(buffer,
+                                  control_constant,
+                                  program->task_control.src0); /* DOUTU */
+
+      /* Emit words for end of tile program. */
+      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
+         pvr_pds_write_constant64(constants,
+                                  emit_constant + (2 * i),
+                                  program->emit_words[(2 * i) + 0],
+                                  program->emit_words[(2 * i) + 1]);
+      }
+
+      /* Control words: one DOUTW SRC1 per emit pair, the last one flagged. */
+      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
+         uint32_t doutw = pvr_pds_encode_doutw_src1(
+            (2 * i),
+            PVR_PDS_DOUTW_LOWER64,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+            false,
+            dev_info);
+
+         if (i == (program->num_emit_word_pairs - 1))
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+         pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
+      }
+   }
+
+   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* DOUTW the state into the shared register. */
+      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
+         *buffer++ = pvr_pds_encode_doutw64(
+            /* cc */ 0,
+            /* END */ 0,
+            /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
+            /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0 */
+      }
+
+      /* Kick the USC. */
+      *buffer++ = pvr_pds_encode_doutu(
+         /* cc */ 0,
+         /* END */ 1,
+         /* SRC0 */ control_constant >> 1);
+   }
+
+   /* One DOUTW per emit word pair plus the final DOUTU. */
+   uint32_t code_size = 1 + program->num_emit_word_pairs;
+
+   /* Save the data segment Pointer and size. */
+   program->data_segment = constants;
+   program->data_size = data_size;
+   program->code_size = code_size;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+      return (constants + next_constant);
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
+      return buffer;
+
+   /* Sizes-only generation: results were reported via *program. */
+   return NULL;
+}
+
+/**
+ * Checks if any of the vertex streams contains instance data.
+ *
+ * \param streams Streams contained in the vertex shader.
+ * \param num_streams Number of vertex streams.
+ * \returns true if one or more of the given vertex streams contains
+ *          instance data, otherwise false.
+ */
+static bool pvr_pds_vertex_streams_contains_instance_data(
+   const struct pvr_pds_vertex_stream *streams,
+   uint32_t num_streams)
+{
+   const struct pvr_pds_vertex_stream *const end = streams + num_streams;
+
+   /* Scan the stream array for the first instanced stream. */
+   for (const struct pvr_pds_vertex_stream *stream = streams; stream != end;
+        stream++) {
+      if (stream->instance_data)
+         return true;
+   }
+
+   return false;
+}
+
+/**
+ * Allocate a constant for a PDS vertex shader where the constant space is
+ * divided into banks of 8 dwords.
+ *
+ * \param num_banks Number of 8-dword banks reserved for per-stream data;
+ *                  allocations beyond them fall back to the linear allocator.
+ *                  (Renamed from "num_backs" — typo for "banks".)
+ * \param next_constant In/out: next candidate constant index.
+ * \param num_constants Number of dwords to allocate; must be 1 or 2.
+ * \param count In/out: running data-size count.
+ * \returns The allocated constant index.
+ */
+static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_banks,
+                                                 uint32_t *next_constant,
+                                                 uint32_t num_constants,
+                                                 uint32_t *count)
+{
+   /* Allocate constant for PDS vertex shader where constant is divided into
+    * banks.
+    */
+   uint32_t constant;
+
+   assert(num_constants == 1 || num_constants == 2);
+
+   /* Past the banked region: use the plain linear allocator. */
+   if (*next_constant >= (num_banks << 3))
+      return pvr_pds_get_constants(next_constant, num_constants, count);
+
+   if ((*next_constant % 8) == 0) {
+      /* At a bank boundary: a single dword stays in this bank, a 64-bit
+       * allocation consumes the whole bank.
+       */
+      constant = *next_constant;
+
+      if (num_constants == 1)
+         *next_constant += 1;
+      else
+         *next_constant += 8;
+   } else if (num_constants == 1) {
+      /* NOTE(review): advancing by 7 appears to assume *next_constant sits at
+       * bank offset 1 here (jump to the next bank boundary) — confirm.
+       */
+      constant = *next_constant;
+      *next_constant += 7;
+   } else {
+      /* 64-bit allocation mid-bank: skip to the next bank before allocating. */
+      *next_constant += 7;
+      constant = *next_constant;
+
+      if (*next_constant >= (num_banks << 3)) {
+         /* Crossed out of the banked region; account for the two dwords. */
+         *next_constant += 2;
+         *count += 2;
+      } else {
+         *next_constant += 8;
+      }
+   }
+   return constant;
+}
+
+/**
+ * Generates a PDS program to load USC vertex inputs based from one or more
+ * vertex buffers, each containing potentially multiple elements, and then a
+ * DOUTU to execute the USC.
+ *
+ * \param program Pointer to the description of the program which should be
+ * generated.
+ * \param buffer Pointer to buffer that receives the output of this function.
+ * Will either be the data segment or code segment depending on
+ * gen_mode.
+ * \param gen_mode Which part to generate, either data segment or
+ * code segment. If PDS_GENERATE_SIZES is specified, nothing is
+ * written, but size information in program is updated.
+ * \param dev_info PVR device info structure.
+ * \returns Pointer to just beyond the buffer for the data - i.e the value
+ * of the buffer after writing its contents.
+ */
+uint32_t *
+pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info)
+{
+ uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+ uint32_t next_stream_constant;
+ uint32_t next_temp;
+ uint32_t usc_control_constant64;
+ uint32_t stride_constant32 = 0;
+ uint32_t dma_address_constant64 = 0;
+ uint32_t dma_control_constant64;
+ uint32_t multiplier_constant32 = 0;
+ uint32_t base_instance_const32 = 0;
+
+ uint32_t temp = 0;
+ uint32_t index_temp64 = 0;
+ uint32_t num_vertices_temp64 = 0;
+ uint32_t pre_index_temp = (uint32_t)(-1);
+ bool first_ddmadt = true;
+ uint32_t input_register0;
+ uint32_t input_register1;
+ uint32_t input_register2;
+
+ struct pvr_pds_vertex_stream *vertex_stream;
+ struct pvr_pds_vertex_element *vertex_element;
+ uint32_t shift_2s_comp;
+
+ uint32_t data_size = 0;
+ uint32_t code_size = 0;
+ uint32_t temps_used = 0;
+
+ bool direct_writes_needed = false;
+
+ uint32_t consts_size = 0;
+ uint32_t vertex_id_control_word_const32 = 0;
+ uint32_t instance_id_control_word_const32 = 0;
+ uint32_t instance_id_modifier_word_const32 = 0;
+ uint32_t geometry_id_control_word_const64 = 0;
+ uint32_t empty_dma_control_constant64 = 0;
+
+ bool any_instanced_stream =
+ pvr_pds_vertex_streams_contains_instance_data(program->streams,
+ program->num_streams);
+
+ uint32_t base_instance_register = 0;
+ uint32_t ddmadt_enables = 0;
+
+ bool issue_empty_ddmad = false;
+ uint32_t last_stream_index = program->num_streams - 1;
+ bool current_p0 = false;
+ uint32_t skip_stream_flag = 0;
+
+ /* Generate the PDS vertex shader data. */
+
+#if defined(DEBUG)
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ for (uint32_t i = 0; i < program->data_size; i++)
+ buffer[i] = 0xDEADBEEF;
+ }
+#endif
+
+ /* Generate the PDS vertex shader program */
+ next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
+ /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
+ input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
+ /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
+ input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
+
+ if (program->iterate_remap_id)
+ input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
+ else
+ input_register2 = 0; /* Not used, but need to silence the compiler. */
+
+ /* Generate the PDS vertex shader code. The constants in the data block are
+ * arranged as follows:
+ *
+ * 64 bit bank 0 64 bit bank 1 64 bit bank 2 64 bit bank
+ * 3 Not used (tmps) Stride | Multiplier Address Control
+ */
+
+ /* Find out how many constants are needed by streams. */
+ for (uint32_t stream = 0; stream < program->num_streams; stream++) {
+ pvr_pds_get_constants(&next_constant,
+ 8 * program->streams[stream].num_elements,
+ &consts_size);
+ }
+
+ /* If there are no vertex streams allocate the first bank for USC Code
+ * Address.
+ */
+ if (consts_size == 0)
+ pvr_pds_get_constants(&next_constant, 2, &consts_size);
+ else
+ next_constant = 8;
+
+ direct_writes_needed = program->iterate_instance_id ||
+ program->iterate_vtx_id || program->iterate_remap_id;
+
+ if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+ /* Evaluate what config of DDMAD should be used for each stream. */
+ for (uint32_t stream = 0; stream < program->num_streams; stream++) {
+ vertex_stream = &program->streams[stream];
+
+ if (vertex_stream->use_ddmadt) {
+ ddmadt_enables |= (1 << stream);
+
+ /* The condition for index value is:
+ * index * stride + size <= bufferSize (all in unit of byte)
+ */
+ if (vertex_stream->stride == 0) {
+ if (vertex_stream->elements[0].size <=
+ vertex_stream->buffer_size_in_bytes) {
+ /* index can be any value -> no need to use DDMADT. */
+ ddmadt_enables &= (~(1 << stream));
+ } else {
+ /* No index works -> no need to issue DDMAD instruction.
+ */
+ skip_stream_flag |= (1 << stream);
+ }
+ } else {
+ /* index * stride + size <= bufferSize
+ *
+ * can be converted to:
+ * index <= (bufferSize - size) / stride
+ *
+ * where maximum index is:
+ * integer((bufferSize - size) / stride).
+ */
+ if (vertex_stream->buffer_size_in_bytes <
+ vertex_stream->elements[0].size) {
+ /* No index works -> no need to issue DDMAD instruction.
+ */
+ skip_stream_flag |= (1 << stream);
+ } else {
+ uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
+ vertex_stream->elements[0].size) /
+ vertex_stream->stride;
+ if (max_index == 0xFFFFFFFFu) {
+ /* No need to use DDMADT as all possible indices can
+ * pass the test.
+ */
+ ddmadt_enables &= (~(1 << stream));
+ } else {
+ /* In this case, test condition can be changed to
+ * index < max_index + 1.
+ */
+ program->streams[stream].num_vertices =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 1,
+ &consts_size);
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ pvr_pds_write_constant32(
+ buffer,
+ program->streams[stream].num_vertices,
+ max_index + 1);
+ }
+ }
+ }
+ }
+ }
+
+ if ((skip_stream_flag & (1 << stream)) == 0) {
+ issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
+ last_stream_index = stream;
+ }
+ }
+ } else {
+ if (program->num_streams > 0 &&
+ program->streams[program->num_streams - 1].use_ddmadt) {
+ issue_empty_ddmad = true;
+ }
+ }
+
+ if (direct_writes_needed)
+ issue_empty_ddmad = false;
+
+ if (issue_empty_ddmad) {
+ /* An empty DMA control const (DMA size = 0) is required in case the
+ * last DDMADD is predicated out and last flag does not have any usage.
+ */
+ empty_dma_control_constant64 =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 2,
+ &consts_size);
+ }
+
+ /* Assign constants for non stream or base instance if there is any
+ * instanced stream.
+ */
+ if (direct_writes_needed || any_instanced_stream ||
+ program->instance_ID_modifier) {
+ if (program->iterate_vtx_id) {
+ vertex_id_control_word_const32 =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 1,
+ &consts_size);
+ }
+
+ if (program->iterate_instance_id || program->instance_ID_modifier) {
+ if (program->instance_ID_modifier == 0) {
+ instance_id_control_word_const32 =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 1,
+ &consts_size);
+ } else {
+ instance_id_modifier_word_const32 =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 1,
+ &consts_size);
+ if ((instance_id_modifier_word_const32 % 2) == 0) {
+ instance_id_control_word_const32 =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 1,
+ &consts_size);
+ } else {
+ instance_id_control_word_const32 =
+ instance_id_modifier_word_const32;
+ instance_id_modifier_word_const32 =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 1,
+ &consts_size);
+ }
+ }
+ }
+
+ if (program->base_instance != 0) {
+ base_instance_const32 =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 1,
+ &consts_size);
+ }
+
+ if (program->iterate_remap_id) {
+ geometry_id_control_word_const64 =
+ pvr_pds_get_bank_based_constants(program->num_streams,
+ &next_constant,
+ 2,
+ &consts_size);
+ }
+ }
+
+ if (program->instance_ID_modifier != 0) {
+ /* This instanceID modifier is used when a draw array instanced call
+ * sourcing from client data cannot fit into vertex buffer and needs to
+ * be broken down into several draw calls.
+ */
+
+ code_size += 1;
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ pvr_pds_write_constant32(buffer,
+ instance_id_modifier_word_const32,
+ program->instance_ID_modifier);
+ } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_inst_encode_add32(
+ /* cc */ 0x0,
+ /* ALUM */ 0, /* Unsigned */
+ /* SNA */ 0, /* Add */
+ /* SRC0 32b */ instance_id_modifier_word_const32,
+ /* SRC1 32b */ input_register1,
+ /* DST 32b */ input_register1);
+ }
+ }
+
+ /* Adjust instanceID if necessary. */
+ if (any_instanced_stream || program->iterate_instance_id) {
+ if (program->base_instance != 0) {
+ assert(!program->draw_indirect);
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ pvr_pds_write_constant32(buffer,
+ base_instance_const32,
+ program->base_instance);
+ }
+
+ base_instance_register = base_instance_const32;
+ }
+
+ if (program->draw_indirect) {
+ assert((program->instance_ID_modifier == 0) &&
+ (program->base_instance == 0));
+
+ base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
+ }
+ }
+
+ next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+ usc_control_constant64 =
+ pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
+
+ for (uint32_t stream = 0; stream < program->num_streams; stream++) {
+ bool instance_data_with_base_instance;
+
+ if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
+ ((skip_stream_flag & (1 << stream)) != 0)) {
+ continue;
+ }
+
+ vertex_stream = &program->streams[stream];
+
+ instance_data_with_base_instance =
+ ((vertex_stream->instance_data) &&
+ ((program->base_instance > 0) || (program->draw_indirect)));
+
+ /* Get all 8 32-bit constants at once, only 6 for first stream due to
+ * USC constants.
+ */
+ if (stream == 0) {
+ stride_constant32 =
+ pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
+ } else {
+ next_constant =
+ pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
+
+ /* Skip bank 0. */
+ stride_constant32 = next_constant + 2;
+ }
+
+ multiplier_constant32 = stride_constant32 + 1;
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ pvr_pds_write_constant32(buffer,
+ stride_constant32,
+ vertex_stream->stride);
+
+ /* Vertex stream frequency multiplier. */
+ if (vertex_stream->multiplier)
+ pvr_pds_write_constant32(buffer,
+ multiplier_constant32,
+ vertex_stream->multiplier);
+ }
+
+ /* Update the code size count and temps count for the above code
+ * segment.
+ */
+ if (vertex_stream->current_state) {
+ code_size += 1;
+ temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
+ } else {
+ unsigned int num_temps_required = 0;
+
+ if (vertex_stream->multiplier) {
+ num_temps_required += 2;
+ code_size += 3;
+
+ if (vertex_stream->shift) {
+ code_size += 1;
+
+ if ((int32_t)vertex_stream->shift > 0)
+ code_size += 1;
+ }
+ } else if (vertex_stream->shift) {
+ code_size += 1;
+ num_temps_required += 1;
+ } else if (instance_data_with_base_instance) {
+ num_temps_required += 1;
+ }
+
+ if (num_temps_required != 0) {
+ temp = pvr_pds_get_temps(&next_temp,
+ num_temps_required,
+ &temps_used); /* 64-bit */
+ } else {
+ temp = vertex_stream->instance_data ? input_register1
+ : input_register0;
+ }
+
+ if (instance_data_with_base_instance)
+ code_size += 1;
+ }
+
+ /* The real code segment. */
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ /* If it's current state stream, then index = 0 always. */
+ if (vertex_stream->current_state) {
+ /* Put zero in temp. */
+ *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
+ } else if (vertex_stream->multiplier) {
+ /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
+ * new: Iout = (Iin * Multiplier) >> (shift+31)
+ */
+
+ /* Put zero in temp. Need zero for add part of the following
+ * MAD. MAD source is 64 bit, so need two LIMMs.
+ */
+ *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
+ /* Put zero in temp. Need zero for add part of the following
+ * MAD.
+ */
+ *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
+
+ /* old: (Iin * (Multiplier+2^24))
+ * new: (Iin * Multiplier)
+ */
+ *buffer++ = pvr_rogue_inst_encode_mad(
+ 0, /* Sign of add is positive. */
+ 0, /* Unsigned ALU mode */
+ 0, /* Unconditional */
+ multiplier_constant32,
+ vertex_stream->instance_data ? input_register1 : input_register0,
+ temp / 2,
+ temp / 2);
+
+ if (vertex_stream->shift) {
+ int32_t shift = (int32_t)vertex_stream->shift;
+
+ /* new: >> (shift + 31) */
+ shift += 31;
+ shift *= -1;
+
+ if (shift < -31) {
+ /* >> (31) */
+ shift_2s_comp = 0xFFFE1;
+ *buffer++ = pvr_pds_inst_encode_stflp64(
+ /* cc */ 0,
+ /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+ /* IM */ 1, /* enable immediate */
+ /* SRC0 */ temp / 2,
+ /* SRC1 */ input_register0, /* This won't be used in
+ * a shift operation.
+ */
+ /* SRC2 (Shift) */ shift_2s_comp,
+ /* DST */ temp / 2);
+ shift += 31;
+ }
+
+ /* old: >> (Shift+24)
+ * new: >> (shift + 31)
+ */
+ shift_2s_comp = *((uint32_t *)&shift);
+ *buffer++ = pvr_pds_inst_encode_stflp64(
+ /* cc */ 0,
+ /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+ /* IM */ 1, /*enable immediate */
+ /* SRC0 */ temp / 2,
+ /* SRC1 */ input_register0, /* This won't be used in
+ * a shift operation.
+ */
+ /* SRC2 (Shift) */ shift_2s_comp,
+ /* DST */ temp / 2);
+ }
+
+ if (instance_data_with_base_instance) {
+ *buffer++ =
+ pvr_pds_inst_encode_add32(0, /* cc */
+ 0, /* ALNUM */
+ 0, /* SNA */
+ base_instance_register, /* src0
+ */
+ temp, /* src1 */
+ temp /* dst */
+ );
+ }
+ } else { /* NOT vertex_stream->multiplier */
+ if (vertex_stream->shift) {
+ /* Shift Index/InstanceNum Right by shift bits. Put result
+ * in a Temp.
+ */
+
+ /* 2's complement of shift as this will be a right shift. */
+ shift_2s_comp = ~(vertex_stream->shift) + 1;
+
+ *buffer++ = pvr_pds_inst_encode_stflp32(
+ /* IM */ 1, /* enable immediate. */
+ /* cc */ 0,
+ /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+ /* SRC0 */ vertex_stream->instance_data ? input_register1
+ : input_register0,
+ /* SRC1 */ input_register0, /* This won't be used in
+ * a shift operation.
+ */
+ /* SRC2 (Shift) */ shift_2s_comp,
+ /* DST */ temp);
+
+ if (instance_data_with_base_instance) {
+ *buffer++ =
+ pvr_pds_inst_encode_add32(0, /* cc */
+ 0, /* ALNUM */
+ 0, /* SNA */
+ base_instance_register, /* src0
+ */
+ temp, /* src1 */
+ temp /* dst */
+ );
+ }
+ } else {
+ if (instance_data_with_base_instance) {
+ *buffer++ =
+ pvr_pds_inst_encode_add32(0, /* cc */
+ 0, /* ALNUM */
+ 0, /* SNA */
+ base_instance_register, /* src0
+ */
+ input_register1, /* src1 */
+ temp /* dst */
+ );
+ } else {
+ /* If the shift instruction doesn't happen, use the IR
+ * directly into the following MAD.
+ */
+ temp = vertex_stream->instance_data ? input_register1
+ : input_register0;
+ }
+ }
+ }
+ }
+
+ if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+ if (vertex_stream->use_ddmadt)
+ ddmadt_enables |= (1 << stream);
+ } else {
+ if ((ddmadt_enables & (1 << stream)) != 0) {
+ /* Emulate what DDMADT does for range checking. */
+ if (first_ddmadt) {
+ /* Get an 64 bits temp such that cmp current index with
+ * allowed vertex number can work.
+ */
+ index_temp64 =
+ pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
+ */
+ num_vertices_temp64 =
+ pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
+ */
+
+ index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
+ num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
+
+ code_size += 3;
+ current_p0 = true;
+ }
+
+ code_size += (temp == pre_index_temp ? 1 : 2);
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ if (first_ddmadt) {
+ /* Set predicate to be P0. */
+ *buffer++ = pvr_pds_encode_bra(
+ PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
+ */
+ 0, /* Neg */
+ PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
+ */
+ 1); /* Addr */
+
+ *buffer++ =
+ pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
+ *buffer++ =
+ pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
+ }
+
+ if (temp != pre_index_temp) {
+ *buffer++ = pvr_pds_inst_encode_stflp32(
+ /* IM */ 1, /* enable immediate. */
+ /* cc */ 0,
+ /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+ /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
+ /* SRC1 */ 0,
+ /* SRC2 (Shift) */ 0,
+ /* DST */ index_temp64);
+ }
+
+ *buffer++ = pvr_pds_inst_encode_stflp32(
+ /* IM */ 1, /* enable immediate. */
+ /* cc */ 0,
+ /* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
+ /* SRC0 */ num_vertices_temp64 + 1,
+ /* SRC1 */ vertex_stream->num_vertices,
+ /* SRC2 (Shift) */ 0,
+ /* DST */ num_vertices_temp64);
+ }
+
+ first_ddmadt = false;
+
+ pre_index_temp = temp;
+ }
+ }
+
+ /* Process the elements in the stream. */
+ for (uint32_t element = 0; element < vertex_stream->num_elements;
+ element++) {
+ bool terminate = false;
+
+ vertex_element = &vertex_stream->elements[element];
+ /* Check if last DDMAD needs terminate or not. */
+ if ((element == (vertex_stream->num_elements - 1)) &&
+ (stream == last_stream_index)) {
+ terminate = !issue_empty_ddmad && !direct_writes_needed;
+ }
+
+ /* Get a new set of constants for this element. */
+ if (element) {
+ /* Get all 8 32 bit constants at once. */
+ next_constant =
+ pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
+ }
+
+ dma_address_constant64 = next_constant + 4;
+ dma_control_constant64 = dma_address_constant64 + 2;
+
+ if (vertex_element->component_size == 0) {
+ /* Standard DMA.
+ *
+ * Write the DMA transfer control words into the PDS data
+ * section.
+ *
+ * DMA Address is 40-bit.
+ */
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ uint32_t dma_control_word;
+ uint64_t dma_control_word64 = 0;
+ uint32_t dma_size;
+
+ /* Write the address to the constant. */
+ pvr_pds_write_dma_address(buffer,
+ dma_address_constant64,
+ vertex_stream->address +
+ (uint64_t)vertex_element->offset,
+ false,
+ dev_info);
+ {
+ if (program->stream_patch_offsets) {
+ program
+ ->stream_patch_offsets[program->num_stream_patches++] =
+ (stream << 16) | (dma_address_constant64 >> 1);
+ }
+ }
+
+ /* Size is in bytes - round up to nearest 32 bit word. */
+ dma_size =
+ (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
+ PVR_PDS_DWORD_SHIFT;
+
+ assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
+
+ /* Set up the dma transfer control word. */
+ dma_control_word =
+ dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
+
+ dma_control_word |=
+ vertex_element->reg
+ << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
+
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
+
+ if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+ if ((ddmadt_enables & (1 << stream)) != 0) {
+ assert(
+ ((((uint64_t)vertex_stream->buffer_size_in_bytes
+ << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
+ ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
+ (uint64_t)vertex_stream->buffer_size_in_bytes);
+ dma_control_word64 =
+ (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
+ (((uint64_t)vertex_stream->buffer_size_in_bytes
+ << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
+ ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
+ }
+ }
+ /* If this is the last dma then also set the last flag. */
+ if (terminate) {
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
+ }
+
+ /* Write the 32-Bit SRC3 word to a 64-bit constant as per
+ * spec.
+ */
+ pvr_pds_write_wide_constant(buffer,
+ dma_control_constant64,
+ dma_control_word64 |
+ (uint64_t)dma_control_word);
+ }
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+ if ((ddmadt_enables & (1 << stream)) != 0) {
+ *buffer++ = pvr_pds_inst_encode_cmp(
+ 0, /* cc enable */
+ PVR_ROGUE_PDSINST_COP_LT, /* Operation */
+ index_temp64 >> 1, /* SRC0 (REGS64TP) */
+ (num_vertices_temp64 >> 1) +
+ PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
+ (REGS64)
+ */
+ }
+ }
+ /* Multiply by the vertex stream stride and add the base
+ * followed by a DOUTD.
+ *
+ * dmad32 (C0 * T0) + C1, C2
+ * src0 = stride src1 = index src2 = baseaddr src3 =
+ * doutd part
+ */
+
+ uint32_t cc;
+ if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
+ cc = 0;
+ else
+ cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
+
+ *buffer++ = pvr_pds_inst_encode_ddmad(
+ /* cc */ cc,
+ /* END */ 0,
+ /* SRC0 */ stride_constant32, /* Stride 32-bit*/
+ /* SRC1 */ temp, /* Index 32-bit*/
+ /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
+ * Address
+ * +
+ * Offset
+ */
+ /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
+ * Transfer
+ * Control
+ * Word.
+ */
+ );
+ }
+
+ if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
+ ((ddmadt_enables & (1 << stream)) != 0)) {
+ code_size += 1;
+ }
+ code_size += 1;
+ } else {
+ /* Repeat DMA.
+ *
+ * Write the DMA transfer control words into the PDS data
+ * section.
+ *
+ * DMA address is 40-bit.
+ */
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ uint32_t dma_control_word;
+
+ /* Write the address to the constant. */
+ pvr_pds_write_dma_address(buffer,
+ dma_address_constant64,
+ vertex_stream->address +
+ (uint64_t)vertex_element->offset,
+ false,
+ dev_info);
+
+ /* Set up the DMA transfer control word. */
+ dma_control_word =
+ vertex_element->size
+ << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
+
+ dma_control_word |=
+ vertex_element->reg
+ << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
+
+ switch (vertex_element->component_size) {
+ case 4: {
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
+ break;
+ }
+ case 3: {
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
+ break;
+ }
+ case 2: {
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
+ break;
+ }
+ default: {
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
+ break;
+ }
+ }
+
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
+
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
+
+ /* If this is the last dma then also set the last flag. */
+ if (terminate) {
+ dma_control_word |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
+ }
+
+ /* Write the 32-Bit SRC3 word to a 64-bit constant as per
+ * spec.
+ */
+ pvr_pds_write_wide_constant(buffer,
+ dma_control_constant64,
+ (uint64_t)dma_control_word);
+ }
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ /* Multiply by the vertex stream stride and add the base
+ * followed by a DOUTD.
+ *
+ * dmad32 (C0 * T0) + C1, C2
+ * src0 = stride src1 = index src2 = baseaddr src3 =
+ * doutd part
+ */
+ *buffer++ = pvr_pds_inst_encode_ddmad(
+ /* cc */ 0,
+ /* END */ 0,
+ /* SRC0 */ stride_constant32, /* Stride 32-bit*/
+ /* SRC1 */ temp, /* Index 32-bit*/
+ /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
+ * Address
+ * +
+ * Offset.
+ */
+ /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
+ * Transfer
+ * Control
+ * Word.
+ */
+ );
+ }
+
+ code_size += 1;
+ } /* End of repeat DMA. */
+ } /* Element loop */
+ } /* Stream loop */
+
+ if (issue_empty_ddmad) {
+ /* Issue an empty last DDMAD, always executed. */
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ pvr_pds_write_wide_constant(
+ buffer,
+ empty_dma_control_constant64,
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
+ }
+
+ code_size += 1;
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_inst_encode_ddmad(
+ /* cc */ 0,
+ /* END */ 0,
+ /* SRC0 */ stride_constant32, /* Stride 32-bit*/
+ /* SRC1 */ temp, /* Index 32-bit*/
+ /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
+ *Address +
+ *Offset.
+ */
+ /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
+ * Transfer
+ * Control
+ * Word.
+ */
+ );
+ }
+ }
+
+ if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+ if (current_p0) {
+ code_size += 1;
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ /* Revert predicate back to IF0 which is required by DOUTU. */
+ *buffer++ =
+ pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
+ */
+ 0, /* Neg */
+ PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
+ */
+ 1); /* Addr */
+ }
+ }
+ }
+ /* Send VertexID if requested. */
+ if (program->iterate_vtx_id) {
+ if (program->draw_indirect) {
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_inst_encode_add32(
+ /* cc */ 0x0,
+ /* ALUM */ 0, /* Unsigned */
+ /* SNA */ 1, /* Minus */
+ /* SRC0 32b */ input_register0, /* vertexID */
+ /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
+ * vertexID.
+ */
+ /* DST 32b */ input_register0);
+ }
+
+ code_size += 1;
+ }
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ uint32_t doutw = pvr_pds_encode_doutw_src1(
+ program->vtx_id_register,
+ PVR_PDS_DOUTW_LOWER32,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ false,
+ dev_info);
+
+ if (!program->iterate_instance_id && !program->iterate_remap_id)
+ doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+ pvr_pds_write_constant32(buffer,
+ vertex_id_control_word_const32,
+ doutw);
+ } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_encode_doutw64(
+ /* cc */ 0,
+ /* END */ 0,
+ /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
+ */
+ /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
+ }
+
+ code_size += 1;
+ }
+
+ /* Send InstanceID if requested. */
+ if (program->iterate_instance_id) {
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ uint32_t doutw = pvr_pds_encode_doutw_src1(
+ program->instance_id_register,
+ PVR_PDS_DOUTW_UPPER32,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ true,
+ dev_info);
+
+ if (!program->iterate_remap_id)
+ doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+ pvr_pds_write_constant32(buffer,
+ instance_id_control_word_const32,
+ doutw);
+ } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_encode_doutw64(
+ /* cc */ 0,
+ /* END */ 0,
+ /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
+ /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
+ }
+
+ code_size += 1;
+ }
+
+ /* Send remapped index number to vi0. */
+ if (program->iterate_remap_id) {
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ uint32_t doutw = pvr_pds_encode_doutw_src1(
+ 0 /* vi0 */,
+ PVR_PDS_DOUTW_LOWER32,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
+ false,
+ dev_info);
+
+ pvr_pds_write_constant64(buffer,
+ geometry_id_control_word_const64,
+ doutw,
+ 0);
+ } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_encode_doutw64(
+ /* cc */ 0,
+ /* END */ 0,
+ /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
+ * Src1
+ */
+ /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
+ }
+
+ code_size += 1;
+ }
+
+ /* Copy the USC task control words to constants. */
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ pvr_pds_write_wide_constant(buffer,
+ usc_control_constant64,
+ program->usc_task_control.src0); /* 64-bit
+ * Src0
+ */
+ if (program->stream_patch_offsets) {
+ /* USC TaskControl is always the first patch. */
+ program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
+ }
+ }
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ /* Conditionally (if last in task) issue the task to the USC
+ * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
+ */
+
+ *buffer++ = pvr_pds_encode_doutu(
+ /* cc */ 1,
+ /* END */ 1,
+ /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
+
+ /* End the program if the Dout did not already end it. */
+ *buffer++ = pvr_pds_inst_encode_halt(0);
+ }
+
+ code_size += 2;
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ /* Set the data segment pointer and ensure we return 1 past the buffer
+ * ptr.
+ */
+ program->data_segment = buffer;
+
+ buffer += consts_size;
+ }
+
+ program->temps_used = temps_used;
+ program->data_size = consts_size;
+ program->code_size = code_size;
+ program->ddmadt_enables = ddmadt_enables;
+ if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
+ program->skip_stream_flag = skip_stream_flag;
+
+ return buffer;
+}
+
+/**
+ * Generates a PDS program to load USC compute shader global/local/workgroup
+ * sizes/ids and then a DOUTU to execute the USC.
+ *
+ * \param program Pointer to description of the program that should be
+ * generated.
+ * \param buffer Pointer to buffer that receives the output of this function.
+ * This will be either the data segment, or the code depending on
+ * gen_mode.
+ * \param gen_mode Which part to generate, either data segment or code segment.
+ * If PDS_GENERATE_SIZES is specified, nothing is written, but
+ * size information in program is updated.
+ * \param dev_info PVR device info struct.
+ * \returns Pointer to just beyond the buffer for the data - i.e. the value of
+ * the buffer after writing its contents.
+ */
+uint32_t *
+pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
+                       uint32_t *restrict buffer,
+                       enum pvr_pds_generate_mode gen_mode,
+                       const struct pvr_device_info *dev_info)
+{
+   uint32_t usc_control_constant64;
+   uint32_t usc_control_constant64_coeff_update = 0;
+   uint32_t zero_constant64 = 0;
+
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+   uint32_t temps_used = 0;
+   uint32_t doutw = 0;
+
+   uint32_t barrier_ctrl_word = 0;
+   uint32_t barrier_ctrl_word2 = 0;
+
+   /* Even though there are 3 IDs for local and global we only need max one
+    * DOUTW for local, and two for global.
+    */
+   uint32_t work_group_id_ctrl_words[2] = { 0 };
+   uint32_t local_id_ctrl_word = 0;
+   /* Written and read only when at least one local_input_regs[] entry is in
+    * use; the guards below keep it from being read uninitialized.
+    */
+   uint32_t local_input_register;
+
+   /* For the constant value to load into ptemp (SW fence). */
+   uint64_t predicate_ld_src0_constant = 0;
+   uint32_t cond_render_negate_constant = 0;
+
+   /* Assigned only when program->conditional_render is set. */
+   uint32_t cond_render_pred_temp;
+   uint32_t cond_render_negate_temp;
+
+   /* 2x 64 bit registers that will mask out the Predicate load. */
+   uint32_t cond_render_pred_mask_constant = 0;
+
+#if defined(DEBUG)
+   /* Poison the data segment first so unwritten constants stand out. */
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      for (uint32_t j = 0; j < program->data_size; j++)
+         buffer[j] = 0xDEADBEEF;
+   }
+#endif
+
+   /* All the compute input registers are in temps. */
+   temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
+
+   uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
+
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+   /* Allocation pass: reserve data-segment constants (and extra temps) for
+    * each enabled feature, in the same order the data-segment writer below
+    * fills them.
+    */
+   if (program->kick_usc) {
+      /* Copy the USC task control words to constants. */
+      usc_control_constant64 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (program->has_coefficient_update_task) {
+      usc_control_constant64_coeff_update =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (program->conditional_render) {
+      predicate_ld_src0_constant =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      cond_render_negate_constant =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      cond_render_pred_mask_constant =
+         pvr_pds_get_constants(&next_constant, 4, &data_size);
+
+      /* LD will load a 64 bit value. */
+      cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
+      cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
+
+      program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
+      program->cond_render_pred_temp = cond_render_pred_temp;
+   }
+
+   /* A zero constant is shared by the barrier clear, the PDS-barrier clear
+    * and the conditional-render negate below.
+    */
+   if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+       (program->clear_pds_barrier) ||
+       (program->kick_usc && program->conditional_render)) {
+      zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+      barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
+      if (PVR_HAS_QUIRK(dev_info, 51210)) {
+         barrier_ctrl_word2 =
+            pvr_pds_get_constants(&next_constant, 1, &data_size);
+      }
+   }
+
+   /* X and Y work-group IDs share one control word; Z gets its own. */
+   if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
+       program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+      work_group_id_ctrl_words[0] =
+         pvr_pds_get_constants(&next_constant, 1, &data_size);
+   }
+
+   if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+      work_group_id_ctrl_words[1] =
+         pvr_pds_get_constants(&next_constant, 1, &data_size);
+   }
+
+   if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+       (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+       (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+      local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
+   }
+
+   if (program->add_base_workgroup) {
+      for (uint32_t workgroup_component = 0; workgroup_component < 3;
+           workgroup_component++) {
+         if (program->work_group_input_regs[workgroup_component] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            program
+               ->base_workgroup_constant_offset_in_dwords[workgroup_component] =
+               pvr_pds_get_constants(&next_constant, 1, &data_size);
+         }
+      }
+   }
+
+   /* Data-segment pass: fill in the constants reserved above. */
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      if (program->kick_usc) {
+         /* Src0 for DOUTU */
+         pvr_pds_write_wide_constant(buffer,
+                                     usc_control_constant64,
+                                     program->usc_task_control.src0); /* 64-bit
+                                                                       * Src0.
+                                                                       */
+      }
+
+      if (program->has_coefficient_update_task) {
+         /* Src0 for DOUTU. */
+         pvr_pds_write_wide_constant(
+            buffer,
+            usc_control_constant64_coeff_update,
+            program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
+      }
+
+      if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->clear_pds_barrier) ||
+          (program->kick_usc && program->conditional_render)) {
+         pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
+                                                                   * Src0
+                                                                   */
+      }
+
+      if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         if (PVR_HAS_QUIRK(dev_info, 51210)) {
+            /* Write the constant for the coefficient register write. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->barrier_coefficient + 4,
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               true,
+               dev_info);
+            pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
+         }
+         /* Write the constant for the coefficient register write. */
+         doutw = pvr_pds_encode_doutw_src1(
+            program->barrier_coefficient,
+            PVR_PDS_DOUTW_LOWER64,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+            true,
+            dev_info);
+
+         /* Check whether the barrier is going to be the last DOUTW done by
+          * the coefficient sync task.
+          */
+         if ((program->work_group_input_regs[0] ==
+              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
+             (program->work_group_input_regs[1] ==
+              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
+             (program->work_group_input_regs[2] ==
+              PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+         }
+
+         pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
+      }
+
+      /* If we want work-group id X, see if we also want work-group id Y. */
+      if (program->work_group_input_regs[0] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED &&
+          program->work_group_input_regs[1] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         /* Make sure we are going to DOUTW them into adjacent registers
+          * otherwise we can't do it in one.
+          */
+         assert(program->work_group_input_regs[1] ==
+                (program->work_group_input_regs[0] + 1));
+
+         doutw = pvr_pds_encode_doutw_src1(
+            program->work_group_input_regs[0],
+            PVR_PDS_DOUTW_LOWER64,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+            true,
+            dev_info);
+
+         /* If we don't want the Z work-group id then this is the last one.
+          */
+         if (program->work_group_input_regs[2] ==
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+         }
+
+         pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
+      }
+      /* If we only want one of X or Y then handle them separately. */
+      else {
+         if (program->work_group_input_regs[0] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            doutw = pvr_pds_encode_doutw_src1(
+               program->work_group_input_regs[0],
+               PVR_PDS_DOUTW_LOWER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               true,
+               dev_info);
+
+            /* If we don't want the Z work-group id then this is the last
+             * one.
+             */
+            if (program->work_group_input_regs[2] ==
+                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+               doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+            }
+
+            pvr_pds_write_constant32(buffer,
+                                     work_group_id_ctrl_words[0],
+                                     doutw);
+         } else if (program->work_group_input_regs[1] !=
+                    PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            doutw = pvr_pds_encode_doutw_src1(
+               program->work_group_input_regs[1],
+               PVR_PDS_DOUTW_UPPER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               true,
+               dev_info);
+
+            /* If we don't want the Z work-group id then this is the last
+             * one.
+             */
+            if (program->work_group_input_regs[2] ==
+                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+               doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+            }
+
+            pvr_pds_write_constant32(buffer,
+                                     work_group_id_ctrl_words[0],
+                                     doutw);
+         }
+      }
+
+      /* Handle work-group id Z. */
+      if (program->work_group_input_regs[2] !=
+          PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         doutw = pvr_pds_encode_doutw_src1(
+            program->work_group_input_regs[2],
+            PVR_PDS_DOUTW_UPPER32,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
+            true,
+            dev_info);
+
+         pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
+      }
+
+      /* Handle the local IDs. */
+      if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+         uint32_t dest_reg;
+
+         /* If we want local id Y and Z make sure the compiler wants them in
+          * the same register.
+          */
+         if (!program->flattened_work_groups) {
+            if ((program->local_input_regs[1] !=
+                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
+                (program->local_input_regs[2] !=
+                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+               assert(program->local_input_regs[1] ==
+                      program->local_input_regs[2]);
+            }
+         }
+
+         if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
+            dest_reg = program->local_input_regs[1];
+         else
+            dest_reg = program->local_input_regs[2];
+
+         /* If we want local id X and (Y or Z) then we can do that in a
+          * single 64-bit DOUTW.
+          */
+         if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            assert(dest_reg == (program->local_input_regs[0] + 1));
+
+            doutw = pvr_pds_encode_doutw_src1(
+               program->local_input_regs[0],
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               true,
+               dev_info);
+
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+            pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
+         }
+         /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
+          */
+         else {
+            doutw = pvr_pds_encode_doutw_src1(
+               dest_reg,
+               PVR_PDS_DOUTW_UPPER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               true,
+               dev_info);
+
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+            pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
+         }
+      }
+      /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
+       */
+      else if (program->local_input_regs[0] !=
+               PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         doutw = pvr_pds_encode_doutw_src1(
+            program->local_input_regs[0],
+            PVR_PDS_DOUTW_LOWER32,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
+            true,
+            dev_info);
+
+         pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
+      }
+   }
+
+   /* Code generation and size counting share one path; APPEND either emits
+    * an instruction or accumulates its size.
+    */
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
+       gen_mode == PDS_GENERATE_SIZES) {
+      const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
+#define APPEND(X)                                                     \
+   if (encode) { \
+      *buffer = X; \
+      buffer++; \
+   } else { \
+      code_size += sizeof(uint32_t); \
+   }
+
+   /* NOTE(review): in sizing mode code_size accumulates bytes
+    * (sizeof(uint32_t) per instruction), unlike other generators in this
+    * file which count dwords — confirm consumers of code_size expect this
+    * unit.
+    */
+
+      /* Assert that coeff_update_task_branch_size is > 0 because if it is 0
+       * then we will be doing an infinite loop.
+       */
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
+         assert(program->coeff_update_task_branch_size > 0);
+
+      /* Test whether this is the coefficient update task or not. */
+      APPEND(
+         pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
+                            PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
+                            PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
+                            program->coeff_update_task_branch_size /* ADDR */));
+
+      /* Do we need to initialize the barrier coefficient? */
+      if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         if (PVR_HAS_QUIRK(dev_info, 51210)) {
+            /* Initialize the second barrier coefficient registers to zero.
+             */
+            APPEND(pvr_pds_encode_doutw64(0, /* cc */
+                                          0, /* END */
+                                          barrier_ctrl_word2, /* SRC1 */
+                                          zero_constant64 >> 1)); /* SRC0 */
+         }
+         /* Initialize the coefficient register to zero. */
+         APPEND(pvr_pds_encode_doutw64(0, /* cc */
+                                       0, /* END */
+                                       barrier_ctrl_word, /* SRC1 */
+                                       zero_constant64 >> 1)); /* SRC0 */
+      }
+
+      /* Add the base work-group offsets onto the incoming work-group IDs. */
+      if (program->add_base_workgroup) {
+         const uint32_t temp_values[3] = { 0, 1, 3 };
+         for (uint32_t workgroup_component = 0; workgroup_component < 3;
+              workgroup_component++) {
+            if (program->work_group_input_regs[workgroup_component] ==
+                PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
+               continue;
+
+            APPEND(pvr_pds_inst_encode_add32(
+               /* cc */ 0x0,
+               /* ALUM */ 0,
+               /* SNA */ 0,
+               /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
+                  program->base_workgroup_constant_offset_in_dwords
+                     [workgroup_component],
+               /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
+                  PVR_PDS_CDM_WORK_GROUP_ID_X +
+                  temp_values[workgroup_component],
+               /* DST (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
+                  PVR_PDS_CDM_WORK_GROUP_ID_X +
+                  temp_values[workgroup_component]));
+         }
+      }
+
+      /* If we are going to put the work-group IDs in coefficients then we
+       * just need to do the DOUTWs.
+       */
+      if ((program->work_group_input_regs[0] !=
+           PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->work_group_input_regs[1] !=
+           PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+         uint32_t dest_reg;
+
+         if (program->work_group_input_regs[0] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
+         } else {
+            dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
+         }
+
+         APPEND(pvr_pds_encode_doutw64(0, /* cc */
+                                       0, /* END */
+                                       work_group_id_ctrl_words[0], /* SRC1
+                                                                     */
+                                       dest_reg >> 1)); /* SRC0 */
+      }
+
+      if (program->work_group_input_regs[2] !=
+          PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         APPEND(pvr_pds_encode_doutw64(
+            0, /* cc */
+            0, /* END */
+            work_group_id_ctrl_words[1], /* SRC1 */
+            (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
+               1)); /* SRC0 */
+      }
+
+      /* Issue the task to the USC. */
+      if (program->kick_usc && program->has_coefficient_update_task) {
+         APPEND(pvr_pds_encode_doutu(0, /* cc */
+                                     1, /* END */
+                                     usc_control_constant64_coeff_update >>
+                                        1)); /* SRC0; DOUTU 64-bit Src0 */
+      }
+
+      /* Encode a HALT */
+      APPEND(pvr_pds_inst_encode_halt(0));
+
+      /* Set the branch size used to skip the coefficient sync task. */
+      /* NOTE(review): in code-segment mode APPEND does not accumulate
+       * code_size, so this stores 0 — the meaningful value comes from a
+       * prior PDS_GENERATE_SIZES pass; confirm the expected call order.
+       */
+      program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
+
+      /* DOUTW in the local IDs. */
+
+      /* If we want X and Y or Z, we only need one DOUTW. */
+      if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
+          ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+           (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) {
+         local_input_register =
+            PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
+      } else {
+         /* If we just want X. */
+         if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            local_input_register =
+               PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
+         }
+         /* If we just want Y or Z. */
+         else if (program->local_input_regs[1] !=
+                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
+                  program->local_input_regs[2] !=
+                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            local_input_register =
+               PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
+         }
+      }
+
+      if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+         APPEND(pvr_pds_encode_doutw64(0, /* cc */
+                                       0, /* END */
+                                       local_id_ctrl_word, /* SRC1 */
+                                       local_input_register >> 1)); /* SRC0
+                                                                     */
+      }
+
+      if (program->clear_pds_barrier) {
+         /* Zero the persistent temp (SW fence for context switch). */
+         APPEND(pvr_pds_inst_encode_add64(
+            0, /* cc */
+            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (zero_constant64 >> 1), /* src0 = 0 */
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (zero_constant64 >> 1), /* src1 = 0 */
+            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
+                                                             * ptemp64[0]
+                                                             */
+      }
+
+      /* If this is a fence, issue the DOUTC. */
+      if (program->fence) {
+         APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
+                                          0 /* END */));
+      }
+
+      if (program->kick_usc) {
+         if (program->conditional_render) {
+            /* Conditional rendering: load the 64-bit predicate, mask and
+             * fold it, XOR with the negate value, then branch over the
+             * DOUTU when the resulting predicate is zero.
+             */
+            /* Skip if coefficient update task. */
+            APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
+                                           0,
+                                           PVR_ROGUE_PDSINST_PREDICATE_KEEP,
+                                           16));
+
+            /* Load the predicate. */
+            APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
+
+            /* Load negate constant into temp for CMP. */
+            APPEND(pvr_pds_inst_encode_add64(
+               0, /* cc */
+               PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+               PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+               PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+                  (cond_render_negate_constant >> 1), /* src0 = 0 */
+               PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+                  (zero_constant64 >> 1), /* src1 = 0 */
+               PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
+                  (cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
+                                                     */
+
+            APPEND(pvr_pds_inst_encode_wdf(0));
+
+            for (uint32_t i = 0; i < 4; i++) {
+               APPEND(pvr_pds_inst_encode_stflp32(
+                  1, /* enable immediate */
+                  0, /* cc */
+                  PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
+                  cond_render_pred_temp + i, /* SRC0 */
+                  cond_render_pred_mask_constant + i, /* SRC1 */
+                  0, /* SRC2 (Shift) */
+                  cond_render_pred_temp + i)); /* DST */
+
+               APPEND(
+                  pvr_pds_inst_encode_stflp32(1, /* enable immediate */
+                                              0, /* cc */
+                                              PVR_ROGUE_PDSINST_LOP_OR, /* LOP
+                                                                         */
+                                              cond_render_pred_temp + i, /* SRC0
+                                                                          */
+                                              cond_render_pred_temp, /* SRC1 */
+                                              0, /* SRC2 (Shift) */
+                                              cond_render_pred_temp)); /* DST */
+            }
+
+            APPEND(pvr_pds_inst_encode_limm(0, /* cc */
+                                            cond_render_pred_temp + 1, /* SRC1
+                                                                        */
+                                            0, /* SRC0 */
+                                            0)); /* GLOBALREG */
+
+            APPEND(pvr_pds_inst_encode_stflp32(1, /* enable immediate */
+                                               0, /* cc */
+                                               PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
+                                                                           */
+                                               cond_render_pred_temp, /* SRC0 */
+                                               cond_render_negate_temp, /* SRC1
+                                                                         */
+                                               0, /* SRC2 (Shift) */
+                                               cond_render_pred_temp)); /* DST
+                                                                         */
+
+            /* Check that the predicate is 0. */
+            APPEND(pvr_pds_inst_encode_cmpi(
+               0, /* cc */
+               PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
+               (cond_render_pred_temp >> 1) +
+                  PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
+               0)); /* SRC1 */
+
+            /* If predicate is 0, skip DOUTU. */
+            APPEND(pvr_pds_inst_encode_bra(
+               PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
+                                                  P0 */
+               0, /* NEG */
+               PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
+                                                    keep
+                                                  */
+               2));
+         }
+
+         /* Issue the task to the USC.
+          * DoutU src1=USC Code Base address, src2=doutu word 2.
+          */
+         APPEND(pvr_pds_encode_doutu(1, /* cc */
+                                     1, /* END */
+                                     usc_control_constant64 >> 1)); /* SRC0;
+                                                                     * DOUTU
+                                                                     * 64-bit
+                                                                     * Src0.
+                                                                     */
+      }
+
+      /* End the program if the Dout did not already end it. */
+      APPEND(pvr_pds_inst_encode_halt(0));
+#undef APPEND
+   }
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Set the data segment pointer and ensure we return 1 past the buffer
+       * ptr.
+       */
+      program->data_segment = buffer;
+
+      buffer += next_constant;
+   }
+
+   /* Require at least one DWORD of PDS data so the program runs. */
+   data_size = MAX2(1, data_size);
+
+   program->temps_used = temps_used;
+   /* NOTE(review): highest_temp is assigned the temp count, not the highest
+    * temp index — confirm consumers expect a count here.
+    */
+   program->highest_temp = temps_used;
+   program->data_size = data_size;
+   if (gen_mode == PDS_GENERATE_SIZES)
+      program->code_size = code_size;
+
+   return buffer;
+}
+
+/**
+ * Generates the PDS vertex shader data or code block. This program will do a
+ * DMA into USC Constants followed by a DOUTU.
+ *
+ * \param program Pointer to the PDS vertex shader program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Generate code or data.
+ * \param dev_info PVR device information struct.
+ * \returns Pointer to just beyond the code/data.
+ */
+uint32_t *pvr_pds_vertex_shader_sa(
+   struct pvr_pds_vertex_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t next_constant;
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+
+   uint32_t usc_control_constant64 = 0;
+   uint32_t dma_address_constant64 = 0;
+   uint32_t dma_control_constant32 = 0;
+   uint32_t doutw_value_constant64 = 0;
+   uint32_t doutw_control_constant32 = 0;
+   uint32_t fence_constant_word = 0;
+   uint32_t *buffer_base;
+   uint32_t kick_index;
+
+   /* DOUTW counts: one instruction per DOUTW regardless of width... */
+   uint32_t total_num_doutw =
+      program->num_dword_doutw + program->num_q_word_doutw;
+   /* ...but a q-word DOUTW consumes two data dwords, a dword DOUTW one. */
+   uint32_t total_size_dma =
+      program->num_dword_doutw + 2 * program->num_q_word_doutw;
+
+   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+   /* Copy the DMA control words and USC task control words to constants.
+    *
+    * Arrange them so that the 64-bit words are together followed by the 32-bit
+    * words.
+    */
+   if (program->kick_usc) {
+      usc_control_constant64 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (program->clear_pds_barrier) {
+      fence_constant_word =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+   dma_address_constant64 = pvr_pds_get_constants(&next_constant,
+                                                  2 * program->num_dma_kicks,
+                                                  &data_size);
+
+   /* Assign all unaligned constants together to avoid alignment issues caused
+    * by pvr_pds_get_constants with even allocation sizes.
+    */
+   doutw_value_constant64 = pvr_pds_get_constants(
+      &next_constant,
+      total_size_dma + total_num_doutw + program->num_dma_kicks,
+      &data_size);
+   /* The DOUTW values are followed by their control words, then the DOUTD
+    * control words.
+    */
+   doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
+   dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Constants are written via buffer_base at their allocated offsets;
+       * buffer itself is advanced so the final return is one past the data.
+       */
+      buffer_base = buffer;
+
+      if (program->kick_usc) {
+         /* Src0 for DOUTU. */
+         pvr_pds_write_wide_constant(buffer_base,
+                                     usc_control_constant64,
+                                     program->usc_task_control.src0); /* DOUTU
+                                                                       * 64-bit
+                                                                       * Src0.
+                                                                       */
+         buffer += 2;
+      }
+
+      if (program->clear_pds_barrier) {
+         /* Encode the fence constant src0. Fence barrier is initialized to
+          * zero.
+          */
+         pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
+         buffer += 2;
+      }
+
+      if (total_num_doutw > 0) {
+         for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
+            /* Write the constant for the coefficient register write. */
+            pvr_pds_write_constant64(buffer_base,
+                                     doutw_value_constant64,
+                                     program->q_word_doutw_value[2 * i],
+                                     program->q_word_doutw_value[2 * i + 1]);
+            /* LAST_EN only on the final DOUTW, and only when no DMA kicks
+             * follow.
+             */
+            pvr_pds_write_constant32(
+               buffer_base,
+               doutw_control_constant32,
+               program->q_word_doutw_control[i] |
+                  ((!program->num_dma_kicks && i == total_num_doutw - 1)
+                      ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
+                      : 0));
+
+            doutw_value_constant64 += 2;
+            doutw_control_constant32 += 1;
+         }
+
+         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+            /* Write the constant for the coefficient register write. */
+            pvr_pds_write_constant32(buffer_base,
+                                     doutw_value_constant64,
+                                     program->dword_doutw_value[i]);
+            pvr_pds_write_constant32(
+               buffer_base,
+               doutw_control_constant32,
+               program->dword_doutw_control[i] |
+                  ((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
+                      ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
+                      : 0));
+
+            doutw_value_constant64 += 1;
+            doutw_control_constant32 += 1;
+         }
+
+         buffer += total_size_dma + total_num_doutw;
+      }
+
+      if (program->num_dma_kicks == 1) /* Most-common case. */
+      {
+         /* Src0 for DOUTD - Address. */
+         pvr_pds_write_dma_address(buffer_base,
+                                   dma_address_constant64,
+                                   program->dma_address[0],
+                                   false,
+                                   dev_info);
+
+         /* Src1 for DOUTD - Control Word. */
+         pvr_pds_write_constant32(
+            buffer_base,
+            dma_control_constant32,
+            program->dma_control[0] |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+
+         /* Move the buffer ptr along as we will return 1 past the buffer. */
+         buffer += 3;
+      } else if (program->num_dma_kicks > 1) {
+         /* All kicks except the last; only the last carries LAST_EN. */
+         for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
+              kick_index++) {
+            /* Src0 for DOUTD - Address. */
+            pvr_pds_write_dma_address(buffer_base,
+                                      dma_address_constant64,
+                                      program->dma_address[kick_index],
+                                      false,
+                                      dev_info);
+
+            /* Src1 for DOUTD - Control Word. */
+            pvr_pds_write_constant32(buffer_base,
+                                     dma_control_constant32,
+                                     program->dma_control[kick_index]);
+            dma_address_constant64 += 2;
+            dma_control_constant32 += 1;
+         }
+
+         /* Src0 for DOUTD - Address. */
+         pvr_pds_write_dma_address(buffer_base,
+                                   dma_address_constant64,
+                                   program->dma_address[kick_index],
+                                   false,
+                                   dev_info);
+
+         /* Src1 for DOUTD - Control Word. */
+         pvr_pds_write_constant32(
+            buffer_base,
+            dma_control_constant32,
+            program->dma_control[kick_index] |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+
+         buffer += 3 * program->num_dma_kicks;
+      }
+   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Code segment: optional ADD64 fence clear, the DOUTWs, the DOUTDs,
+       * then an optional DOUTU; the END flag is set on the last emitted
+       * dout instruction.
+       */
+      if (program->clear_pds_barrier) {
+         /* Zero the persistent temp (SW fence for context switch). */
+         *buffer++ = pvr_pds_inst_encode_add64(
+            0, /* cc */
+            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (fence_constant_word >> 1), /* src0 = 0 */
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (fence_constant_word >> 1), /* src1 = 0 */
+            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
+                                                            * ptemp[0]
+                                                            */
+      }
+
+      if (total_num_doutw > 0) {
+         for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
+            /* Set the coefficient register to data value. */
+            *buffer++ = pvr_pds_encode_doutw64(
+               /* cc */ 0,
+               /* END */ !program->num_dma_kicks && !program->kick_usc &&
+                  (i == total_num_doutw - 1),
+               /* SRC1 */ doutw_control_constant32,
+               /* SRC0 */ doutw_value_constant64 >> 1);
+
+            doutw_value_constant64 += 2;
+            doutw_control_constant32 += 1;
+         }
+
+         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+            /* Set the coefficient register to data value. */
+            *buffer++ = pvr_pds_encode_doutw64(
+               /* cc */ 0,
+               /* END */ !program->num_dma_kicks && !program->kick_usc &&
+                  (i == program->num_dword_doutw - 1),
+               /* SRC1 */ doutw_control_constant32,
+               /* SRC0 */ doutw_value_constant64 >> 1);
+
+            doutw_value_constant64 += 1;
+            doutw_control_constant32 += 1;
+         }
+      }
+
+      if (program->num_dma_kicks != 0) {
+         /* DMA the state into the secondary attributes. */
+
+         if (program->num_dma_kicks == 1) /* Most-common case. */
+         {
+            *buffer++ = pvr_pds_encode_doutd(
+               /* cc */ 0,
+               /* END */ !program->kick_usc,
+               /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
+               /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
+                                                         * Src0.
+                                                         */
+         } else {
+            for (kick_index = 0; kick_index < program->num_dma_kicks;
+                 kick_index++) {
+               *buffer++ = pvr_pds_encode_doutd(
+                  /* cc */ 0,
+                  /* END */ (!program->kick_usc) &&
+                     (kick_index + 1 == program->num_dma_kicks),
+                  /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
+                                                      * Src1.
+                                                      */
+                  /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
+                                                            * 64-bit
+                                                            * Src0.
+                                                            */
+               dma_address_constant64 += 2;
+               dma_control_constant32 += 1;
+            }
+         }
+      }
+
+      if (program->kick_usc) {
+         /* Kick the USC. */
+         *buffer++ = pvr_pds_encode_doutu(
+            /* cc */ 0,
+            /* END */ 1,
+            /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
+                                                      */
+      }
+
+      /* Nothing was emitted above, so emit a lone HALT to end the program. */
+      if (!program->kick_usc && program->num_dma_kicks == 0 &&
+          total_num_doutw == 0) {
+         *buffer++ = pvr_pds_inst_encode_halt(0);
+      }
+   }
+
+   /* Instruction counts (in dwords) mirroring the code-segment emission. */
+   code_size = program->num_dma_kicks + total_num_doutw;
+   if (program->clear_pds_barrier)
+      code_size++; /* ADD64 instruction. */
+
+   if (program->kick_usc)
+      code_size++;
+
+   /* If there are no DMAs and no USC kick then code is HALT only. */
+   if (code_size == 0)
+      code_size = 1;
+
+   /* NOTE(review): data_size is not clamped to a minimum of 1 here (compare
+    * the MAX2(1, data_size) in the compute-shader generator) — confirm
+    * callers never produce an empty data segment.
+    */
+   program->data_size = data_size;
+   program->code_size = code_size;
+
+   return buffer;
+}
+
+/**
+ * Writes the code block (or updates the size information) for the PDS pixel
+ * shader secondary attributes program. Data-segment generation is not
+ * supported by this function (it is rejected by an assert).
+ *
+ * \param program Pointer to the PDS pixel shader secondary attributes program.
+ * \param buffer Pointer to the buffer for the code.
+ * \param gen_mode Either code is generated or sizes only are updated.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
+   struct pvr_pds_pixel_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t *instruction;
+   uint32_t code_size = 0;
+   /* Tracks the constant allocation so offsets match the companion _data
+    * function; the total is not stored here (only code_size is reported).
+    */
+   uint32_t data_size = 0;
+   uint32_t temps_used = 0;
+   uint32_t next_constant;
+
+   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
+          0);
+
+   assert(gen_mode != PDS_GENERATE_DATA_SEGMENT);
+
+   /* clang-format off */
+   /* Shape of code segment (note: clear is different)
+    *
+    *       Code
+    *    +------------+
+    *    | BRA if0    |
+    *    | DOUTD      |
+    *    |   ...      |
+    *    | DOUTD.halt |
+    *    | uniform    |
+    *    | DOUTD      |
+    *    |   ...      |
+    *    |   ...      |
+    *    | DOUTW      |
+    *    |   ...      |
+    *    |   ...      |
+    *    | DOUTU.halt |
+    *    | HALT       |
+    *    +------------+
+    */
+   /* clang-format on */
+   instruction = buffer;
+
+   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+   /* The clear color can arrive packed in the right form in the first (or
+    * first 2) dwords of the shared registers and the program will issue a
+    * single doutw for this.
+    */
+   if (program->clear && program->packed_clear) {
+      uint32_t color_constant1 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      uint32_t control_word_constant1 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+         /* DOUTW the clear color to the USC constants. Predicate with
+          * uniform loading flag (IF0).
+          */
+         *instruction++ = pvr_pds_encode_doutw64(
+            /* cc */ 1, /* Only for uniform loading program. */
+            /* END */ program->kick_usc ? 0 : 1, /* Last instruction
+                                                  * for a clear.
+                                                  */
+            /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
+            /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
+
+         code_size += 1;
+      }
+   } else if (program->clear) {
+      uint32_t color_constant1, color_constant2;
+
+      if (program->clear_color_dest_reg & 0x1) {
+         /* Odd destination register: the color must be split into a
+          * 32-bit, a 64-bit and a 32-bit DOUTW to keep 64-bit writes
+          * aligned. Note: color_constant4 actually holds the third
+          * DOUTW's control word (see the companion _data function).
+          */
+         uint32_t color_constant3, control_word_constant1,
+            control_word_constant2, color_constant4;
+
+         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
+         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
+
+         control_word_constant1 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_constant2 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* DOUTW the clear color to the USSE constants. Predicate with
+             * uniform loading flag (IF0).
+             */
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for Uniform Loading program */
+               /* END */ 0,
+               /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
+
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for Uniform Loading program */
+               /* END */ 0,
+               /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
+
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for uniform loading program */
+               /* END */ program->kick_usc ? 0 : 1, /* Last instruction
+                                                     * for a clear.
+                                                     */
+               /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
+         }
+
+         code_size += 3;
+      } else {
+         uint32_t control_word_constant, control_word_last_constant;
+
+         /* Put the clear color and control words into the first 8
+          * constants.
+          */
+         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_constant =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_last_constant =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* DOUTW the clear color to the USSE constants. Predicate with
+             * uniform loading flag (IF0).
+             */
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for Uniform Loading program */
+               /* END */ 0,
+               /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
+
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for uniform loading program */
+               /* END */ program->kick_usc ? 0 : 1, /* Last instruction
+                                                     * for a clear.
+                                                     */
+               /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
+         }
+
+         code_size += 2;
+      }
+
+      if (program->kick_usc) {
+         uint32_t doutu_constant64;
+
+         doutu_constant64 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* Issue the task to the USC.
+             *
+             * dout ds1[constant_use], ds0[constant_use],
+             * ds1[constant_use], emit
+             */
+            *instruction++ = pvr_pds_encode_doutu(
+               /* cc */ 0,
+               /* END */ 1,
+               /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
+         }
+
+         code_size += 1;
+      }
+
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+         /* End the program. */
+         *instruction++ = pvr_pds_inst_encode_halt(0);
+      }
+      code_size += 1;
+   } else {
+      /* Non-clear path: texture DMAs, optional branch to the uniform
+       * part, uniform DOUTWs/DOUTDs and an optional USC kick.
+       */
+      uint32_t total_num_doutw =
+         program->num_dword_doutw + program->num_q_word_doutw;
+      bool both_textures_and_uniforms =
+         ((program->num_texture_dma_kicks > 0) &&
+          ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
+           program->kick_usc));
+      uint32_t doutu_constant64 = 0;
+
+      if (both_textures_and_uniforms) {
+         /* If the size of a PDS data section is 0, the hardware won't run
+          * it. We therefore don't need to branch when there is only a
+          * texture OR a uniform update program.
+          */
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            uint32_t branch_address =
+               MAX2(1 + program->num_texture_dma_kicks, 2);
+
+            /* Use If0 to BRAnch to uniform code. */
+            *instruction++ = pvr_pds_encode_bra(
+               /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
+               /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
+               /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
+               /* ADDR */ branch_address);
+         }
+
+         code_size += 1;
+      }
+
+      if (program->num_texture_dma_kicks > 0) {
+         uint32_t dma_address_constant64;
+         uint32_t dma_control_constant32;
+         /* Allocate 3 constant spaces for each kick. The 64-bit constants
+          * come first followed by the 32-bit constants.
+          */
+         dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
+         dma_control_constant32 =
+            dma_address_constant64 + (program->num_texture_dma_kicks * 2);
+
+         for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
+            code_size += 1;
+            if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
+               continue;
+
+            /* DMA the state into the secondary attributes. */
+            *instruction++ = pvr_pds_encode_doutd(
+               /* cc */ 0,
+               /* END */ dma == (program->num_texture_dma_kicks - 1),
+               /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
+               /* SRC0 */ dma_address_constant64 >> 1); /* DOUT 64-bit Src0 */
+            dma_address_constant64 += 2;
+            dma_control_constant32 += 1;
+         }
+      } else if (both_textures_and_uniforms) {
+         /* NOTE(review): unreachable — both_textures_and_uniforms implies
+          * num_texture_dma_kicks > 0. Kept for safety/symmetry.
+          */
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* End the program. */
+            *instruction++ = pvr_pds_inst_encode_halt(0);
+         }
+
+         code_size += 1;
+      }
+
+      /* Reserve space at the beginning of the data segment for the DOUTU Task
+       * Control if one is needed.
+       */
+      if (program->kick_usc) {
+         doutu_constant64 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+      }
+
+      /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
+       * 64-bit constants come first followed by the 32-bit constants.
+       */
+      uint32_t total_size_dma =
+         program->num_dword_doutw + 2 * program->num_q_word_doutw;
+
+      uint32_t dma_address_constant64 = pvr_pds_get_constants(
+         &next_constant,
+         program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
+         &data_size);
+      uint32_t doutw_value_constant64 =
+         dma_address_constant64 + program->num_uniform_dma_kicks * 2;
+      uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
+      uint32_t doutw_control_constant32 =
+         dma_control_constant32 + program->num_uniform_dma_kicks;
+
+      if (total_num_doutw > 0) {
+         pvr_pds_get_constants(&next_constant, 0, &data_size);
+
+         /* Guard against a NULL instruction buffer, consistent with every
+          * other code-emission site in this function.
+          */
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
+               /* Set the coefficient register to data value. */
+               *instruction++ = pvr_pds_encode_doutw64(
+                  /* cc */ 0,
+                  /* END */ !program->num_uniform_dma_kicks &&
+                     !program->kick_usc && (i == total_num_doutw - 1),
+                  /* SRC1 */ doutw_control_constant32,
+                  /* SRC0 */ doutw_value_constant64 >> 1);
+
+               doutw_value_constant64 += 2;
+               doutw_control_constant32 += 1;
+            }
+
+            for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+               /* Set the coefficient register to data value. */
+               *instruction++ = pvr_pds_encode_doutw64(
+                  /* cc */ 0,
+                  /* END */ !program->num_uniform_dma_kicks &&
+                     !program->kick_usc && (i == program->num_dword_doutw - 1),
+                  /* SRC1 */ doutw_control_constant32,
+                  /* SRC0 */ doutw_value_constant64 >> 1);
+
+               doutw_value_constant64 += 1;
+               doutw_control_constant32 += 1;
+            }
+         }
+         code_size += total_num_doutw;
+      }
+
+      if (program->num_uniform_dma_kicks > 0) {
+         for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
+            code_size += 1;
+
+            if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
+               continue;
+
+            bool last_instruction = false;
+            if (!program->kick_usc &&
+                (dma == program->num_uniform_dma_kicks - 1)) {
+               last_instruction = true;
+            }
+            /* DMA the state into the secondary attributes. */
+            *instruction++ = pvr_pds_encode_doutd(
+               /* cc */ 0,
+               /* END */ last_instruction,
+               /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
+               /* SRC0 */ dma_address_constant64 >> 1); /* DOUT 64-bit Src0 */
+            dma_address_constant64 += 2;
+            dma_control_constant32 += 1;
+         }
+      }
+
+      if (program->kick_usc) {
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* Issue the task to the USC.
+             *
+             * dout ds1[constant_use], ds0[constant_use],
+             * ds1[constant_use], emit
+             */
+
+            *instruction++ = pvr_pds_encode_doutu(
+               /* cc */ 0,
+               /* END */ 1,
+               /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
+         }
+
+         code_size += 1;
+      } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* End the program. */
+            *instruction++ = pvr_pds_inst_encode_halt(0);
+         }
+
+         code_size += 1;
+      }
+   }
+
+   /* Minimum temp count is 1. */
+   program->temps_used = MAX2(temps_used, 1);
+   program->code_size = code_size;
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
+      return instruction;
+   else
+      return NULL;
+}
+
+/**
+ * Writes the Uniform Data block for the PDS pixel shader secondary attributes
+ * program.
+ *
+ * \param program Pointer to the PDS pixel shader secondary attributes program.
+ * \param buffer Pointer to the buffer for the code/data.
+ * \param gen_mode Either code or data can be generated or sizes only updated.
+ * \param uniform True to emit the uniform data block, false for the texture
+ *                data block.
+ * \param dev_info PVR device information struct.
+ * \returns Pointer to just beyond the buffer for the program/data.
+ */
+uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
+   struct pvr_pds_pixel_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   bool uniform,
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t *constants = buffer;
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   /* Never incremented here; clamped to a minimum of 1 before storing. */
+   uint32_t temps_used = 0;
+   uint32_t data_size = 0;
+
+   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
+          0);
+
+   assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);
+
+   /* Shape of data segment (note: clear is different).
+    *
+    *      Uniform                 Texture
+    *   +--------------+       +-------------+
+    *   | USC Task   L |       | USC Task  L |
+    *   |            H |       |           H |
+    *   | DMA1 Src0  L |       | DMA1 Src0 L |
+    *   |            H |       |           H |
+    *   | DMA2 Src0  L |       |             |
+    *   |            H |       |             |
+    *   | DMA1 Src1    |       | DMA1 Src1   |
+    *   | DMA2 Src1    |       |             |
+    *   | DOUTW0 Src1  |       |             |
+    *   | DOUTW1 Src1  |       |             |
+    *   | ...          |       |             |
+    *   | DOUTWn Srcn  |       |             |
+    *   | other data   |       |             |
+    *   +--------------+       +-------------+
+    *
+    * NOTE(review): constant allocation order below must mirror the
+    * companion _code function exactly — offsets are the shared contract.
+    */
+
+   /* Generate the PDS pixel shader secondary attributes data.
+    *
+    * Packed Clear
+    * The clear color can arrive packed in the right form in the first (or
+    * first 2) dwords of the shared registers and the program will issue a
+    * single DOUTW for this.
+    */
+   if (program->clear && uniform && program->packed_clear) {
+      uint32_t color_constant1 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      uint32_t control_word_constant1 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+         uint32_t doutw;
+
+         pvr_pds_write_constant64(constants,
+                                  color_constant1,
+                                  program->clear_color[0],
+                                  program->clear_color[1]);
+
+         /* Load into first constant in common store. */
+         doutw = pvr_pds_encode_doutw_src1(
+            program->clear_color_dest_reg,
+            PVR_PDS_DOUTW_LOWER64,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+            false,
+            dev_info);
+
+         /* Set the last flag. */
+         doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+         pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
+      }
+   } else if (program->clear && uniform) {
+      uint32_t color_constant1, color_constant2;
+
+      if (program->clear_color_dest_reg & 0x1) {
+         /* Odd destination register: split the color into 32/64/32-bit
+          * DOUTWs so the 64-bit write stays aligned.
+          */
+         uint32_t color_constant3, control_word_constant1,
+            control_word_constant2, color_constant4;
+
+         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
+         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
+
+         control_word_constant1 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_constant2 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint32_t doutw;
+
+            pvr_pds_write_constant32(constants,
+                                     color_constant1,
+                                     program->clear_color[0]);
+
+            pvr_pds_write_constant64(constants,
+                                     color_constant2,
+                                     program->clear_color[1],
+                                     program->clear_color[2]);
+
+            pvr_pds_write_constant32(constants,
+                                     color_constant3,
+                                     program->clear_color[3]);
+
+            /* Load into first constant in common store. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->clear_color_dest_reg,
+               PVR_PDS_DOUTW_LOWER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+
+            pvr_pds_write_constant64(constants,
+                                     control_word_constant1,
+                                     doutw,
+                                     0);
+
+            /* Move the destination register along. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->clear_color_dest_reg + 1,
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+
+            pvr_pds_write_constant64(constants,
+                                     control_word_constant2,
+                                     doutw,
+                                     0);
+
+            /* Move the destination register along. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->clear_color_dest_reg + 3,
+               PVR_PDS_DOUTW_LOWER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+
+            /* Set the last flag. Note: color_constant4 holds the third
+             * DOUTW's control word (the _code function uses it as SRC1).
+             */
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+            pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
+         }
+      } else {
+         uint32_t control_word_constant, control_word_last_constant;
+
+         /* Put the clear color and control words into the first 8
+          * constants.
+          */
+         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_constant =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_last_constant =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint32_t doutw;
+            pvr_pds_write_constant64(constants,
+                                     color_constant1,
+                                     program->clear_color[0],
+                                     program->clear_color[1]);
+
+            pvr_pds_write_constant64(constants,
+                                     color_constant2,
+                                     program->clear_color[2],
+                                     program->clear_color[3]);
+
+            /* Load into first constant in common store. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->clear_color_dest_reg,
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+
+            pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);
+
+            /* Move the destination register along (patch the AO field of
+             * the previous control word rather than re-encoding).
+             */
+            doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
+            doutw |= (program->clear_color_dest_reg + 2)
+                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
+
+            /* Set the last flag. */
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+            pvr_pds_write_constant64(constants,
+                                     control_word_last_constant,
+                                     doutw,
+                                     0);
+         }
+      }
+
+      /* Constants for the DOUTU Task Control, if needed. */
+      if (program->kick_usc) {
+         uint32_t doutu_constant64 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            pvr_pds_write_wide_constant(
+               constants,
+               doutu_constant64,
+               program->usc_task_control.src0); /* 64-bit Src0 */
+         }
+      }
+   } else {
+      /* Non-clear path: either the uniform data block or the texture data
+       * block, selected by `uniform`.
+       */
+      if (uniform) {
+         /* Reserve space at the beginning of the data segment for the DOUTU
+          * Task Control if one is needed.
+          */
+         if (program->kick_usc) {
+            uint32_t doutu_constant64 =
+               pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+               pvr_pds_write_wide_constant(
+                  constants,
+                  doutu_constant64,
+                  program->usc_task_control.src0); /* 64-bit Src0 */
+            }
+         }
+
+         uint32_t total_num_doutw =
+            program->num_dword_doutw + program->num_q_word_doutw;
+         uint32_t total_size_dma =
+            program->num_dword_doutw + 2 * program->num_q_word_doutw;
+
+         /* Allocate 3 constant spaces for each kick. The 64-bit constants
+          * come first followed by the 32-bit constants.
+          */
+         uint32_t dma_address_constant64 =
+            pvr_pds_get_constants(&next_constant,
+                                  program->num_uniform_dma_kicks * 3 +
+                                     total_size_dma + total_num_doutw,
+                                  &data_size);
+         uint32_t doutw_value_constant64 =
+            dma_address_constant64 + program->num_uniform_dma_kicks * 2;
+         uint32_t dma_control_constant32 =
+            doutw_value_constant64 + total_size_dma;
+         uint32_t doutw_control_constant32 =
+            dma_control_constant32 + program->num_uniform_dma_kicks;
+
+         if (total_num_doutw > 0) {
+            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+               /* 64-bit DOUTW payloads first, then the 32-bit ones; the
+                * LAST flag goes on the final DOUTW only when no uniform
+                * DMA kicks follow.
+                */
+               for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
+                  pvr_pds_write_constant64(
+                     constants,
+                     doutw_value_constant64,
+                     program->q_word_doutw_value[2 * i],
+                     program->q_word_doutw_value[2 * i + 1]);
+                  pvr_pds_write_constant32(
+                     constants,
+                     doutw_control_constant32,
+                     program->q_word_doutw_control[i] |
+                        ((!program->num_uniform_dma_kicks &&
+                          i == total_num_doutw - 1)
+                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
+                            : 0));
+
+                  doutw_value_constant64 += 2;
+                  doutw_control_constant32 += 1;
+               }
+
+               for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+                  pvr_pds_write_constant32(constants,
+                                           doutw_value_constant64,
+                                           program->dword_doutw_value[i]);
+                  pvr_pds_write_constant32(
+                     constants,
+                     doutw_control_constant32,
+                     program->dword_doutw_control[i] |
+                        ((!program->num_uniform_dma_kicks &&
+                          i == program->num_dword_doutw - 1)
+                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
+                            : 0));
+
+                  doutw_value_constant64 += 1;
+                  doutw_control_constant32 += 1;
+               }
+            }
+         }
+
+         if (program->num_uniform_dma_kicks > 0) {
+            uint32_t kick;
+
+            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+               /* All but the last kick; the last one (written below, with
+                * `kick` left at num_uniform_dma_kicks - 1) carries LAST_EN.
+                */
+               for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
+                    kick++) {
+                  /* Copy the dma control words to constants. */
+                  pvr_pds_write_dma_address(constants,
+                                            dma_address_constant64,
+                                            program->uniform_dma_address[kick],
+                                            false,
+                                            dev_info);
+                  pvr_pds_write_constant32(constants,
+                                           dma_control_constant32,
+                                           program->uniform_dma_control[kick]);
+
+                  dma_address_constant64 += 2;
+                  dma_control_constant32 += 1;
+               }
+
+               pvr_pds_write_dma_address(constants,
+                                         dma_address_constant64,
+                                         program->uniform_dma_address[kick],
+                                         false,
+                                         dev_info);
+               pvr_pds_write_constant32(
+                  constants,
+                  dma_control_constant32,
+                  program->uniform_dma_control[kick] |
+                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+            }
+         }
+
+      } else if (program->num_texture_dma_kicks > 0) {
+         /* Allocate 3 constant spaces for each kick. The 64-bit constants
+          * come first followed by the 32-bit constants.
+          */
+         uint32_t dma_address_constant64 =
+            pvr_pds_get_constants(&next_constant,
+                                  program->num_texture_dma_kicks * 3,
+                                  &data_size);
+         uint32_t dma_control_constant32 =
+            dma_address_constant64 + (program->num_texture_dma_kicks * 2);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint32_t kick;
+            for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
+               /* Copy the DMA control words to constants. */
+               pvr_pds_write_dma_address(constants,
+                                         dma_address_constant64,
+                                         program->texture_dma_address[kick],
+                                         false,
+                                         dev_info);
+
+               pvr_pds_write_constant32(constants,
+                                        dma_control_constant32,
+                                        program->texture_dma_control[kick]);
+
+               dma_address_constant64 += 2;
+               dma_control_constant32 += 1;
+            }
+
+            /* Final texture DMA kick carries the LAST flag. */
+            pvr_pds_write_dma_address(constants,
+                                      dma_address_constant64,
+                                      program->texture_dma_address[kick],
+                                      false,
+                                      dev_info);
+
+            pvr_pds_write_constant32(
+               constants,
+               dma_control_constant32,
+               program->texture_dma_control[kick] |
+                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+         }
+      }
+   }
+
+   /* Save the data segment pointer and size. */
+   program->data_segment = constants;
+
+   /* Minimum temp count is 1. */
+   program->temps_used = MAX2(temps_used, 1);
+   program->data_size = data_size;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+      return (constants + next_constant);
+   else
+      return NULL;
+}
+
+/**
+ * Generates generic DOUTC PDS program.
+ *
+ * \param program Pointer to the PDS kick USC.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated, or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the code or program segment.
+ */
+uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
+                                 uint32_t *restrict buffer,
+                                 enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t constant = 0;
+
+   /* Automatically get a data size of 1x 128bit chunks. */
+   uint32_t data_size = 0, code_size = 0;
+
+   /* Setup the data part. */
+   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
+   uint32_t *instruction = buffer;
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
+                                                           * dwords.
+                                                           */
+
+   /* Update the program sizes. Written again below once the real sizes are
+    * known for the requested segment.
+    */
+   program->data_size = data_size;
+   program->code_size = code_size;
+   program->data_segment = constants;
+
+   if (gen_mode == PDS_GENERATE_SIZES)
+      return NULL;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Copy the USC task control words to constants. The DOUTC sources
+       * are written as zero: they are unused but must be valid.
+       */
+
+      constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
+      pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
+                                                                * Src0
+                                                                */
+
+      uint32_t control_word_constant =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
+                                                                        * Src1
+                                                                        */
+
+      program->data_size = data_size;
+      buffer += data_size;
+
+      return buffer;
+   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+      /* Emit the DOUTC followed by a terminating HALT. */
+      *instruction++ = pvr_pds_inst_encode_doutc(
+         /* cc */ 0,
+         /* END */ 0);
+
+      code_size++;
+
+      /* End the program. */
+      *instruction++ = pvr_pds_inst_encode_halt(0);
+      code_size++;
+
+      program->code_size = code_size;
+   }
+
+   return instruction;
+}
+
+/**
+ * Generates a generic DOUTW PDS program that writes 64-bit constants to the
+ * common or unified store.
+ *
+ * \param control Pointer to the PDS DOUTW control structure.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the code or program segment.
+ */
+uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
+                                 uint32_t *restrict buffer,
+                                 enum pvr_pds_generate_mode gen_mode,
+                                 const struct pvr_device_info *dev_info)
+{
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   uint32_t doutw;
+   uint32_t data_size = 0, code_size = 0;
+   uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
+   uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
+
+   /* Assert if buffer is exceeded. */
+   assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
+
+   uint32_t *constants = buffer;
+   uint32_t *instruction = buffer;
+
+   /* Put the constants and control words interleaved in the data region.
+    * The allocation is done unconditionally so the code path below uses
+    * the same offsets as the data path.
+    */
+   for (uint32_t const_pair = 0; const_pair < control->num_const64;
+        const_pair++) {
+      constant[const_pair] =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      control_word_constant[const_pair] =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Data segment points to start of constants. */
+      control->data_segment = constants;
+
+      for (uint32_t const_pair = 0; const_pair < control->num_const64;
+           const_pair++) {
+         pvr_pds_write_constant64(constants,
+                                  constant[const_pair],
+                                  H32(control->doutw_data[const_pair]),
+                                  L32(control->doutw_data[const_pair]));
+
+         /* Start loading at offset 0; each pair lands two registers
+          * further on.
+          */
+         if (control->dest_store == PDS_COMMON_STORE) {
+            doutw = pvr_pds_encode_doutw_src1(
+               (2 * const_pair),
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+         } else {
+            doutw = pvr_pds_encode_doutw_src1(
+               (2 * const_pair),
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               false,
+               dev_info);
+         }
+
+         if (const_pair + 1 == control->num_const64) {
+            /* Set the last flag for the MCU (assume there are no following
+             * DOUTD's).
+             */
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+         }
+         pvr_pds_write_constant64(constants,
+                                  control_word_constant[const_pair],
+                                  doutw,
+                                  0);
+      }
+
+      control->data_size = data_size;
+   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+      /* Code section: one DOUTW per constant pair. */
+
+      for (uint32_t const_pair = 0; const_pair < control->num_const64;
+           const_pair++) {
+         /* DOUTW the PDS data to the USC constants. END is only set on the
+          * final DOUTW, and only when no further instructions follow.
+          */
+         *instruction++ = pvr_pds_encode_doutw64(
+            /* cc */ 0,
+            /* END */ control->last_instruction &&
+               (const_pair + 1 == control->num_const64),
+            /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
+                                                           * Src1.
+                                                           */
+            /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
+
+         code_size++;
+      }
+
+      if (control->last_instruction) {
+         /* End the program. */
+         *instruction++ = pvr_pds_inst_encode_halt(0);
+         code_size++;
+      }
+
+      control->code_size = code_size;
+   }
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+      return (constants + next_constant);
+   else
+      return instruction;
+}
+
+/**
+ * Generates generic kick DOUTU PDS program in a single data+code block.
+ *
+ * \param program Pointer to the PDS kick USC.
+ * \param buffer Pointer to the buffer for the program.
+ * \param start_next_constant Next constant in data segment. Non-zero if another
+ * instruction precedes the DOUTU.
+ * \param cc_enabled If true then the DOUTU is predicated (cc set).
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the code or program segment.
+ */
+uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
+                           uint32_t *restrict buffer,
+                           uint32_t start_next_constant,
+                           bool cc_enabled,
+                           enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t constant = 0;
+
+   /* Automatically get a data size of 2 128bit chunks. */
+   uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
+   uint32_t code_size = 1; /* Single doutu */
+   /* Sink for pvr_pds_get_constants() so the fixed data_size above is not
+    * inflated by the allocation below.
+    */
+   uint32_t dummy_count = 0;
+
+   /* Setup the data part. */
+   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
+                                                           * dwords.
+                                                           */
+
+   /* Update the program sizes. */
+   program->data_size = data_size;
+   program->code_size = code_size;
+   program->data_segment = constants;
+
+   if (gen_mode == PDS_GENERATE_SIZES)
+      return NULL;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
+       gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
+      /* Copy the USC task control words to constants. */
+
+      constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
+
+      pvr_pds_write_wide_constant(constants,
+                                  constant + 0,
+                                  program->usc_task_control.src0); /* 64-bit
+                                                                    * Src0.
+                                                                    */
+      buffer += data_size;
+
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+         return buffer;
+   }
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
+       gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
+      /* Generate the PDS pixel shader code. */
+
+      /* Setup the instruction pointer. */
+      uint32_t *instruction = buffer;
+
+      /* Issue the task to the USC.
+       *
+       * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
+       * halt halt
+       *
+       * NOTE(review): in CODE-only mode `constant` is still 0, so SRC0 is
+       * derived solely from start_next_constant; in CODEDATA mode it was
+       * set by the allocation above.
+       */
+
+      *instruction++ = pvr_pds_encode_doutu(
+         /* cc */ cc_enabled,
+         /* END */ 1,
+         /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
+                                                             * 64-bit Src0
+                                                             */
+
+      /* Return pointer to just after last instruction. */
+      return instruction;
+   }
+
+   /* Execution should never reach here; keep compiler happy. */
+   return NULL;
+}
+
+uint32_t *pvr_pds_generate_compute_barrier_conditional(
+   uint32_t *buffer,
+   enum pvr_pds_generate_mode gen_mode)
+{
+   /* Compute barriers are supported; emit a test for the coefficient sync
+    * task. This snippet has no data segment, so only code-segment
+    * generation touches the buffer.
+    */
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      uint32_t *instr = buffer;
+
+      /* Branch over the HALT below unless this is the coefficient update
+       * task, making IF1 the current predicate.
+       */
+      *instr++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC */
+                                    PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG */
+                                    PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC */
+                                    1 /* ADDR */);
+
+      /* Conditional HALT (cc set). */
+      *instr++ = pvr_pds_inst_encode_halt(1);
+
+      /* Restore IF0 as the default predicate. */
+      *instr++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC */
+                                    PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG */
+                                    PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC */
+                                    1 /* ADDR */);
+
+      buffer = instr;
+   }
+
+   /* Data-segment and sizes modes fall through with the buffer untouched. */
+   return buffer;
+}
+
+/**
+ * Generates program to kick the USC task to store shared.
+ *
+ * \param program Pointer to the PDS shared register.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_shared_storing_program(
+   struct pvr_pds_shared_storing_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   struct pvr_pds_doutw_control *ctrl = &program->doutw_control;
+   struct pvr_pds_kickusc_program *kick = &program->usc_task;
+   uint32_t *cursor = buffer;
+
+   switch (gen_mode) {
+   case PDS_GENERATE_DATA_SEGMENT:
+      /* Emit the DOUTW constants followed by the DOUTU task control, and
+       * report the combined data size.
+       */
+      cursor = pvr_pds_generate_doutw(ctrl, cursor, gen_mode, dev_info);
+      cursor = pvr_pds_kick_usc(kick, cursor, 0, program->cc_enable, gen_mode);
+      program->data_size = ctrl->data_size + kick->data_size;
+      return cursor;
+
+   case PDS_GENERATE_CODE_SEGMENT:
+      /* doutw vi1, vi0
+       * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
+       * emit
+       */
+      cursor = pvr_pds_generate_doutw(ctrl, cursor, gen_mode, dev_info);
+
+      /* The DOUTU's constants follow the DOUTW data in the data segment,
+       * hence the data-size offset.
+       */
+      cursor = pvr_pds_kick_usc(kick,
+                                cursor,
+                                ctrl->data_size,
+                                program->cc_enable,
+                                gen_mode);
+      program->code_size = ctrl->code_size + kick->code_size;
+      return cursor;
+
+   default:
+      /* PDS_GENERATE_SIZES and any other mode: nothing to emit. */
+      return NULL;
+   }
+}
+
+/**
+ * Generates the fence/terminate program.
+ *
+ * Emits a DOUTC; on devices that need a software compute PDS barrier it is
+ * preceded by a lock/add64/release/cmpi/bra sequence that counts USC clusters
+ * into persistent temp pt[0] and spins until all clusters have arrived.
+ *
+ * \param program Pointer to the PDS fence program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program, or NULL for an
+ *          unsupported gen_mode (e.g. PDS_GENERATE_SIZES).
+ */
+uint32_t *pvr_pds_generate_fence_terminate_program(
+   struct pvr_pds_fence_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Data segment. */
+      uint32_t *constants, *constants_base;
+
+      constants = constants_base = (uint32_t *)buffer;
+
+      /* DOUTC sources are not used, but they must be valid. */
+      pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
+      data_size += program->data_size;
+
+      if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
+         /* Append a 64-bit constant with value 1. Used to increment ptemp.
+          * Return the offset into the data segment.
+          */
+         program->fence_constant_word =
+            pvr_pds_append_constant64(constants_base, 1, &data_size);
+      }
+
+      program->data_size = data_size;
+      return constants;
+   }
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Code segment. */
+      uint32_t *instruction = (uint32_t *)buffer;
+
+      instruction = pvr_pds_generate_compute_barrier_conditional(
+         instruction,
+         PDS_GENERATE_CODE_SEGMENT);
+      /* The conditional compute barrier accounts for 3 instructions. */
+      code_size += 3;
+
+      if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
+         /* lock */
+         *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
+
+         /* add64 pt[0], pt[0], #1 */
+         *instruction++ = pvr_pds_inst_encode_add64(
+            0, /* cc */
+            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+            PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
+                                                         */
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (program->fence_constant_word >> 1), /* src1 = 1 */
+            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
+                                                            * ptemp[0]
+                                                            */
+
+         /* release */
+         *instruction++ = pvr_pds_inst_encode_release(0); /* cc */
+
+         /* cmp pt[0] EQ num_clusters (number of USC clusters) */
+         *instruction++ = pvr_pds_inst_encode_cmpi(
+            0, /* cc */
+            PVR_ROGUE_PDSINST_COP_EQ,
+            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
+                                                           * = ptemp[0]
+                                                           */
+            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
+
+         /* bra -1: spin until every cluster has incremented the counter. */
+         *instruction++ =
+            pvr_pds_encode_bra(0, /* cc */
+                               1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
+                                   */
+                               0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
+                                   */
+                               -1); /* bra PC */
+         code_size += 5;
+      }
+
+      /* DOUTC */
+      instruction = pvr_pds_generate_doutc(program,
+                                           instruction,
+                                           PDS_GENERATE_CODE_SEGMENT);
+      code_size += program->code_size;
+
+      program->code_size = code_size;
+      return instruction;
+   }
+
+   /* Execution should never reach here. */
+   return NULL;
+}
+
+/**
+ * Generates program to kick the USC task to load shared registers from memory.
+ *
+ * \param program Pointer to the PDS shared register.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information struct.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_compute_shared_loading_program(
+ struct pvr_pds_shared_storing_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info)
+{
+ struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
+ struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
+
+ uint32_t next_constant;
+ uint32_t data_size = 0;
+ uint32_t code_size = 0;
+
+ /* This needs to persist to the CODE_SEGMENT call. */
+ static uint32_t fence_constant_word = 0;
+ uint64_t zero_constant64 = 0;
+
+ if (gen_mode == PDS_GENERATE_SIZES)
+ return NULL;
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ uint32_t *constants = buffer;
+
+ constants = pvr_pds_generate_doutw(doutw_control,
+ constants,
+ PDS_GENERATE_DATA_SEGMENT,
+ dev_info);
+ data_size += doutw_control->data_size;
+
+ constants = pvr_pds_kick_usc(kick_usc_program,
+ constants,
+ 0,
+ program->cc_enable,
+ gen_mode);
+ data_size += kick_usc_program->data_size;
+
+ /* Copy the fence constant value (64-bit). */
+ next_constant = data_size; /* Assumes data words fully packed. */
+ fence_constant_word =
+ pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+ /* Encode the fence constant src0 (offset measured from start of data
+ * buffer). Fence barrier is initialized to zero.
+ */
+ pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
+ /* Update the const size. */
+ data_size += 2;
+ constants += 2;
+
+ program->data_size = data_size;
+ return constants;
+ }
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ /* Generate PDS code segment. */
+ uint32_t *instruction = buffer;
+
+ /* add64 pt0, c0, c0
+ * IF [2x Phantoms]
+ * add64 pt1, c0, c0
+ * st [constant_mem_addr], pt0, 4
+ * ENDIF
+ * doutw vi1, vi0
+ * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
+ * emit
+ *
+ * Zero the persistent temp (SW fence for context switch).
+ */
+ *instruction++ = pvr_pds_inst_encode_add64(
+ 0, /* cc */
+ PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+ PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+ PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+ (fence_constant_word >> 1), /* src0
+ * = 0
+ */
+ PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+ (fence_constant_word >> 1), /* src1
+ * = 0
+ */
+ PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
+ */
+ code_size++;
+
+ instruction = pvr_pds_generate_doutw(doutw_control,
+ instruction,
+ PDS_GENERATE_CODE_SEGMENT,
+ dev_info);
+ code_size += doutw_control->code_size;
+
+ /* Offset into data segment follows on from doutw data segment. */
+ instruction = pvr_pds_kick_usc(kick_usc_program,
+ instruction,
+ doutw_control->data_size,
+ program->cc_enable,
+ gen_mode);
+ code_size += kick_usc_program->code_size;
+
+ program->code_size = code_size;
+ return instruction;
+ }
+
+ /* Execution should never reach here. */
+ return NULL;
+}
+
+/**
+ * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
+ * Relies on num_fpu_iterators being initialized for size calculation.
+ * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
+ * initialized for program generation.
+ *
+ * \param program Pointer to the PDS pixel shader program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_coefficient_loading(
+ struct pvr_pds_coeff_loading_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode)
+{
+ uint32_t constant;
+ uint32_t *instruction;
+ uint32_t total_data_size, code_size;
+
+ /* Place constants at the front of the buffer. */
+ uint32_t *constants = buffer;
+ /* Start counting constants from 0. */
+ uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+ /* Save the data segment pointer and size. */
+ program->data_segment = constants;
+
+ total_data_size = 0;
+ code_size = 0;
+
+ total_data_size += 2 * program->num_fpu_iterators;
+ code_size += program->num_fpu_iterators;
+
+ /* Instructions start where constants finished, but we must take note of
+ * alignment.
+ *
+ * 128-bit boundary = 4 dwords.
+ */
+ total_data_size = ALIGN_POT(total_data_size, 4);
+ if (gen_mode != PDS_GENERATE_SIZES) {
+ uint32_t data_size = 0;
+ uint32_t iterator = 0;
+
+ instruction = buffer + total_data_size;
+
+ while (iterator < program->num_fpu_iterators) {
+ uint64_t iterator_word;
+
+ /* Copy the USC task control words to constants. */
+ constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+ /* Write the first iterator. */
+ iterator_word =
+ (uint64_t)program->FPU_iterators[iterator]
+ << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
+
+ /* Write the destination. */
+ iterator_word |=
+ (uint64_t)program->destination[iterator++]
+ << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
+
+ /* If this is the last DOUTI word the "Last Issue" bit should be
+ * set.
+ */
+ if (iterator >= program->num_fpu_iterators) {
+ iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
+ }
+
+ /* Write the word to the buffer. */
+ pvr_pds_write_wide_constant(constants,
+ constant,
+ iterator_word); /* 64-bit
+ Src0
+ */
+
+ /* Write the DOUT instruction. */
+ *instruction++ = pvr_pds_encode_douti(
+ /* cc */ 0,
+ /* END */ 0,
+ /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
+ }
+
+ /* Update the last DOUTI instruction to have the END flag set. */
+ *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
+ } else {
+ instruction = NULL;
+ }
+
+ /* Update the data size and code size. Minimum temp count is 1. */
+ program->temps_used = 1;
+ program->data_size = total_data_size;
+ program->code_size = code_size;
+
+ return instruction;
+}
+
+/**
+ * Generate a single ld/st instruction. This can correspond to one or more
+ * real ld/st instructions based on the value of count.
+ *
+ * \param ld true to generate load, false to generate store.
+ * \param control Cache mode control.
+ * \param temp_index Dest temp for load/source temp for store, in 32bits
+ * register index.
+ * \param address Source for load/dest for store in bytes.
+ * \param count Number of dwords for load/store.
+ * \param next_constant
+ * \param total_data_size
+ * \param total_code_size
+ * \param buffer Pointer to the buffer for the program.
+ * \param data_fence Issue data fence.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_single_ldst_instruction(
+ bool ld,
+ const struct pvr_pds_ldst_control *control,
+ uint32_t temp_index,
+ uint64_t address,
+ uint32_t count,
+ uint32_t *next_constant,
+ uint32_t *total_data_size,
+ uint32_t *total_code_size,
+ uint32_t *restrict buffer,
+ bool data_fence,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info)
+{
+ /* A single ld/ST here does NOT actually correspond to a single ld/ST
+ * instruction, but may needs multiple ld/ST instructions because each ld/ST
+ * instruction can only ld/ST a restricted max number of dwords which may
+ * less than count passed here.
+ */
+
+ uint32_t num_inst;
+ uint32_t constant;
+
+ if (ld) {
+ /* ld must operate on 64bits unit, and it needs to load from and to 128
+ * bits aligned. Apart from the last ld, all the other need to ld 2x(x =
+ * 1, 2, ...) times 64bits unit.
+ */
+ uint32_t per_inst_count = 0;
+ uint32_t last_inst_count;
+
+ assert((gen_mode == PDS_GENERATE_SIZES) ||
+ (((count % 2) == 0) && ((address % 16) == 0) &&
+ (temp_index % 2) == 0));
+
+ count >>= 1;
+ temp_index >>= 1;
+
+ /* Found out how many ld instructions are needed and ld size for the all
+ * possible ld instructions.
+ */
+ if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
+ num_inst = 1;
+ last_inst_count = count;
+ } else {
+ per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
+ if ((per_inst_count % 2) != 0)
+ per_inst_count -= 1;
+
+ num_inst = count / per_inst_count;
+ last_inst_count = count - per_inst_count * num_inst;
+ num_inst += 1;
+ }
+
+ /* Generate all the instructions. */
+ for (uint32_t i = 0; i < num_inst; i++) {
+ if ((i == (num_inst - 1)) && (last_inst_count == 0))
+ break;
+
+ /* A single load instruction. */
+ constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ uint64_t ld_src0 = 0;
+
+ ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
+ << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
+ ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
+ : per_inst_count) &
+ PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
+ << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
+ ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+ << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
+
+ if (!control) {
+ ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
+
+ if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
+ ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
+
+ } else {
+ ld_src0 |= control->cache_control_const;
+ }
+
+ /* Write it to the constant. */
+ pvr_pds_write_constant64(buffer,
+ constant,
+ (uint32_t)(ld_src0),
+ (uint32_t)(ld_src0 >> 32));
+
+ /* Adjust value for next ld instruction. */
+ temp_index += per_inst_count;
+ address += (((uint64_t)(per_inst_count)) << 3);
+ }
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
+
+ if (data_fence)
+ *buffer++ = pvr_pds_inst_encode_wdf(0);
+ }
+ }
+ } else {
+ /* ST needs source memory address to be 32bits aligned. */
+ assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
+
+ /* Found out how many ST instructions are needed, each ST can only store
+ * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
+ */
+ num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
+ num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
+
+ /* Generate all the instructions. */
+ for (uint32_t i = 0; i < num_inst; i++) {
+ /* A single store instruction. */
+ constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+ uint32_t per_inst_count =
+ (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
+ ? count
+ : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
+ uint64_t st_src0 = 0;
+
+ st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
+ << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
+ st_src0 |=
+ (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
+ << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
+ st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+ << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
+
+ if (!control) {
+ st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
+
+ if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+ st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
+ }
+
+ } else {
+ st_src0 |= control->cache_control_const;
+ }
+
+ /* Write it to the constant. */
+ pvr_pds_write_constant64(buffer,
+ constant,
+ (uint32_t)(st_src0),
+ (uint32_t)(st_src0 >> 32));
+
+ /* Adjust value for next ST instruction. */
+ temp_index += per_inst_count;
+ count -= per_inst_count;
+ address += (((uint64_t)(per_inst_count)) << 2);
+ }
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
+
+ if (data_fence)
+ *buffer++ = pvr_pds_inst_encode_wdf(0);
+ }
+ }
+ }
+
+ (*total_code_size) += num_inst;
+ if (data_fence)
+ (*total_code_size) += num_inst;
+
+ if (gen_mode != PDS_GENERATE_SIZES)
+ return buffer;
+ return NULL;
+}
+
+/**
+ * Generate programs used to prepare stream out, i.e., clear stream out buffer
+ * overflow flags and update Persistent temps by a ld instruction.
+ *
+ * This must be used in PPP state update.
+ *
+ * \param program Pointer to the stream out program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param store_mode If true then the data is stored to memory. If false then
+ * the data is loaded from memory.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_stream_out_init_program(
+ struct pvr_pds_stream_out_init_program *restrict program,
+ uint32_t *restrict buffer,
+ bool store_mode,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info)
+{
+ uint32_t total_data_size = 0;
+ uint32_t PTDst = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
+
+ /* Start counting constants from 0. */
+ uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+ uint32_t total_code_size = 1;
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ /* We only need to clear global stream out predicate, other predicates
+ * are not used during the stream out buffer overflow test.
+ */
+ *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
+ }
+
+ for (uint32_t index = 0; index < program->num_buffers; index++) {
+ if (program->dev_address_for_buffer_data[index] != 0) {
+ /* Generate load/store program to load/store persistent temps. */
+
+ /* NOTE: store_mode == true case should be handled by
+ * StreamOutTerminate.
+ */
+ buffer = pvr_pds_generate_single_ldst_instruction(
+ !store_mode,
+ NULL,
+ PTDst,
+ program->dev_address_for_buffer_data[index],
+ program->pds_buffer_data_size[index],
+ &next_constant,
+ &total_data_size,
+ &total_code_size,
+ buffer,
+ false,
+ gen_mode,
+ dev_info);
+ }
+
+ PTDst += program->pds_buffer_data_size[index];
+ }
+
+ total_code_size += 2;
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ /* We need to fence the loading. */
+ *buffer++ = pvr_pds_inst_encode_wdf(0);
+ *buffer++ = pvr_pds_inst_encode_halt(0);
+ }
+
+ /* Save size information to program */
+ program->stream_out_init_pds_data_size =
+ ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
+ /* PDS program code size. */
+ program->stream_out_init_pds_code_size = total_code_size;
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+ return buffer + program->stream_out_init_pds_data_size;
+ else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
+ return buffer;
+
+ return NULL;
+}
+
+/**
+ * Generate stream out terminate program for stream out.
+ *
+ * If pds_persistent_temp_size_to_store is 0, the final primitive written value
+ * will be stored.
+ *
+ * If pds_persistent_temp_size_to_store is non 0, the value of persistent temps
+ * will be stored into memory.
+ *
+ * The stream out terminate program is used to update the PPP state and the data
+ * and code section cannot be separate.
+ *
+ * \param program Pointer to the stream out program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device info structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_stream_out_terminate_program(
+ struct pvr_pds_stream_out_terminate_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info)
+{
+ uint32_t next_constant;
+ uint32_t total_data_size = 0, total_code_size = 0;
+
+ /* Start counting constants from 0. */
+ next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+ /* Generate store program to store persistent temps. */
+ buffer = pvr_pds_generate_single_ldst_instruction(
+ false,
+ NULL,
+ PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
+ program->dev_address_for_storing_persistent_temp,
+ program->pds_persistent_temp_size_to_store,
+ &next_constant,
+ &total_data_size,
+ &total_code_size,
+ buffer,
+ false,
+ gen_mode,
+ dev_info);
+
+ total_code_size += 2;
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ *buffer++ = pvr_pds_inst_encode_wdf(0);
+ *buffer++ = pvr_pds_inst_encode_halt(0);
+ }
+
+ /* Save size information to program. */
+ program->stream_out_terminate_pds_data_size =
+ ALIGN_POT(total_data_size, 4); /* 128-bit boundary = 4 dwords; */
+ /* PDS program code size. */
+ program->stream_out_terminate_pds_code_size = total_code_size;
+
+ if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+ return buffer + program->stream_out_terminate_pds_data_size;
+ else if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
+ return buffer;
+
+ return NULL;
+}
+
+/* DrawArrays works in several steps:
+ *
+ * 1) load data from draw_indirect buffer
+ * 2) tweak data to match hardware formats
+ * 3) write data to indexblock
+ * 4) signal the VDM to continue
+ *
+ * This is complicated by HW limitations on alignment, as well as a HWBRN.
+ *
+ * 1) Load data.
+ * Loads _must_ be 128-bit aligned. Because there is no such limitation in the
+ * spec we must deal with this by choosing an appropriate earlier address and
+ * loading enough dwords that we load the entirety of the buffer.
+ *
+ * if addr & 0xf:
+ * load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
+ *    data = tmp[0 + (addr & 0xf) >> 2]...
+ * else
+ * load [addr] 4 dwords -> tmp[0, 1, 2, 3]
+ * data = tmp[0]...
+ *
+ *
+ * 2) Tweak data.
+ * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
+ * the VDM control stream. We must subtract 1 from the loaded primCount.
+ *
+ * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
+ * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
+ * into another tmp that has the correct alignment. Note: this is only required
+ * when data = tmp[even], as primCount is data+1:
+ *
+ * if data = tmp[even]:
+ * primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
+ * else:
+ * primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
+ *
+ * This boils down to:
+ *
+ * primCount = data[1]
+ * primCountSrc = data[1]
+ * if brn_present && (data is even):
+ * mov scratch, primCount
+ * primCountSrc = scratch
+ * endif
+ * sub primCount, primCountSrc, 1
+ *
+ * 3) Store Data.
+ * Write the now-tweaked data over the top of the indexblock.
+ * To ensure the write completes before the VDM re-reads the data, we must cause
+ * a data hazard by doing a dummy (dummy meaning we don't care about the
+ * returned data) load from the same addresses. Again, because the ld must
+ * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
+ * index block is 128-bit aligned. This is the client driver's responsibility.
+ *
+ * st data[0, 1, 2] -> (idxblock + 4)
+ * load [idxblock] 4 dwords
+ *
+ * 4) Signal the VDM
+ * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
+ * where it is currently fenced on a dummy idxblock that has been inserted by
+ * the driver.
+ */
+
+#include "pvr_draw_indirect_arrays0.h"
+#include "pvr_draw_indirect_arrays1.h"
+#include "pvr_draw_indirect_arrays2.h"
+#include "pvr_draw_indirect_arrays3.h"
+
+#include "pvr_draw_indirect_arrays_base_instance0.h"
+#include "pvr_draw_indirect_arrays_base_instance1.h"
+#include "pvr_draw_indirect_arrays_base_instance2.h"
+#include "pvr_draw_indirect_arrays_base_instance3.h"
+
+#include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
+#include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
+#include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
+#include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
+
+/* Select the SLC cache mode for indirect-argument loads: cached when the
+ * device exposes SLC MCU cache controls, bypass otherwise.
+ */
+#define ENABLE_SLC_MCU_CACHE_CONTROLS(device) \
+   ((device)->features.has_slc_mcu_cache_controls \
+       ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \
+       : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS)
+
+/**
+ * Generates the DrawArrays indirect program.
+ *
+ * Selects one of the pre-built PSC programs by the dword offset of the
+ * argument buffer within its 16-byte aligned load window
+ * ((arg_buffer >> 2) % 4), crossed with the base-instance and draw-id
+ * variants, then either copies its code (code/sizes modes) or patches its
+ * data section (data mode).
+ *
+ * \param program Pointer to the draw indirect program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ */
+void pvr_pds_generate_draw_arrays_indirect(
+   struct pvr_pds_drawindirect_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
+       (gen_mode == PDS_GENERATE_SIZES)) {
+      const struct pvr_psc_program_output *psc_program = NULL;
+      switch ((program->arg_buffer >> 2) % 4) {
+      case 0:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_arrays_base_instance_drawid0_program;
+            } else {
+               psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_arrays0_program;
+         }
+         break;
+      case 1:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_arrays_base_instance_drawid1_program;
+            } else {
+               psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_arrays1_program;
+         }
+         break;
+      case 2:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_arrays_base_instance_drawid2_program;
+            } else {
+               psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_arrays2_program;
+         }
+         break;
+      case 3:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_arrays_base_instance_drawid3_program;
+            } else {
+               psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_arrays3_program;
+         }
+         break;
+      }
+
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         memcpy(buffer,
+                psc_program->code,
+                psc_program->code_size * sizeof(uint32_t));
+#if defined(DUMP_PDS)
+         for (uint32_t i = 0; i < psc_program->code_size; i++)
+            PVR_PDS_PRINT_INST(buffer[i]);
+#endif
+      }
+
+      /* Expose the selected program (and its sizes) to the caller. */
+      program->program = *psc_program;
+   } else {
+      /* Data segment: patch the argument-buffer address, VDM index list
+       * addresses, view count and immediates into the data section.
+       */
+      switch ((program->arg_buffer >> 2) % 4) {
+      case 0:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_arrays_base_instance0_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance0_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_arrays0_di_data(buffer,
+                                                    program->arg_buffer &
+                                                       ~0xfull,
+                                                    dev_info);
+            pvr_write_draw_indirect_arrays0_write_vdm(
+               buffer,
+               program->index_list_addr_buffer + 4);
+            pvr_write_draw_indirect_arrays0_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_arrays0_num_views(buffer,
+                                                      program->num_views);
+            pvr_write_draw_indirect_arrays0_immediates(buffer);
+         }
+         break;
+      case 1:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_arrays_base_instance1_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance1_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_arrays1_di_data(buffer,
+                                                    program->arg_buffer &
+                                                       ~0xfull,
+                                                    dev_info);
+            pvr_write_draw_indirect_arrays1_write_vdm(
+               buffer,
+               program->index_list_addr_buffer + 4);
+            pvr_write_draw_indirect_arrays1_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_arrays1_num_views(buffer,
+                                                      program->num_views);
+            pvr_write_draw_indirect_arrays1_immediates(buffer);
+         }
+         break;
+      case 2:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_arrays_base_instance2_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance2_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_arrays2_di_data(buffer,
+                                                    program->arg_buffer &
+                                                       ~0xfull,
+                                                    dev_info);
+            pvr_write_draw_indirect_arrays2_write_vdm(
+               buffer,
+               program->index_list_addr_buffer + 4);
+            pvr_write_draw_indirect_arrays2_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_arrays2_num_views(buffer,
+                                                      program->num_views);
+            pvr_write_draw_indirect_arrays2_immediates(buffer);
+         }
+         break;
+      case 3:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_arrays_base_instance3_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance3_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_arrays3_di_data(buffer,
+                                                    program->arg_buffer &
+                                                       ~0xfull,
+                                                    dev_info);
+            pvr_write_draw_indirect_arrays3_write_vdm(
+               buffer,
+               program->index_list_addr_buffer + 4);
+            pvr_write_draw_indirect_arrays3_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_arrays3_num_views(buffer,
+                                                      program->num_views);
+            pvr_write_draw_indirect_arrays3_immediates(buffer);
+         }
+         break;
+      }
+   }
+}
+
+#include "pvr_draw_indirect_elements0.h"
+#include "pvr_draw_indirect_elements1.h"
+#include "pvr_draw_indirect_elements2.h"
+#include "pvr_draw_indirect_elements3.h"
+#include "pvr_draw_indirect_elements_base_instance0.h"
+#include "pvr_draw_indirect_elements_base_instance1.h"
+#include "pvr_draw_indirect_elements_base_instance2.h"
+#include "pvr_draw_indirect_elements_base_instance3.h"
+#include "pvr_draw_indirect_elements_base_instance_drawid0.h"
+#include "pvr_draw_indirect_elements_base_instance_drawid1.h"
+#include "pvr_draw_indirect_elements_base_instance_drawid2.h"
+#include "pvr_draw_indirect_elements_base_instance_drawid3.h"
+
+void pvr_pds_generate_draw_elements_indirect(
+ struct pvr_pds_drawindirect_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info)
+{
+ if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
+ (gen_mode == PDS_GENERATE_SIZES)) {
+ const struct pvr_psc_program_output *psc_program = NULL;
+ switch ((program->arg_buffer >> 2) % 4) {
+ case 0:
+ if (program->support_base_instance) {
+ if (program->increment_draw_id) {
+ psc_program =
+ &pvr_draw_indirect_elements_base_instance_drawid0_program;
+ } else {
+ psc_program = &pvr_draw_indirect_elements_base_instance0_program;
+ }
+ } else {
+ psc_program = &pvr_draw_indirect_elements0_program;
+ }
+ break;
+ case 1:
+ if (program->support_base_instance) {
+ if (program->increment_draw_id) {
+ psc_program =
+ &pvr_draw_indirect_elements_base_instance_drawid1_program;
+ } else {
+ psc_program = &pvr_draw_indirect_elements_base_instance1_program;
+ }
+ } else {
+ psc_program = &pvr_draw_indirect_elements1_program;
+ }
+ break;
+ case 2:
+ if (program->support_base_instance) {
+ if (program->increment_draw_id) {
+ psc_program =
+ &pvr_draw_indirect_elements_base_instance_drawid2_program;
+ } else {
+ psc_program = &pvr_draw_indirect_elements_base_instance2_program;
+ }
+ } else {
+ psc_program = &pvr_draw_indirect_elements2_program;
+ }
+ break;
+ case 3:
+ if (program->support_base_instance) {
+ if (program->increment_draw_id) {
+ psc_program =
+ &pvr_draw_indirect_elements_base_instance_drawid3_program;
+ } else {
+ psc_program = &pvr_draw_indirect_elements_base_instance3_program;
+ }
+ } else {
+ psc_program = &pvr_draw_indirect_elements3_program;
+ }
+ break;
+ }
+
+ if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+ memcpy(buffer,
+ psc_program->code,
+ psc_program->code_size * sizeof(uint32_t));
+
+#if defined(DUMP_PDS)
+ for (uint32_t i = 0; i < psc_program->code_size; i++)
+ PVR_PDS_PRINT_INST(buffer[i]);
+#endif
+ }
+
+ program->program = *psc_program;
+ } else {
+ switch ((program->arg_buffer >> 2) % 4) {
+ case 0:
+ if (program->support_base_instance) {
+ if (program->increment_draw_id) {
+ pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
+ buffer,
+ program->arg_buffer & ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
+ buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
+ buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
+ buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
+ buffer);
+ } else {
+ pvr_write_draw_indirect_elements_base_instance0_di_data(
+ buffer,
+ program->arg_buffer & ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements_base_instance0_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance0_num_views(
+ buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements_base_instance0_idx_stride(
+ buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements_base_instance0_idx_base(
+ buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements_base_instance0_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements_base_instance0_immediates(
+ buffer);
+ }
+ } else {
+ pvr_write_draw_indirect_elements0_di_data(buffer,
+ program->arg_buffer &
+ ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements0_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements0_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements0_num_views(buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements0_idx_stride(buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements0_idx_base(buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements0_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements0_immediates(buffer);
+ }
+ break;
+ case 1:
+ if (program->support_base_instance) {
+ if (program->increment_draw_id) {
+ pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
+ buffer,
+ program->arg_buffer & ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
+ buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
+ buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
+ buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
+ buffer);
+ } else {
+ pvr_write_draw_indirect_elements_base_instance1_di_data(
+ buffer,
+ program->arg_buffer & ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements_base_instance1_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance1_num_views(
+ buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements_base_instance1_idx_stride(
+ buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements_base_instance1_idx_base(
+ buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements_base_instance1_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements_base_instance1_immediates(
+ buffer);
+ }
+ } else {
+ pvr_write_draw_indirect_elements1_di_data(buffer,
+ program->arg_buffer &
+ ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements1_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements1_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements1_num_views(buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements1_idx_stride(buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements1_idx_base(buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements1_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements1_immediates(buffer);
+ }
+ break;
+ case 2:
+ if (program->support_base_instance) {
+ if (program->increment_draw_id) {
+ pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
+ buffer,
+ program->arg_buffer & ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
+ buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
+ buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
+ buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
+ buffer);
+ } else {
+ pvr_write_draw_indirect_elements_base_instance2_di_data(
+ buffer,
+ program->arg_buffer & ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements_base_instance2_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance2_num_views(
+ buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements_base_instance2_idx_stride(
+ buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements_base_instance2_idx_base(
+ buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements_base_instance2_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements_base_instance2_immediates(
+ buffer);
+ }
+ } else {
+ pvr_write_draw_indirect_elements2_di_data(buffer,
+ program->arg_buffer &
+ ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements2_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements2_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements2_num_views(buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements2_idx_stride(buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements2_idx_base(buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements2_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements2_immediates(buffer);
+ }
+ break;
+ case 3:
+ if (program->support_base_instance) {
+ if (program->increment_draw_id) {
+ pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
+ buffer,
+ program->arg_buffer & ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
+ buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
+ buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
+ buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
+ buffer);
+ } else {
+ pvr_write_draw_indirect_elements_base_instance3_di_data(
+ buffer,
+ program->arg_buffer & ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements_base_instance3_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements_base_instance3_num_views(
+ buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements_base_instance3_idx_stride(
+ buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements_base_instance3_idx_base(
+ buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements_base_instance3_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements_base_instance3_immediates(
+ buffer);
+ }
+ } else {
+ pvr_write_draw_indirect_elements3_di_data(buffer,
+ program->arg_buffer &
+ ~0xfull,
+ dev_info);
+ pvr_write_draw_indirect_elements3_write_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements3_flush_vdm(
+ buffer,
+ program->index_list_addr_buffer);
+ pvr_write_draw_indirect_elements3_num_views(buffer,
+ program->num_views);
+ pvr_write_draw_indirect_elements3_idx_stride(buffer,
+ program->index_stride);
+ pvr_write_draw_indirect_elements3_idx_base(buffer,
+ program->index_buffer);
+ pvr_write_draw_indirect_elements3_idx_header(
+ buffer,
+ program->index_block_header);
+ pvr_write_draw_indirect_elements3_immediates(buffer);
+ }
+ break;
+ }
+ }
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PDS_H
+#define PVR_PDS_H
+
+#include <stdbool.h>
+
+#include "pvr_device_info.h"
+#include "pvr_limits.h"
+#include "pds/pvr_rogue_pds_defs.h"
+#include "util/macros.h"
+
+#ifdef __cplusplus
+# define restrict __restrict__
+#endif
+
+/*****************************************************************************
+ Macro definitions
+*****************************************************************************/
+
+/* Based on Maximum number of passes that may emit DOUTW x Maximum number that
+ * might be emitted.
+ */
+#define PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW 6
+/* Based on Maximum number of passes that may emit DOUTW x Maximum number that
+ * might be emitted.
+ */
+#define PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW 3
+/* Based on max(max(UBOs,cbuffers), numTextures). */
+#define PVR_PDS_MAX_NUM_DMA_KICKS 32
+#define PVR_PDS_NUM_VERTEX_STREAMS 32
+#define PVR_PDS_NUM_VERTEX_ELEMENTS 32
+#define PVR_MAXIMUM_ITERATIONS 128
+
+#define PVR_PDS_NUM_COMPUTE_INPUT_REGS 3
+
+/* True when software must emit its own compute PDS barrier: the device is
+ * compute-morton-capable but lacks the ERN 45493 fix.
+ *
+ * The expansion is parenthesized so the macro composes safely inside larger
+ * boolean expressions (e.g. under `!` or alongside `||`).
+ */
+#define PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)           \
+   (PVR_HAS_FEATURE(dev_info, compute_morton_capable) &&    \
+    !PVR_HAS_ERN(dev_info, 45493))
+
+/* FIXME: Change BIL to SPV. */
+/* Any variable location can have at most 4 32-bit components. */
+#define BIL_COMPONENTS_PER_LOCATION 4
+
+/* Maximum number of DDMAD's that may be performed (Num attribs * Num DMA's per
+ * attribute).
+ */
+#define PVR_MAX_VERTEX_ATTRIB_DMAS \
+ (PVR_MAX_VERTEX_INPUT_BINDINGS * BIL_COMPONENTS_PER_LOCATION)
+
+/*****************************************************************************
+ Typedefs
+*****************************************************************************/
+
+/* FIXME: We might need to change some bools to this. */
+typedef uint32_t PVR_PDS_BOOL;
+
+/*****************************************************************************
+ Enums
+*****************************************************************************/
+
+/* What a pvr_pds_* generator call produces: size information only, just the
+ * code segment, just the data segment, or both segments in a single call.
+ */
+enum pvr_pds_generate_mode {
+   PDS_GENERATE_SIZES,
+   PDS_GENERATE_CODE_SEGMENT,
+   PDS_GENERATE_DATA_SEGMENT,
+   PDS_GENERATE_CODEDATA_SEGMENTS
+};
+
+/* Destination store addressed by a DOUTW write (see
+ * pvr_pds_doutw_control::dest_store).
+ */
+enum pvr_pds_store_type { PDS_COMMON_STORE, PDS_UNIFIED_STORE };
+
+/* Variants of the PDS vertex attribute program; _COUNT is the number of
+ * variants and is not itself a valid program type.
+ */
+enum pvr_pds_vertex_attrib_program_type {
+   PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC,
+   PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE,
+   PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT,
+   PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT
+};
+
+/*****************************************************************************
+ Structure definitions
+*****************************************************************************/
+
+/* Describes one entry of a pre-compiled PSC program's data segment (see
+ * struct pvr_psc_program_output::data).
+ *
+ * NOTE(review): the generator that fills these fields is not visible in this
+ * header; uncommented field semantics should be confirmed against it.
+ */
+struct pvr_psc_register {
+   uint32_t num;
+
+   unsigned int size; /* size of each element. */
+   unsigned int dim : 4; /* max number of elements. */
+   unsigned int index; /* offset into array. */
+
+   unsigned int cast;
+
+   unsigned int type;
+   uint64_t name;
+   bool auto_assign;
+   unsigned int original_type;
+};
+
+/* A pre-compiled PSC program: its code segment plus a description of the
+ * data-segment constants it expects.
+ *
+ * code/code_size are consumed by the pvr_pds_generate_draw_*_indirect
+ * helpers, which copy code_size * sizeof(uint32_t) bytes of code, so
+ * code_size is in dword instructions.
+ */
+struct pvr_psc_program_output {
+   const uint32_t *code; /* code segment instructions. */
+
+   struct pvr_psc_register *data; /* data-segment entries. */
+   unsigned int data_count; /* number of entries in 'data'. */
+
+   /* NOTE(review): presumably the *_size fields below are in dwords and the
+    * *_aligned variants are rounded up to an allocation granularity —
+    * confirm against the PSC generator.
+    */
+   unsigned int data_size_aligned;
+   unsigned int code_size_aligned;
+   unsigned int temp_size_aligned;
+
+   unsigned int data_size;
+   unsigned int code_size;
+   unsigned int temp_size;
+
+   /* Writes this program's data segment into 'buffer'. */
+   void (*write_data)(void *data, uint32_t *buffer);
+};
+
+/* USC task (DOUTU) control. src0 is the 64-bit DOUTU source word, built by
+ * pvr_pds_setup_doutu() from the execution address, USC temp count, sample
+ * rate and phase-rate change.
+ */
+struct pvr_pds_usc_task_control {
+   uint64_t src0;
+};
+
+/* Up to 4 64-bit state words currently supported. */
+#define PVR_PDS_MAX_NUM_DOUTW_CONSTANTS 4
+
+/* Structure for DOUTW. */
+struct pvr_pds_doutw_control {
+   enum pvr_pds_store_type dest_store; /* common or unified store target. */
+   uint32_t num_const64; /* number of valid entries in doutw_data. */
+   uint64_t doutw_data[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS]; /* state words. */
+   bool last_instruction; /* emit as the program's final instruction. */
+
+   uint32_t *data_segment; /* pointer to the data segment. */
+   uint32_t data_size; /* size of data segment. */
+   uint32_t code_size; /* size of code segment. */
+};
+
+/* Structure representing the PDS pixel event program.
+ *
+ * data_segment - pointer to the data segment
+ * task_control - USC task control words
+ * emit_words - array of Emit words
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_event_program {
+ uint32_t *data_segment;
+ struct pvr_pds_usc_task_control task_control;
+
+ uint32_t num_emit_word_pairs;
+ uint32_t *emit_words;
+
+ uint32_t data_size;
+ uint32_t code_size;
+};
+
+/*
+ * Structure representing the PDS pixel shader secondary attribute program.
+ *
+ * data_segment - pointer to the data segment
+ *
+ * num_uniform_dma_kicks - number of Uniform DMA kicks
+ * uniform_dma_control - array of Uniform DMA control words
+ * uniform_dma_address - array of Uniform DMA address words
+ *
+ * num_texture_dma_kicks - number of Texture State DMA kicks
+ * texture_dma_control - array of Texture State DMA control words
+ * texture_dma_address - array of Texture State DMA address words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ *
+ * temps_used - PDS Temps
+ */
+struct pvr_pds_pixel_shader_sa_program {
+ uint32_t *data_segment;
+
+ uint32_t num_dword_doutw;
+ uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+ uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+
+ uint32_t num_q_word_doutw;
+ uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+ uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+
+ uint32_t num_uniform_dma_kicks;
+ uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+ uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+ uint32_t num_texture_dma_kicks;
+ uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+ uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+ bool kick_usc;
+ bool write_tile_position;
+ uint32_t tile_position_attr_dest;
+ struct pvr_pds_usc_task_control usc_task_control;
+
+ bool clear;
+ uint32_t *clear_color;
+ uint32_t clear_color_dest_reg;
+ bool packed_clear;
+
+ uint32_t data_size;
+ uint32_t code_size;
+
+ uint32_t temps_used;
+};
+
+/* Structure representing the PDS pixel shader program.
+ *
+ * data_segment - pointer to the data segment
+ * usc_task_control - array of USC task control words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_kickusc_program {
+ uint32_t *data_segment;
+ struct pvr_pds_usc_task_control usc_task_control;
+
+ uint32_t data_size;
+ uint32_t code_size;
+};
+
+/* Structure representing the PDS fence/doutc program.
+ *
+ * data_segment - pointer to the data segment
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_fence_program {
+ uint32_t *data_segment;
+ uint32_t fence_constant_word;
+ uint32_t data_size;
+ uint32_t code_size;
+};
+
+/* Structure representing the PDS coefficient loading.
+ *
+ * data_segment - pointer to the data segment
+ * num_fpu_iterators - number of FPU iterators
+ * FPU_iterators - array of FPU iterator control words
+ * destination - array of Common Store destinations
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_coeff_loading_program {
+ uint32_t *data_segment;
+ uint32_t num_fpu_iterators;
+ uint32_t FPU_iterators[PVR_MAXIMUM_ITERATIONS];
+ uint32_t destination[PVR_MAXIMUM_ITERATIONS];
+
+ uint32_t data_size;
+ uint32_t code_size;
+
+ uint32_t temps_used;
+};
+
+/* Structure representing the PDS vertex shader secondary attribute program.
+ *
+ * data_segment - pointer to the data segment
+ * num_dma_kicks - number of DMA kicks
+ * dma_control - array of DMA control words
+ * dma_address - array of DMA address words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_vertex_shader_sa_program {
+ uint32_t *data_segment;
+
+   /* num_uniform_dma_kicks, uniform_dma_address and uniform_dma_control are
+    * not used when generating the PDS data and code sections; they currently
+    * exist only to simplify the driver implementation. The driver must fold
+    * this information into num_dma_kicks, dma_address and dma_control for the
+    * PDS program to be generated correctly.
+    */
+
+ uint32_t num_dword_doutw;
+ uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+ uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+
+ uint32_t num_q_word_doutw;
+ uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+ uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+
+ uint32_t num_uniform_dma_kicks;
+ uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+ uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+ uint32_t num_texture_dma_kicks;
+ uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+ uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+ uint32_t num_dma_kicks;
+ uint64_t dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+ uint32_t dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+ bool kick_usc;
+ struct pvr_pds_usc_task_control usc_task_control;
+
+ /* Shared register buffer base address (VDM/CDM context load case only). */
+ bool clear_pds_barrier;
+
+ uint32_t data_size;
+ uint32_t code_size;
+};
+
+/* Structure representing a PDS vertex stream element.
+ *
+ * There are two types of element, repeat DMA and non-repeat DMA.
+ *
+ * Non repeat DMA are the classic DMA of some number of bytes from an offset
+ * into contiguous registers. It is assumed the address and size are dword
+ * aligned. To use this, specify 0 for the component size. Each four bytes read
+ * will go to the next HW register.
+ *
+ * Repeat DMA enables copying of sub dword amounts at non dword aligned
+ * addresses. To use this, specify the component size as either 1,2,3 or 4
+ * bytes. Size specifies the number of components, and each component read
+ * will go to the next HW register.
+ *
+ * In both cases, HW registers are written contiguously.
+ *
+ * offset - offset of the vertex stream element
+ * size - size of the vertex stream element in bytes for non repeat DMA, or
+ * number of components for repeat DMA.
+ * reg - first vertex stream element register to DMA to.
+ * component_size - Size of component for repeat DMA, or 0 for non repeat dma.
+ */
+struct pvr_pds_vertex_element {
+ uint32_t offset;
+ uint32_t size;
+ uint16_t reg;
+ uint16_t component_size;
+};
+
+/* Structure representing a PDS vertex stream.
+ *
+ * instance_data - flag whether the vertex stream is indexed or instance data
+ * read_back - If True, vertex is reading back data output by GPU earlier in
+ * same kick. This will enable MCU coherency if relevant.
+ * multiplier - vertex stream frequency multiplier
+ * shift - vertex stream frequency shift
+ * address - vertex stream address in bytes
+ * buffer_size_in_bytes - buffer size in bytes if vertex attribute is sourced
+ * from buffer object
+ * stride - vertex stream stride in bytes
+ * num_vertices - number of vertices in buffer. Used for OOB checking.
+ *                0 = disable OOB checking.
+ * num_elements - number of vertex stream elements
+ * elements - array of vertex stream elements
+ * use_ddmadt - When the has_pds_ddmadt feature is enabled, allows DDMADT to
+ *              be used per stream element.
+ */
+struct pvr_pds_vertex_stream {
+ bool current_state;
+ bool instance_data;
+ bool read_back;
+ uint32_t multiplier;
+ uint32_t shift;
+ uint64_t address;
+ uint32_t buffer_size_in_bytes;
+ uint32_t stride;
+ uint32_t num_vertices;
+ uint32_t num_elements;
+ struct pvr_pds_vertex_element elements[PVR_PDS_NUM_VERTEX_ELEMENTS];
+
+ bool use_ddmadt;
+};
+
+/* Structure representing the PDS vertex shader program.
+ *
+ * This structure describes the USC code and vertex buffers required
+ * by the PDS vertex loading program.
+ *
+ * data_segment - Pointer to the data segment.
+ * usc_task_control - Description of USC task for vertex shader program.
+ * num_streams - Number of vertex streams.
+ * iterate_vtx_id - If set, the vertex id should be iterated.
+ * vtx_id_register - The register to iterate the VertexID into (if applicable)
+ * vtx_id_modifier - Value to pvr_add/SUB from index value received by PDS.
+ * This is used because the index value received by PDS has
+ * INDEX_OFFSET added, and generally VertexID wouldn't.
+ * vtx_id_sub_modifier - If true, vtx_id_modifier is subtracted, else added.
+ * iterate_instance_id - If set, the instance id should be iterated.
+ * instance_id_register - The register to iterate the InstanceID into (if
+ * applicable). The vertex and instance id will both be
+ * iterated as unsigned ints
+ *
+ * iterate_remap_id - Should be set to true if vertex shader needs
+ * VS_REMAPPED_INDEX_ID (e.g. Another TA shader runs after
+ * it).
+ * null_idx - Indicates no index buffer is bound, so every index should be
+ * null_idx_value.
+ * null_idx_value - The value to use as index if null_idx set.
+ * data_size - Size of data segment, in dwords. Output by call to
+ * pvr_pds_vertex_shader, and used as input when generating data.
+ * code_size - Size of code segment. Output by call to pvr_pds_vertex_shader.
+ * This is the number of dword instructions that are/were generated.
+ * temps_used - Number of temporaries used. Output by call to
+ * pvr_pds_vertex_shader.
+ */
+struct pvr_pds_vertex_shader_program {
+ uint32_t *data_segment;
+ struct pvr_pds_usc_task_control usc_task_control;
+ uint32_t num_streams;
+
+ bool iterate_vtx_id;
+ uint32_t vtx_id_register;
+ uint32_t vtx_id_modifier;
+ bool vtx_id_sub_modifier;
+
+ bool iterate_instance_id;
+ uint32_t instance_id_register;
+ uint32_t instance_ID_modifier;
+ uint32_t base_instance;
+
+ bool iterate_remap_id;
+
+ bool null_idx;
+ uint32_t null_idx_value;
+
+ uint32_t *stream_patch_offsets;
+ uint32_t num_stream_patches;
+
+ uint32_t data_size;
+ uint32_t code_size;
+ uint32_t temps_used;
+ uint32_t ddmadt_enables;
+ uint32_t skip_stream_flag;
+
+ bool draw_indirect;
+ bool indexed;
+
+ struct pvr_pds_vertex_stream streams[PVR_PDS_NUM_VERTEX_STREAMS];
+};
+
+/* Structure representing PDS shared reg storing program. */
+struct pvr_pds_shared_storing_program {
+ struct pvr_pds_doutw_control doutw_control; /*!< DOUTW state */
+ struct pvr_pds_kickusc_program usc_task; /*!< DOUTU state */
+ bool cc_enable; /*!< cc bit is set on the doutu instruction. */
+ uint32_t data_size; /*!< total data size, non-aligned. */
+ uint32_t code_size; /*!< total code size, non-aligned. */
+};
+
+#define PVR_MAX_STREAMOUT_BUFFERS 4
+
+/* Structure representing stream out init PDS programs. */
+struct pvr_pds_stream_out_init_program {
+ /* --- Input to PDS_STREAM_OUT_INT_PROGRAM --- */
+
+ /* Number of buffers to load/store.
+ * This indicates the number of entries in the next two arrays.
+ * Data is loaded/stored contiguously to persistent temps.
+ */
+ uint32_t num_buffers;
+
+ /* Number of persistent temps in dword to load/store for each buffer. */
+ uint32_t pds_buffer_data_size[PVR_MAX_STREAMOUT_BUFFERS];
+ /* The device address for loading/storing persistent temps for each buffer.
+ * If address is zero, then no data is loaded/stored
+ * into pt registers for the buffer.
+ */
+ uint64_t dev_address_for_buffer_data[PVR_MAX_STREAMOUT_BUFFERS];
+
+ /* PDS state update Stream Out Init Programs. */
+ uint32_t stream_out_init_pds_data_size;
+ uint32_t stream_out_init_pds_code_size;
+};
+
+/* Structure representing stream out terminate PDS program. */
+struct pvr_pds_stream_out_terminate_program {
+ /* Input to PDS_STREAM_OUT_TERMINATE_PROGRAM.
+ *
+ * Number of persistent temps in dword used in stream out PDS programs needs
+ * to be stored.
+ * The terminate program writes pds_persistent_temp_size_to_store number
+ * persistent temps to dev_address_for_storing_persistent_temp.
+ */
+ uint32_t pds_persistent_temp_size_to_store;
+
+ /* The device address for storing persistent temps. */
+ uint64_t dev_address_for_storing_persistent_temp;
+
+ /* PPP state update Stream Out Program for stream out terminate. */
+ uint32_t stream_out_terminate_pds_data_size;
+ uint32_t stream_out_terminate_pds_code_size;
+};
+
+/* Structure representing the PDS compute shader program.
+ * This structure describes the USC code and compute buffers required
+ * by the PDS compute task loading program.
+ *
+ * data_segment
+ *    Pointer to the data segment.
+ * usc_task_control
+ *    Description of USC task for compute shader program.
+ * data_size
+ *    Size of data segment, in dwords. Output by call to
+ *    pvr_pds_compute_shader, and used as input when generating data.
+ * code_size
+ *    Size of code segment. Output by call to pvr_pds_compute_shader. This is
+ *    the number of dword instructions that are/were generated.
+ * temps_used
+ *    Number of temporaries used. Output by call to pvr_pds_compute_shader.
+ * highest_temp
+ *    The highest temp number used. Output by call to pvr_pds_compute_shader.
+ * coeff_update_task_branch_size
+ *    The number of instructions we need to branch over to skip the
+ *    coefficient update task.
+ */
+
+struct pvr_pds_compute_shader_program {
+ uint32_t *data_segment;
+ struct pvr_pds_usc_task_control usc_task_control;
+ struct pvr_pds_usc_task_control usc_task_control_coeff_update;
+
+ uint32_t data_size;
+ uint32_t code_size;
+
+ uint32_t temps_used;
+ uint32_t highest_temp;
+
+ uint32_t local_input_regs[3];
+ uint32_t work_group_input_regs[3];
+ uint32_t global_input_regs[3];
+
+ uint32_t barrier_coefficient;
+
+ bool fence;
+
+ bool flattened_work_groups;
+
+ bool clear_pds_barrier;
+
+ bool has_coefficient_update_task;
+
+ uint32_t coeff_update_task_branch_size;
+
+ bool add_base_workgroup;
+ uint32_t base_workgroup_constant_offset_in_dwords[3];
+
+ bool kick_usc;
+
+ bool conditional_render;
+ uint32_t cond_render_const_offset_in_dwords;
+ uint32_t cond_render_pred_temp;
+};
+/* Cache-control constant for PDS load/store program generation.
+ * NOTE(review): the consumer of this struct is not visible in this header —
+ * confirm the encoding of cache_control_const against it.
+ */
+struct pvr_pds_ldst_control {
+   uint64_t cache_control_const;
+};
+
+/* Define a value we can use as a register number in the driver to denote that
+ * the value is unused.
+ */
+#define PVR_PDS_COMPUTE_INPUT_REG_UNUSED 0xFFFFFFFFU
+
+/*****************************************************************************
+ function declarations
+*****************************************************************************/
+
+/*****************************************************************************
+ Constructors
+*****************************************************************************/
+
+void pvr_pds_pixel_shader_sa_initialize(
+ struct pvr_pds_pixel_shader_sa_program *program);
+void pvr_pds_compute_shader_initialize(
+ struct pvr_pds_compute_shader_program *program);
+
+/* Utility */
+
+uint32_t pvr_pds_append_constant64(uint32_t *constants,
+ uint64_t constant_value,
+ uint32_t *data_size);
+
+uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
+ uint64_t *dma_address,
+ uint32_t dest_offset,
+ uint32_t dma_size,
+ uint64_t src_address,
+ const struct pvr_device_info *dev_info);
+
+void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
+ uint64_t execution_address,
+ uint32_t usc_temps,
+ uint32_t sample_rate,
+ uint32_t phase_rate_change);
+
+/* Pixel */
+#define pvr_pds_set_sizes_pixel_shader(X) \
+ pvr_pds_kick_usc(X, NULL, 0, false, PDS_GENERATE_SIZES)
+#define pvr_pds_generate_pixel_shader_program(X, Y) \
+ pvr_pds_kick_usc(X, Y, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS)
+
+#define pvr_pds_generate_VDM_sync_program(X, Y) \
+ pvr_pds_kick_usc(X, Y, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS)
+
+uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode);
+
+uint32_t *
+pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict psControl,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
+ uint32_t *restrict buffer,
+ uint32_t start_next_constant,
+ bool cc_enabled,
+ enum pvr_pds_generate_mode gen_mode);
+
+/* Pixel Secondary */
+#define pvr_pds_set_sizes_pixel_shader_sa_uniform_data(X, Y) \
+ pvr_pds_pixel_shader_uniform_texture_data(X, \
+ NULL, \
+ PDS_GENERATE_SIZES, \
+ true, \
+ Y)
+#define pvr_pds_set_sizes_pixel_shader_sa_texture_data(X, Y) \
+ pvr_pds_pixel_shader_uniform_texture_data(X, \
+ NULL, \
+ PDS_GENERATE_SIZES, \
+ false, \
+ Y)
+#define pvr_pds_set_sizes_pixel_shader_uniform_texture_code(X) \
+ pvr_pds_pixel_shader_uniform_texture_code(X, NULL, PDS_GENERATE_SIZES)
+
+#define pvr_pds_generate_pixel_shader_sa_texture_state_data(X, Y, Z) \
+ pvr_pds_pixel_shader_uniform_texture_data(X, \
+ Y, \
+ PDS_GENERATE_DATA_SEGMENT, \
+ false, \
+ Z)
+
+#define pvr_pds_generate_pixel_shader_sa_code_segment(X, Y) \
+ pvr_pds_pixel_shader_uniform_texture_code(X, Y, PDS_GENERATE_CODE_SEGMENT)
+
+uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
+ struct pvr_pds_pixel_shader_sa_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ bool uniform,
+ const struct pvr_device_info *dev_info);
+
+uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
+ struct pvr_pds_pixel_shader_sa_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode);
+
+/* Vertex */
+#define pvr_pds_set_sizes_vertex_shader(X, Y) \
+ pvr_pds_vertex_shader(X, NULL, PDS_GENERATE_SIZES, Y)
+
+#define pvr_pds_generate_vertex_shader_data_segment(X, Y, Z) \
+ pvr_pds_vertex_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_vertex_shader_code_segment(X, Y, Z) \
+ pvr_pds_vertex_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *
+pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Compute */
+uint32_t *
+pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+#define pvr_pds_set_sizes_compute_shader(X, Y) \
+ pvr_pds_compute_shader(X, NULL, PDS_GENERATE_SIZES, Y)
+
+#define pvr_pds_generate_compute_shader_data_segment(X, Y, Z) \
+ pvr_pds_compute_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_compute_shader_code_segment(X, Y, Z) \
+ pvr_pds_compute_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+/* Vertex Secondary */
+#define pvr_pds_set_sizes_vertex_shader_sa(X, Y) \
+ pvr_pds_vertex_shader_sa(X, NULL, PDS_GENERATE_SIZES, Y)
+
+#define pvr_pds_generate_vertex_shader_sa_data_segment(X, Y, Z) \
+ pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_vertex_shader_sa_code_segment(X, Y, Z) \
+ pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *pvr_pds_vertex_shader_sa(
+ struct pvr_pds_vertex_shader_sa_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Pixel Event */
+#define pvr_pds_set_sizes_pixel_event(X) \
+ pvr_pds_generate_pixel_event(X, NULL, PDS_GENERATE_SIZES, NULL)
+
+#define pvr_pds_generate_pixel_event_data_segment(X, Y, Z) \
+ pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_pixel_event_code_segment(X, Y, Z) \
+ pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *
+pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Coefficient Loading */
+#define pvr_pds_set_sizes_coeff_loading(X) \
+ pvr_pds_coefficient_loading(X, NULL, PDS_GENERATE_SIZES)
+
+#define pvr_pds_generate_coeff_loading_program(X, Y) \
+ pvr_pds_coefficient_loading(X, Y, PDS_GENERATE_CODE_SEGMENT)
+
+uint32_t *pvr_pds_coefficient_loading(
+ struct pvr_pds_coeff_loading_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode);
+
+/* Compute DM barrier-specific conditional code */
+uint32_t *pvr_pds_generate_compute_barrier_conditional(
+ uint32_t *buffer,
+ enum pvr_pds_generate_mode gen_mode);
+
+/* Shared register storing */
+uint32_t *pvr_pds_generate_shared_storing_program(
+ struct pvr_pds_shared_storing_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
/* Shared register loading */
+uint32_t *pvr_pds_generate_fence_terminate_program(
+ struct pvr_pds_fence_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* CDM Shared register loading */
+uint32_t *pvr_pds_generate_compute_shared_loading_program(
+ struct pvr_pds_shared_storing_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Stream out */
+uint32_t *pvr_pds_generate_stream_out_init_program(
+ struct pvr_pds_stream_out_init_program *restrict program,
+ uint32_t *restrict buffer,
+ bool store_mode,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+uint32_t *pvr_pds_generate_stream_out_terminate_program(
+ struct pvr_pds_stream_out_terminate_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+/* Structure representing DrawIndirect PDS programs. */
+struct pvr_pds_drawindirect_program {
+ /* --- Input to pvr_pds_drawindirect_program --- */
+
+ /* Address of the index list block in the VDM control stream.
+ * This must point to a 128-bit aligned index list header.
+ */
+ uint64_t index_list_addr_buffer;
+ /* Address of arguments for Draw call. Layout is defined by eArgFormat. */
+ uint64_t arg_buffer;
+
+ /* Address of index buffer. */
+ uint64_t index_buffer;
+
+ /* The raw (without addr msb in [7:0]) index block header. */
+ uint32_t index_block_header;
+
+ /* Number of bytes per index. */
+ uint32_t index_stride;
+
+ /* Used during/after compilation to fill in constant buffer. */
+ struct pvr_psc_register data[32];
+
+ /* Results of compilation. */
+ struct pvr_psc_program_output program;
+
+ /* This is used for ARB_multi_draw_indirect. */
+ unsigned int count;
+ unsigned int stride;
+
+ /* Internal stuff. */
+ unsigned int num_views;
+
+ bool support_base_instance;
+ bool increment_draw_id;
+};
+
+void pvr_pds_generate_draw_arrays_indirect(
+ struct pvr_pds_drawindirect_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+void pvr_pds_generate_draw_elements_indirect(
+ struct pvr_pds_drawindirect_program *restrict program,
+ uint32_t *restrict buffer,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
+
+uint64_t pvr_pds_encode_st_src0(uint64_t src,
+ uint64_t count4,
+ uint64_t dst_add,
+ bool write_through,
+ const struct pvr_device_info *dev_info);
+
+uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
+ uint64_t count8,
+ uint64_t src_add,
+ bool cached,
+ const struct pvr_device_info *dev_info);
+
+uint32_t *pvr_pds_generate_single_ldst_instruction(
+ bool ld,
+ const struct pvr_pds_ldst_control *control,
+ uint32_t temp_index,
+ uint64_t address,
+ uint32_t count,
+ uint32_t *next_constant,
+ uint32_t *total_data_size,
+ uint32_t *total_code_size,
+ uint32_t *buffer,
+ bool data_fence,
+ enum pvr_pds_generate_mode gen_mode,
+ const struct pvr_device_info *dev_info);
/* Describes one descriptor-set DMA: which set to copy, how many dwords, and
 * which shared register the data lands in.
 */
struct pvr_pds_descriptor_set {
   unsigned int descriptor_set; /* id of the descriptor set. */
   unsigned int size_in_dwords; /* Number of dwords to transfer. */
   unsigned int destination; /* Destination shared register to which
                              * descriptor entries should be loaded.
                              */
   bool primary; /* Primary or secondary? */
   unsigned int offset_in_dwords; /* Offset from the start of the descriptor
                                   * set to start DMA'ing from.
                                   */
};
+
/* Buffer type identifiers; see pvr_pds_buffer::type. */
#define PVR_BUFFER_TYPE_UBO (0)
#define PVR_BUFFER_TYPES_COMPILE_TIME (1)
#define PVR_BUFFER_TYPE_BLEND_CONSTS (2)
#define PVR_BUFFER_TYPE_PUSH_CONSTS (3)
#define PVR_BUFFER_TYPES_BUFFER_LENGTHS (4)
#define PVR_BUFFER_TYPE_DYNAMIC (5)
#define PVR_BUFFER_TYPES_UBO_ZEROING (6)
#define PVR_BUFFER_TYPE_INVALID (~0)
+
/* One "state" buffer to be uploaded by the descriptor program.
 * type is one of the PVR_BUFFER_TYPE[S]_* values above.
 */
struct pvr_pds_buffer {
   uint16_t type;

   uint16_t size_in_dwords;
   uint32_t destination;

   union {
      /* Direct pointer to the buffer contents. */
      uint32_t *data;
      /* Descriptor-relative location of the source data. */
      struct {
         uint32_t buffer_id;
         uint16_t desc_set;
         uint16_t binding;
         uint32_t source_offset;
      };
   };
};

/* Capacity of pvr_descriptor_program_input::buffers. */
#define PVR_PDS_MAX_BUFFERS (24)
+
/* Input description for pvr_pds_generate_descriptor_upload_program(). */
struct pvr_descriptor_program_input {
   /* User-specified descriptor sets. */
   unsigned int descriptor_set_count;
   struct pvr_pds_descriptor_set descriptor_sets[8];

   /* "State" buffers, including:
    * compile-time constants
    * blend constants
    * push constants
    * UBOs that have been hoisted.
    */
   uint32_t buffer_count;
   struct pvr_pds_buffer buffers[PVR_PDS_MAX_BUFFERS];

   uint32_t blend_constants_used_mask;

   /* Secondary USC task; secondary_task_control is presumably only
    * meaningful when secondary_program_present is set — confirm at callers.
    */
   bool secondary_program_present;
   struct pvr_pds_usc_task_control secondary_task_control;

   /* NOTE(review): name suggests a program must be emitted even when there
    * is nothing to upload — confirm against the generator.
    */
   bool must_not_be_empty;
};
+
+#define PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED BITFIELD_BIT(0U)
+#define PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED BITFIELD_BIT(1U)
+#define PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT BITFIELD_BIT(2U)
+#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT BITFIELD_BIT(3U)
+#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED BITFIELD_BIT(4U)
+
+/* BaseVertex is used in shader. */
+#define PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED BITFIELD_BIT(5U)
+
+#define PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED BITFIELD_BIT(6U)
+
+#define PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE BITFIELD_BIT(0U)
+
/* Describes a single attribute DMA performed by the vertex primary program. */
struct pvr_pds_vertex_dma {
   /* Try and keep this structure packing as small as possible. */
   uint16_t offset;
   uint16_t stride;

   /* ORd bitfield of PVR_PDS_VERTEX_DMA_FLAGS_*. */
   uint8_t flags;
   uint8_t size_in_dwords;
   uint8_t component_size_in_bytes;
   uint8_t destination;
   uint8_t binding_index;
   uint32_t divisor;

   /* Mirrors the field of the same name in
    * struct pvr_const_map_entry_robust_vertex_attribute_address.
    */
   uint16_t robustness_buffer_offset;
};
+
/* Input description for pvr_pds_generate_vertex_primary_program(). */
struct pvr_pds_vertex_primary_program_input {
   /* Control for the DOUTU that kicks the vertex USC shader. */
   struct pvr_pds_usc_task_control usc_task_control;
   /* List of DMAs (of size dma_count). */
   struct pvr_pds_vertex_dma *dma_list;
   uint32_t dma_count;

   /* ORd bitfield of PVR_PDS_VERTEX_FLAGS_* */
   uint32_t flags;

   /* Registers receiving system values — presumably only used when the
    * matching PVR_PDS_VERTEX_FLAGS_*_REQUIRED flag is set; confirm.
    */
   uint16_t vertex_id_register;
   uint16_t instance_id_register;

   /* API provided baseInstance (i.e. not from drawIndirect). */
   uint32_t base_instance;

   uint16_t base_instance_register;
   uint16_t base_vertex_register;
   uint16_t draw_index_register;
};
+
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_NULL (0)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64 (1)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32 (2)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET (3)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER (4)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER (5)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS (6)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS (7)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS (8)
+
+/* Use if pds_ddmadt is enabled. */
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE (9)
+
+/* Use if pds_ddmadt is not enabled. */
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX (9)
+
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE (10)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING (11)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX (12)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_WORKGROUP (13)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_COND_RENDER (14)
+
+/* We pack all the following structs tightly into a buffer using += sizeof(x)
+ * offsets, this can lead to data that is not native aligned. Supplying the
+ * packed attribute indicates that unaligned accesses may be required, and the
+ * aligned attribute causes the size of the structure to be aligned to a
+ * specific boundary.
+ */
+#define PVR_ALIGNED __attribute__((packed, aligned(1)))
+
/* Base layout shared by every const map entry. "type" is one of the
 * PVR_PDS_CONST_MAP_ENTRY_TYPE_* values; "const_offset" is presumably the
 * offset of the constant to patch — confirm against the patch loop.
 */
struct pvr_const_map_entry {
   uint8_t type;
   uint8_t const_offset;
} PVR_ALIGNED;

/* A 32-bit literal to write verbatim. */
struct pvr_const_map_entry_literal32 {
   uint8_t type;
   uint8_t const_offset;

   uint32_t literal_value;
} PVR_ALIGNED;

/* A 64-bit literal to write verbatim. */
struct pvr_const_map_entry_literal64 {
   uint8_t type;
   uint8_t const_offset;

   uint64_t literal_value;
} PVR_ALIGNED;

/* References a descriptor set (primary or secondary) at a dword offset. */
struct pvr_const_map_entry_descriptor_set {
   uint8_t type;
   uint8_t const_offset;

   uint32_t descriptor_set;
   PVR_PDS_BOOL primary;
   uint32_t offset_in_dwords;
} PVR_ALIGNED;

/* References a range of a constant buffer by set/binding. */
struct pvr_const_map_entry_constant_buffer {
   uint8_t type;
   uint8_t const_offset;

   uint16_t buffer_id;
   uint16_t desc_set;
   uint16_t binding;
   uint32_t offset;
   uint32_t size_in_dwords;
} PVR_ALIGNED;

/* Range of a constant buffer to be zero-filled. */
struct pvr_const_map_entry_constant_buffer_zeroing {
   uint8_t type;
   uint8_t const_offset;

   uint16_t buffer_id;
   uint32_t offset;
   uint32_t size_in_dwords;
} PVR_ALIGNED;

/* References a "special" buffer by PVR_BUFFER_TYPE* and index. */
struct pvr_const_map_entry_special_buffer {
   uint8_t type;
   uint8_t const_offset;

   uint8_t buffer_type;
   uint32_t buffer_index;
} PVR_ALIGNED;
+
/* Pre-encoded DOUTU control word for the USC kick. */
struct pvr_const_map_entry_doutu_address {
   uint8_t type;
   uint8_t const_offset;

   uint64_t doutu_control;
} PVR_ALIGNED;

/* Vertex attribute address entry — presumably patched with the bound vertex
 * buffer address at draw time; confirm against the driver patch code.
 */
struct pvr_const_map_entry_vertex_attribute_address {
   uint8_t type;
   uint8_t const_offset;

   uint16_t offset;
   uint16_t stride;
   uint8_t binding_index;
   uint8_t size_in_dwords;
} PVR_ALIGNED;

/* As above, with the extra fields used by robust vertex fetch. */
struct pvr_const_map_entry_robust_vertex_attribute_address {
   uint8_t type;
   uint8_t const_offset;

   uint16_t offset;
   uint16_t stride;
   uint8_t binding_index;
   uint8_t size_in_dwords;
   uint16_t robustness_buffer_offset;
   uint8_t component_size_in_bytes;
} PVR_ALIGNED;

/* Maximum-index variant used when pds_ddmadt is not present (see the
 * PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX comment above).
 */
struct pvr_const_map_entry_vertex_attribute_max_index {
   uint8_t type;
   uint8_t const_offset;

   uint8_t binding_index;
   uint8_t size_in_dwords;
   uint16_t offset;
   uint16_t stride;
   uint8_t component_size_in_bytes;
} PVR_ALIGNED;
+
+struct pvr_const_map_entry_base_instance {
+ uint8_t type;
+ uint8_t const_offset;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_base_vertex {
+ uint8_t type;
+ uint8_t const_offset;
+};
+
/* Base-workgroup entry; workgroup_component presumably selects the x/y/z
 * component being patched — confirm against the compute path.
 */
struct pvr_pds_const_map_entry_base_workgroup {
   uint8_t type;
   uint8_t const_offset;
   uint8_t workgroup_component;
} PVR_ALIGNED;

/* Out-of-bounds buffer size entry, used when pds_ddmadt is present. */
struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size {
   uint8_t type;
   uint8_t const_offset;
   uint8_t binding_index;
} PVR_ALIGNED;

/* Conditional-render entry; records the predicate temp register. */
struct pvr_pds_const_map_entry_cond_render {
   uint8_t type;
   uint8_t const_offset;

   uint32_t cond_render_pred_temp;
} PVR_ALIGNED;
+
/* Output metadata from PDS program generation: resource sizes plus the
 * const map entries describing how the data segment is to be filled in.
 */
struct pvr_pds_info {
   uint32_t temps_required;
   uint32_t code_size_in_dwords;
   uint32_t data_size_in_dwords;

   uint32_t entry_count; /* Number of entries in "entries". */
   size_t entries_size_in_bytes; /* Capacity of the "entries" buffer. */
   size_t entries_written_size_in_bytes; /* Bytes actually written. */
   struct pvr_const_map_entry *entries;
};
+
+void pvr_pds_generate_descriptor_upload_program(
+ struct pvr_descriptor_program_input *input_program,
+ uint32_t *code_section,
+ struct pvr_pds_info *info);
+void pvr_pds_generate_vertex_primary_program(
+ struct pvr_pds_vertex_primary_program_input *input_program,
+ uint32_t *code,
+ struct pvr_pds_info *info,
+ bool use_robust_vertex_fetch,
+ const struct pvr_device_info *dev_info);
+
+/**
+ * Generate USC address.
+ *
+ * \param doutu Location to write the generated address.
+ * \param execution_address Address to generate from.
+ */
+static ALWAYS_INLINE void
+pvr_set_usc_execution_address64(uint64_t *doutu, uint64_t execution_address)
+{
+ doutu[0] |= (((execution_address >>
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT)
+ << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT) &
+ ~PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK);
+}
+
+#endif /* PVR_PDS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_encode.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "util/macros.h"
+
+static void pvr_error_check(PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error)
+{
+ if (err_callback)
+ err_callback(error);
+ else
+ fprintf(stderr, "ERROR: %s\n", error.text);
+}
+
/* Expand PVR_INSTRUCTIONS into a table of instruction-name strings, indexed
 * by instruction enum value, for error messages and disassembly output.
 */
#define X(a) #a,
static const char *const instructions[] = { PVR_INSTRUCTIONS };
#undef X
+
+static void error_reg_range(uint32_t raw,
+ void *context,
+ PVR_ERR_CALLBACK err_callback,
+ uint32_t parameter,
+ struct pvr_dissassembler_error error)
+{
+ char param[32];
+
+ error.type = PVR_PDS_ERR_PARAM_RANGE;
+ error.instruction = error.instruction;
+ error.parameter = parameter;
+ error.raw = raw;
+
+ if (parameter == 0)
+ snprintf(param, sizeof(param), "dst");
+ else
+ snprintf(param, sizeof(param), "src%u", parameter - 1);
+
+ error.text = malloc(PVR_PDS_MAX_INST_STR_LEN);
+ assert(error.text);
+
+ snprintf(error.text,
+ PVR_PDS_MAX_INST_STR_LEN,
+ "Register out of range, instruction: %s, operand: %s, value: %u",
+ instructions[error.instruction],
+ param,
+ raw);
+ pvr_error_check(err_callback, error);
+}
+
+static struct pvr_operand *
+pvr_pds_disassemble_regs32(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction,
+ uint32_t parameter)
+{
+ struct pvr_operand *op = calloc(1, sizeof(*op));
+ assert(op);
+
+ op->type = UNRESOLVED;
+ instruction &= PVR_ROGUE_PDSINST_REGS32_MASK;
+ switch (pvr_pds_inst_decode_field_range_regs32(instruction)) {
+ case PVR_ROGUE_PDSINST_REGS32_CONST32:
+ op->type = CONST32;
+ op->address = instruction - PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER;
+ op->absolute_address = op->address;
+ break;
+ case PVR_ROGUE_PDSINST_REGS32_TEMP32:
+ op->type = TEMP32;
+ op->address = instruction - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
+ op->absolute_address = op->address;
+ break;
+ case PVR_ROGUE_PDSINST_REGS32_PTEMP32:
+ op->type = PTEMP32;
+ op->address = instruction - PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER;
+ op->absolute_address = op->address;
+ break;
+ default:
+ error_reg_range(instruction, context, err_callback, parameter, error);
+ }
+ return op;
+}
/* Decode a REGS32TP register field (32-bit TEMP32/PTEMP32 banks) into a
 * freshly-allocated operand; on an out-of-range encoding the operand is
 * left UNRESOLVED and an error is reported.
 */
static struct pvr_operand *
pvr_pds_disassemble_regs32tp(void *context,
                             PVR_ERR_CALLBACK err_callback,
                             struct pvr_dissassembler_error error,
                             uint32_t instruction,
                             uint32_t parameter)
{
   struct pvr_operand *op = calloc(1, sizeof(*op));
   assert(op);

   op->type = UNRESOLVED;
   instruction &= PVR_ROGUE_PDSINST_REGS32TP_MASK;
   switch (pvr_pds_inst_decode_field_range_regs32tp(instruction)) {
   case PVR_ROGUE_PDSINST_REGS32TP_TEMP32:
      op->type = TEMP32;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER;
      op->absolute_address = op->address;
      break;
   case PVR_ROGUE_PDSINST_REGS32TP_PTEMP32:
      op->type = PTEMP32;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
      op->absolute_address = op->address;
      break;
   default:
      error_reg_range(instruction, context, err_callback, parameter, error);
   }
   return op;
}
/* Decode a REGS32T register field (32-bit TEMP32 bank only) into a
 * freshly-allocated operand; anything else is reported as out of range and
 * the operand stays UNRESOLVED.
 */
static struct pvr_operand *
pvr_pds_disassemble_regs32t(void *context,
                            PVR_ERR_CALLBACK err_callback,
                            struct pvr_dissassembler_error error,
                            uint32_t instruction,
                            uint32_t parameter)
{
   struct pvr_operand *op = calloc(1, sizeof(*op));
   assert(op);

   op->type = UNRESOLVED;
   instruction &= PVR_ROGUE_PDSINST_REGS32T_MASK;
   switch (pvr_pds_inst_decode_field_range_regs32t(instruction)) {
   case PVR_ROGUE_PDSINST_REGS32T_TEMP32:
      op->type = TEMP32;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS32T_TEMP32_LOWER;
      op->absolute_address = op->address;
      break;
   default:
      error_reg_range(instruction, context, err_callback, parameter, error);
   }
   return op;
}
+
/* Decode a REGS64 register field (64-bit constants, temps or persistent
 * temps) into a freshly-allocated operand.
 *
 * The absolute address is address * 2 — presumably because absolute
 * addresses count 32-bit register slots and a 64-bit register spans two;
 * confirm against the register-file layout.
 */
static struct pvr_operand *
pvr_pds_disassemble_regs64(void *context,
                           PVR_ERR_CALLBACK err_callback,
                           struct pvr_dissassembler_error error,
                           uint32_t instruction,
                           uint32_t parameter)
{
   struct pvr_operand *op = calloc(1, sizeof(*op));
   assert(op);

   op->type = UNRESOLVED;
   instruction &= PVR_ROGUE_PDSINST_REGS64_MASK;
   switch (pvr_pds_inst_decode_field_range_regs64(instruction)) {
   case PVR_ROGUE_PDSINST_REGS64_CONST64:
      op->type = CONST64;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER;
      op->absolute_address = op->address * 2;
      break;
   case PVR_ROGUE_PDSINST_REGS64_TEMP64:
      op->type = TEMP64;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER;
      op->absolute_address = op->address * 2;
      break;
   case PVR_ROGUE_PDSINST_REGS64_PTEMP64:
      op->type = PTEMP64;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER;
      op->absolute_address = op->address * 2;
      break;
   default:
      error_reg_range(instruction, context, err_callback, parameter, error);
   }

   return op;
}
/* Decode a REGS64T register field (64-bit TEMP64 bank only) into a
 * freshly-allocated operand.
 *
 * NOTE(review): this masks with the REGS64T mask but decodes with the
 * REGS64TP range decoder while switching on a REGS64T case label — confirm
 * the T/TP encodings line up, or whether a _regs64t decoder should be used.
 */
static struct pvr_operand *
pvr_pds_disassemble_regs64t(void *context,
                            PVR_ERR_CALLBACK err_callback,
                            struct pvr_dissassembler_error error,
                            uint32_t instruction,
                            uint32_t parameter)
{
   struct pvr_operand *op = calloc(1, sizeof(*op));
   assert(op);

   op->type = UNRESOLVED;
   instruction &= PVR_ROGUE_PDSINST_REGS64T_MASK;
   switch (pvr_pds_inst_decode_field_range_regs64tp(instruction)) {
   case PVR_ROGUE_PDSINST_REGS64T_TEMP64:
      op->type = TEMP64;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS64T_TEMP64_LOWER;
      op->absolute_address = op->address * 2;
      break;
   default:
      error_reg_range(instruction, context, err_callback, parameter, error);
   }
   return op;
}
+
/* Decode a REGS64C register field (64-bit CONST64 bank only) into a
 * freshly-allocated operand; anything else is reported as out of range and
 * the operand stays UNRESOLVED.
 */
static struct pvr_operand *
pvr_pds_disassemble_regs64C(void *context,
                            PVR_ERR_CALLBACK err_callback,
                            struct pvr_dissassembler_error error,
                            uint32_t instruction,
                            uint32_t parameter)
{
   struct pvr_operand *op = calloc(1, sizeof(*op));
   assert(op);

   op->type = UNRESOLVED;
   instruction &= PVR_ROGUE_PDSINST_REGS64C_MASK;
   switch (pvr_rogue_pds_inst_decode_field_range_regs64c(instruction)) {
   case PVR_ROGUE_PDSINST_REGS64C_CONST64:
      op->type = CONST64;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS64C_CONST64_LOWER;
      /* 64-bit registers occupy two 32-bit slots. */
      op->absolute_address = op->address * 2;
      break;
   default:
      error_reg_range(instruction, context, err_callback, parameter, error);
   }
   return op;
}
+
/* Decode a REGS64TP register field (64-bit TEMP64/PTEMP64 banks) into a
 * freshly-allocated operand; on an out-of-range encoding the operand is
 * left UNRESOLVED and an error is reported.
 */
static struct pvr_operand *
pvr_pds_disassemble_regs64tp(void *context,
                             PVR_ERR_CALLBACK err_callback,
                             struct pvr_dissassembler_error error,
                             uint32_t instruction,
                             uint32_t parameter)
{
   struct pvr_operand *op = calloc(1, sizeof(*op));
   assert(op);

   op->type = UNRESOLVED;
   instruction &= PVR_ROGUE_PDSINST_REGS64TP_MASK;
   switch (pvr_pds_inst_decode_field_range_regs64tp(instruction)) {
   case PVR_ROGUE_PDSINST_REGS64TP_TEMP64:
      op->type = TEMP64;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER;
      op->absolute_address = op->address * 2;
      break;
   case PVR_ROGUE_PDSINST_REGS64TP_PTEMP64:
      op->type = PTEMP64;
      op->address = instruction - PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER;
      op->absolute_address = op->address * 2;
      break;
   default:
      error_reg_range(instruction, context, err_callback, parameter, error);
   }
   return op;
}
+
/* Bits and shifts of the instruction-word type/opcode fields — presumably
 * used by the top-level decode dispatcher (not visible in this chunk).
 */
#define PVR_TYPE_OPCODE BITFIELD_BIT(31U)
#define PVR_TYPE_OPCODE_SP BITFIELD_BIT(27U)
#define PVR_TYPE_OPCODEB BITFIELD_BIT(30U)

#define PVR_TYPE_OPCODE_SHIFT 28U
#define PVR_TYPE_OPCODE_SP_SHIFT 23U
#define PVR_TYPE_OPCODEB_SHIFT 29U
+
/* Decode an ADD64 instruction word into a heap-allocated pvr_add node.
 * Flag bits (cc/alum/sna) are taken straight from the word; the two 64-bit
 * sources and the 64-bit temp/ptemp destination are decoded from their
 * shifted fields. Each operand is linked back to the instruction.
 */
static struct pvr_instruction *
pvr_pds_disassemble_instruction_add64(void *context,
                                      PVR_ERR_CALLBACK err_callback,
                                      struct pvr_dissassembler_error error,
                                      uint32_t instruction)
{
   struct pvr_add *add = malloc(sizeof(*add));
   assert(add);

   add->instruction.type = INS_ADD64;
   add->instruction.next = NULL;

   add->cc = instruction & PVR_ROGUE_PDSINST_ADD64_CC_ENABLE;
   add->alum = instruction & PVR_ROGUE_PDSINST_ADD64_ALUM_SIGNED;
   add->sna = instruction & PVR_ROGUE_PDSINST_ADD64_SNA_SUB;

   add->src0 = pvr_pds_disassemble_regs64(context,
                                          err_callback,
                                          error,
                                          instruction >>
                                             PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT,
                                          1);
   add->src0->instruction = &add->instruction;
   add->src1 = pvr_pds_disassemble_regs64(context,
                                          err_callback,
                                          error,
                                          instruction >>
                                             PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT,
                                          2);
   add->src1->instruction = &add->instruction;
   add->dst = pvr_pds_disassemble_regs64tp(context,
                                           err_callback,
                                           error,
                                           instruction >>
                                              PVR_ROGUE_PDSINST_ADD64_DST_SHIFT,
                                           0);
   add->dst->instruction = &add->instruction;

   return &add->instruction;
}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_add32(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction)
+{
+ struct pvr_add *add = malloc(sizeof(*add));
+ assert(add);
+
+ add->instruction.type = INS_ADD32;
+ add->instruction.next = NULL;
+
+ add->cc = instruction & PVR_ROGUE_PDSINST_ADD32_CC_ENABLE;
+ add->alum = instruction & PVR_ROGUE_PDSINST_ADD32_ALUM_SIGNED;
+ add->sna = instruction & PVR_ROGUE_PDSINST_ADD32_SNA_SUB;
+
+ add->src0 = pvr_pds_disassemble_regs32(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT,
+ 1);
+ add->src0->instruction = &add->instruction;
+ add->src1 = pvr_pds_disassemble_regs32(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT,
+ 2);
+ add->src1->instruction = &add->instruction;
+ add->dst = pvr_pds_disassemble_regs32tp(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_ADD32_DST_SHIFT,
+ 0);
+ add->dst->instruction = &add->instruction;
+
+ return &add->instruction;
+}
+
/* Decode an STM (stream out) instruction word into a heap-allocated
 * pvr_stm node: condition/global/so/tst flags, the stream-out index, and
 * four source operands (src0/src1/src3 from the 64-bit temp/ptemp banks,
 * src2 from the 32-bit banks).
 */
static struct pvr_instruction *
pvr_pds_disassemble_instruction_stm(void *context,
                                    PVR_ERR_CALLBACK err_callback,
                                    struct pvr_dissassembler_error error,
                                    uint32_t instruction)
{
   struct pvr_stm *stm = malloc(sizeof(*stm));
   assert(stm);

   stm->instruction.next = NULL;
   stm->instruction.type = INS_STM;

   stm->cc = instruction & (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT);
   stm->ccs_global = instruction &
                     (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT);
   stm->ccs_so = instruction & (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT);
   stm->tst = instruction & (1 << PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT);

   stm->stream_out = (instruction >> PVR_ROGUE_PDSINST_STM_SO_SHIFT) &
                     PVR_ROGUE_PDSINST_SO_MASK;

   stm->src0 = pvr_pds_disassemble_regs64tp(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT,
      1);
   stm->src0->instruction = &stm->instruction;

   stm->src1 = pvr_pds_disassemble_regs64tp(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT,
      2);
   stm->src1->instruction = &stm->instruction;

   stm->src2 = pvr_pds_disassemble_regs32(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT,
      3);
   stm->src2->instruction = &stm->instruction;

   stm->src3 = pvr_pds_disassemble_regs64tp(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT,
      4);
   stm->src3->instruction = &stm->instruction;

   return &stm->instruction;
}
+
/* Decode a SFTLP32 (shift/logical, 32-bit) instruction word into a
 * heap-allocated pvr_sftlp node. src2 is either an immediate shift (IM set)
 * or a third register operand.
 */
static struct pvr_instruction *
pvr_pds_disassemble_instruction_sftlp32(void *context,
                                        PVR_ERR_CALLBACK err_callback,
                                        struct pvr_dissassembler_error error,
                                        uint32_t instruction)
{
   struct pvr_sftlp *ins = malloc(sizeof(*ins));
   assert(ins);

   ins->instruction.next = NULL;
   ins->instruction.type = INS_SFTLP32;

   ins->cc = instruction & PVR_ROGUE_PDSINST_SFTLP32_CC_ENABLE;
   ins->IM = instruction & PVR_ROGUE_PDSINST_SFTLP32_IM_ENABLE;
   ins->lop = (instruction >> PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT) &
              PVR_ROGUE_PDSINST_LOP_MASK;
   ins->src0 = pvr_pds_disassemble_regs32t(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT,
      1);
   ins->src0->instruction = &ins->instruction;
   ins->src1 = pvr_pds_disassemble_regs32(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT,
      2);
   ins->src1->instruction = &ins->instruction;
   ins->dst = pvr_pds_disassemble_regs32t(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT,
      0);
   ins->dst->instruction = &ins->instruction;

   if (ins->IM) {
      /* The field is loaded into a signed char shifted left by 2 and then
       * divided by 4: presumably a trick to sign-extend the 6-bit immediate
       * (relies on signed-char truncation) — confirm against the ISA spec.
       */
      signed char cImmediate =
         ((instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT) &
          PVR_ROGUE_PDSINST_REGS32_MASK)
         << 2;
      ins->src2 = calloc(1, sizeof(*ins->src2));
      assert(ins->src2);

      /* Store magnitude and sign separately. */
      ins->src2->literal = abs((cImmediate / 4));
      ins->src2->negate = cImmediate < 0;
      ins->src2->instruction = &ins->instruction;
   } else {
      ins->src2 = pvr_pds_disassemble_regs32tp(
         context,
         err_callback,
         error,
         (instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT),
         3);
      ins->src2->instruction = &ins->instruction;
   }

   return &ins->instruction;
}
+
/* Decode a SFTLP64 (shift/logical, 64-bit) instruction word into a
 * heap-allocated pvr_sftlp node. As the 32-bit variant, but the sources and
 * destination come from the 64-bit temp/ptemp banks and the immediate shift
 * amount is clamped to 63.
 */
static struct pvr_instruction *
pvr_pds_disassemble_instruction_sftlp64(void *context,
                                        PVR_ERR_CALLBACK err_callback,
                                        struct pvr_dissassembler_error error,
                                        uint32_t instruction)
{
   struct pvr_sftlp *ins = malloc(sizeof(*ins));
   assert(ins);

   ins->instruction.next = NULL;
   ins->instruction.type = INS_SFTLP64;

   ins->cc = instruction & PVR_ROGUE_PDSINST_SFTLP64_CC_ENABLE;
   ins->IM = instruction & PVR_ROGUE_PDSINST_SFTLP64_IM_ENABLE;
   ins->lop = (instruction >> PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT) &
              PVR_ROGUE_PDSINST_LOP_MASK;
   ins->src0 = pvr_pds_disassemble_regs64tp(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT,
      1);
   ins->src0->instruction = &ins->instruction;
   ins->src1 = pvr_pds_disassemble_regs64tp(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT,
      2);
   ins->src1->instruction = &ins->instruction;
   ins->dst = pvr_pds_disassemble_regs64tp(
      context,
      err_callback,
      error,
      instruction >> PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT,
      0);
   ins->dst->instruction = &ins->instruction;

   if (ins->IM) {
      /* Truncation into signed char sign-extends the immediate field;
       * magnitude is clamped to the 0..63 shift range.
       */
      signed char cImmediate =
         (instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT) &
         PVR_ROGUE_PDSINST_REGS32_MASK;
      ins->src2 = calloc(1, sizeof(*ins->src2));
      assert(ins->src2);

      ins->src2->literal = (abs(cImmediate) > 63) ? 63 : abs(cImmediate);
      ins->src2->negate = (cImmediate < 0);
      ins->src2->instruction = &ins->instruction;
   } else {
      ins->src2 = pvr_pds_disassemble_regs32(
         context,
         err_callback,
         error,
         (instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT),
         3);
      ins->src2->instruction = &ins->instruction;
   }

   return &ins->instruction;
}
/* Decode a CMP instruction word into a heap-allocated pvr_cmp node.
 * src1 is a 16-bit immediate literal when IM is set, otherwise a 64-bit
 * register operand.
 */
static struct pvr_instruction *
pvr_pds_disassemble_instruction_cmp(void *context,
                                    PVR_ERR_CALLBACK err_callback,
                                    struct pvr_dissassembler_error error,
                                    uint32_t instruction)
{
   struct pvr_cmp *cmp = malloc(sizeof(*cmp));
   assert(cmp);

   cmp->instruction.next = NULL;
   cmp->instruction.type = INS_CMP;
   cmp->cc = instruction & PVR_ROGUE_PDSINST_CMP_CC_ENABLE;
   cmp->IM = instruction & PVR_ROGUE_PDSINST_CMP_IM_ENABLE;
   cmp->cop = instruction >> PVR_ROGUE_PDSINST_CMP_COP_SHIFT &
              PVR_ROGUE_PDSINST_COP_MASK;
   cmp->src0 = pvr_pds_disassemble_regs64tp(context,
                                            err_callback,
                                            error,
                                            instruction >>
                                               PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT,
                                            1);
   cmp->src0->instruction = &cmp->instruction;

   if (cmp->IM) {
      uint32_t immediate = (instruction >> PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT) &
                           PVR_ROGUE_PDSINST_IMM16_MASK;
      cmp->src1 = calloc(1, sizeof(*cmp->src1));
      assert(cmp->src1);

      cmp->src1->type = LITERAL_NUM;
      cmp->src1->literal = immediate;
   } else {
      cmp->src1 = pvr_pds_disassemble_regs64(
         context,
         err_callback,
         error,
         instruction >> PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT,
         2);
   }
   cmp->src1->instruction = &cmp->instruction;

   return &cmp->instruction;
}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sp_ld_st(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ bool ld,
+ uint32_t instruction,
+ bool cc)
+{
+ struct pvr_ldst *ins = malloc(sizeof(*ins));
+ assert(ins);
+
+ ins->instruction.next = NULL;
+ ins->instruction.type = ld ? INS_LD : INS_ST;
+
+ ins->cc = cc;
+ ins->src0 =
+ pvr_pds_disassemble_regs64(context,
+ err_callback,
+ error,
+ instruction >> PVR_ROGUE_PDSINST_LD_SRC0_SHIFT,
+ 1);
+ ins->src0->instruction = &ins->instruction;
+ ins->st = !ld;
+
+ return &ins->instruction;
+}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sp_stmc(uint32_t instruction, bool cc)
+{
+ struct pvr_stmc *stmc = malloc(sizeof(*stmc));
+ assert(stmc);
+
+ stmc->instruction.next = NULL;
+ stmc->instruction.type = INS_STMC;
+
+ stmc->cc = cc;
+ stmc->src0 = calloc(1, sizeof(*stmc->src0));
+ assert(stmc->src0);
+
+ stmc->src0->type = LITERAL_NUM;
+ stmc->src0->literal = (instruction >> PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT) &
+ PVR_ROGUE_PDSINST_SOMASK_MASK;
+ stmc->src0->instruction = &stmc->instruction;
+
+ return &stmc->instruction;
+}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sp_limm(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction,
+ bool cc)
+{
+ struct pvr_limm *limm = malloc(sizeof(*limm));
+ assert(limm);
+ limm->instruction.next = NULL;
+ limm->instruction.type = INS_LIMM;
+
+ limm->cc = cc;
+ limm->GR = (instruction & PVR_ROGUE_PDSINST_LIMM_GR_ENABLE) != 0;
+ limm->src0 = calloc(1, sizeof(*limm->src0));
+ assert(limm->src0);
+
+ limm->src0->type = LITERAL_NUM;
+ limm->src0->literal = (instruction >> PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT) &
+ PVR_ROGUE_PDSINST_IMM16_MASK;
+ limm->src0->instruction = &limm->instruction;
+ limm->dst = pvr_pds_disassemble_regs32t(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT,
+ 0);
+ limm->dst->instruction = &limm->instruction;
+
+ return &limm->instruction;
+}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_simple(enum pvr_instruction_type type, bool cc)
+{
+ struct pvr_simple *ins = malloc(sizeof(*ins));
+ assert(ins);
+
+ ins->instruction.next = NULL;
+ ins->instruction.type = type;
+ ins->cc = cc;
+
+ return &ins->instruction;
+}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_bra(uint32_t instruction)
+{
+ uint32_t branch_addr;
+ struct pvr_bra *bra = (struct pvr_bra *)malloc(sizeof(*bra));
+ assert(bra);
+
+ bra->instruction.type = INS_BRA;
+ bra->instruction.next = NULL;
+
+ branch_addr = (instruction >> PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT) &
+ PVR_ROGUE_PDSINST_BRAADDR_MASK;
+ bra->address = (branch_addr & 0x40000U) ? ((int)branch_addr) - 0x80000
+ : (int)branch_addr;
+
+ bra->srcc = malloc(sizeof(*bra->srcc));
+ assert(bra->srcc);
+
+ bra->srcc->predicate = (instruction >> PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT) &
+ PVR_ROGUE_PDSINST_PREDICATE_MASK;
+ bra->srcc->negate = instruction & PVR_ROGUE_PDSINST_BRA_NEG_ENABLE;
+
+ bra->setc = malloc(sizeof(*bra->setc));
+ assert(bra->setc);
+
+ bra->setc->predicate = (instruction >> PVR_ROGUE_PDSINST_BRA_SETC_SHIFT) &
+ PVR_ROGUE_PDSINST_PREDICATE_MASK;
+
+ bra->target = NULL;
+
+ return &bra->instruction;
+}
+
/* Dispatch decoder for the "special" (SP) opcode class. Returns a newly
 * allocated instruction node, or NULL (after reporting via err_callback) for
 * an unknown SP opcode.
 */
static struct pvr_instruction *
pvr_pds_disassemble_instruction_sp(void *context,
                                   PVR_ERR_CALLBACK err_callback,
                                   struct pvr_dissassembler_error error,
                                   uint32_t instruction)
{
   uint32_t op = (instruction >> PVR_TYPE_OPCODE_SP_SHIFT) &
                 PVR_ROGUE_PDSINST_OPCODESP_MASK;
   /* NOTE(review): cc is derived from the PVR_TYPE_OPCODE_SP bit, while the
    * LD/ST cases below extract their own CC bits explicitly — confirm this
    * matches the instruction encoding for the remaining SP opcodes.
    */
   bool cc = instruction & PVR_TYPE_OPCODE_SP;

   switch (op) {
   case PVR_ROGUE_PDSINST_OPCODESP_LD:
      error.instruction = INS_LD;
      return pvr_pds_disassemble_instruction_sp_ld_st(
         context,
         err_callback,
         error,
         true,
         instruction,
         instruction & (1 << PVR_ROGUE_PDSINST_LD_CC_SHIFT));
   case PVR_ROGUE_PDSINST_OPCODESP_ST:
      error.instruction = INS_ST;
      return pvr_pds_disassemble_instruction_sp_ld_st(
         context,
         err_callback,
         error,
         false,
         instruction,
         instruction & (1 << PVR_ROGUE_PDSINST_ST_CC_SHIFT));
   case PVR_ROGUE_PDSINST_OPCODESP_STMC:
      error.instruction = INS_STMC;
      return pvr_pds_disassemble_instruction_sp_stmc(instruction, cc);
   case PVR_ROGUE_PDSINST_OPCODESP_LIMM:
      error.instruction = INS_LIMM;
      return pvr_pds_disassemble_instruction_sp_limm(context,
                                                     err_callback,
                                                     error,
                                                     instruction,
                                                     cc);
   case PVR_ROGUE_PDSINST_OPCODESP_WDF:
      error.instruction = INS_WDF;
      return pvr_pds_disassemble_simple(INS_WDF, cc);
   case PVR_ROGUE_PDSINST_OPCODESP_LOCK:
      error.instruction = INS_LOCK;
      return pvr_pds_disassemble_simple(INS_LOCK, cc);
   case PVR_ROGUE_PDSINST_OPCODESP_RELEASE:
      error.instruction = INS_RELEASE;
      return pvr_pds_disassemble_simple(INS_RELEASE, cc);
   case PVR_ROGUE_PDSINST_OPCODESP_HALT:
      error.instruction = INS_HALT;
      return pvr_pds_disassemble_simple(INS_HALT, cc);
   case PVR_ROGUE_PDSINST_OPCODESP_NOP:
      error.instruction = INS_NOP;
      return pvr_pds_disassemble_simple(INS_NOP, cc);
   default:
      error.type = PVR_PDS_ERR_SP_UNKNOWN;
      error.text = "opcode unknown for special instruction";
      pvr_error_check(err_callback, error);
      return NULL;
   }
}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_ddmad(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction)
+{
+ struct pvr_ddmad *ddmad = malloc(sizeof(*ddmad));
+ assert(ddmad);
+
+ ddmad->instruction.next = NULL;
+ ddmad->instruction.type = INS_DDMAD;
+
+ ddmad->cc = instruction & PVR_ROGUE_PDSINST_DDMAD_CC_ENABLE;
+ ddmad->END = instruction & PVR_ROGUE_PDSINST_DDMAD_END_ENABLE;
+
+ ddmad->src0 = pvr_pds_disassemble_regs32(
+ context,
+ err_callback,
+ error,
+ instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT,
+ 1);
+ ddmad->src0->instruction = &ddmad->instruction;
+
+ ddmad->src1 = pvr_pds_disassemble_regs32t(
+ context,
+ err_callback,
+ error,
+ instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT,
+ 2);
+ ddmad->src1->instruction = &ddmad->instruction;
+
+ ddmad->src2 = pvr_pds_disassemble_regs64(
+ context,
+ err_callback,
+ error,
+ instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT,
+ 3);
+ ddmad->src2->instruction = &ddmad->instruction;
+
+ ddmad->src3 = pvr_pds_disassemble_regs64C(
+ context,
+ err_callback,
+ error,
+ instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT,
+ 4);
+ ddmad->src3->instruction = &ddmad->instruction;
+
+ return &ddmad->instruction;
+}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_mad(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction)
+{
+ struct pvr_mad *mad = malloc(sizeof(*mad));
+ assert(mad);
+
+ mad->instruction.next = NULL;
+ mad->instruction.type = INS_MAD;
+
+ mad->cc = instruction & PVR_ROGUE_PDSINST_MAD_CC_ENABLE;
+ mad->sna = instruction & PVR_ROGUE_PDSINST_MAD_SNA_SUB;
+ mad->alum = (instruction & PVR_ROGUE_PDSINST_MAD_ALUM_SIGNED);
+
+ mad->src0 = pvr_pds_disassemble_regs32(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT,
+ 1);
+ mad->src0->instruction = &mad->instruction;
+
+ mad->src1 = pvr_pds_disassemble_regs32(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT,
+ 2);
+ mad->src1->instruction = &mad->instruction;
+
+ mad->src2 = pvr_pds_disassemble_regs64(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT,
+ 3);
+ mad->src2->instruction = &mad->instruction;
+
+ mad->dst = pvr_pds_disassemble_regs64t(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_MAD_DST_SHIFT,
+ 0);
+ mad->dst->instruction = &mad->instruction;
+
+ return &mad->instruction;
+}
+
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_dout(void *context,
+ PVR_ERR_CALLBACK err_callback,
+ struct pvr_dissassembler_error error,
+ uint32_t instruction)
+{
+ struct pvr_dout *dout = malloc(sizeof(*dout));
+ assert(dout);
+
+ dout->instruction.next = NULL;
+ dout->instruction.type = INS_DOUT;
+
+ dout->END = instruction & PVR_ROGUE_PDSINST_DOUT_END_ENABLE;
+ dout->cc = instruction & PVR_ROGUE_PDSINST_DOUT_CC_ENABLE;
+ dout->dst = (instruction >> PVR_ROGUE_PDSINST_DOUT_DST_SHIFT) &
+ PVR_ROGUE_PDSINST_DSTDOUT_MASK;
+
+ dout->src0 = pvr_pds_disassemble_regs64(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT,
+ 1);
+ dout->src0->instruction = &dout->instruction;
+
+ dout->src1 = pvr_pds_disassemble_regs32(context,
+ err_callback,
+ error,
+ instruction >>
+ PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT,
+ 2);
+ dout->src1->instruction = &dout->instruction;
+
+ return &dout->instruction;
+}
+
+static void pvr_pds_free_instruction_limm(struct pvr_limm *inst)
+{
+ free(inst->dst);
+ free(inst->src0);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_add(struct pvr_add *inst)
+{
+ free(inst->dst);
+ free(inst->src0);
+ free(inst->src1);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_cmp(struct pvr_cmp *inst)
+{
+ free(inst->src0);
+ free(inst->src1);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_mad(struct pvr_mad *inst)
+{
+ free(inst->dst);
+ free(inst->src0);
+ free(inst->src1);
+ free(inst->src2);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_bra(struct pvr_bra *inst)
+{
+ free(inst->setc);
+ free(inst->srcc);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_ddmad(struct pvr_ddmad *inst)
+{
+ free(inst->src0);
+ free(inst->src1);
+ free(inst->src2);
+ free(inst->src3);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_dout(struct pvr_dout *inst)
+{
+ free(inst->src0);
+ free(inst->src1);
+ free(inst);
+}
+
/* Release an LD/ST node and its single source operand. */
static void pvr_pds_free_instruction_ldst(struct pvr_ldst *inst)
{
   free(inst->src0);
   free(inst);
}
+
/* Release an operand-less node (WDF/LOCK/RELEASE/HALT/NOP). */
static void pvr_pds_free_instruction_simple(struct pvr_simple *inst)
{
   free(inst);
}
+
+static void pvr_pds_free_instruction_sfltp(struct pvr_sftlp *inst)
+{
+ free(inst->dst);
+ free(inst->src0);
+ free(inst->src1);
+ free(inst->src2);
+ free(inst);
+}
+
+static void pvr_pds_free_instruction_stm(struct pvr_stm *inst)
+{
+ free(inst->src0);
+ free(inst->src1);
+ free(inst->src2);
+ free(inst->src3);
+ free(inst);
+}
+
/* Release an STMC node and its single (literal) operand. */
static void pvr_pds_free_instruction_stmc(struct pvr_stmc *inst)
{
   free(inst->src0);
   free(inst);
}
+
/* Free a single decoded instruction node and all operands it owns.
 * Safe to call with NULL. Dispatches on instruction->type to the
 * per-instruction helper that knows which operand pointers exist.
 */
void pvr_pds_free_instruction(struct pvr_instruction *instruction)
{
   if (!instruction)
      return;

   switch (instruction->type) {
   case INS_LIMM:
      pvr_pds_free_instruction_limm((struct pvr_limm *)instruction);
      break;
   case INS_ADD64:
   case INS_ADD32:
      pvr_pds_free_instruction_add((struct pvr_add *)instruction);
      break;
   case INS_CMP:
      pvr_pds_free_instruction_cmp((struct pvr_cmp *)instruction);
      break;
   case INS_MAD:
      pvr_pds_free_instruction_mad((struct pvr_mad *)instruction);
      break;
   case INS_BRA:
      pvr_pds_free_instruction_bra((struct pvr_bra *)instruction);
      break;
   case INS_DDMAD:
      pvr_pds_free_instruction_ddmad((struct pvr_ddmad *)instruction);
      break;
   case INS_DOUT:
      pvr_pds_free_instruction_dout((struct pvr_dout *)instruction);
      break;
   case INS_LD:
   case INS_ST:
      pvr_pds_free_instruction_ldst((struct pvr_ldst *)instruction);
      break;
   case INS_WDF:
   case INS_LOCK:
   case INS_RELEASE:
   case INS_HALT:
   case INS_NOP:
      pvr_pds_free_instruction_simple((struct pvr_simple *)instruction);
      break;
   case INS_SFTLP64:
   case INS_SFTLP32:
      pvr_pds_free_instruction_sfltp((struct pvr_sftlp *)instruction);
      break;
   case INS_STM:
      pvr_pds_free_instruction_stm((struct pvr_stm *)instruction);
      break;
   case INS_STMC:
      pvr_pds_free_instruction_stmc((struct pvr_stmc *)instruction);
      break;
   }
}
+
/* Decode one raw 32-bit PDS instruction word into a heap-allocated
 * pvr_instruction tree. The opcode class is selected by the top type bits
 * (class C, class B, or class A = MAD); decode errors are reported through
 * err_callback where a handler exists.
 *
 * NOTE(review): an unrecognised class-C or class-B opcode falls through to
 * return NULL without invoking err_callback, unlike the SP sub-decoder —
 * confirm whether that is intentional.
 *
 * Returns NULL on failure; the caller owns the result and frees it with
 * pvr_pds_free_instruction().
 */
struct pvr_instruction *
pvr_pds_disassemble_instruction2(void *context,
                                 PVR_ERR_CALLBACK err_callback,
                                 uint32_t instruction)
{
   struct pvr_dissassembler_error error = { .context = context };

   /* First we need to find out what type of OPCODE we are dealing with. */
   if (instruction & PVR_TYPE_OPCODE) {
      uint32_t opcode_C = (instruction >> PVR_TYPE_OPCODE_SHIFT) &
                          PVR_ROGUE_PDSINST_OPCODEC_MASK;
      switch (opcode_C) {
      case PVR_ROGUE_PDSINST_OPCODEC_ADD64:
         error.instruction = INS_ADD64;
         return pvr_pds_disassemble_instruction_add64(context,
                                                      err_callback,
                                                      error,
                                                      instruction);
      case PVR_ROGUE_PDSINST_OPCODEC_ADD32:
         error.instruction = INS_ADD32;
         return pvr_pds_disassemble_instruction_add32(context,
                                                      err_callback,
                                                      error,
                                                      instruction);
      case PVR_ROGUE_PDSINST_OPCODEC_SFTLP64:
         error.instruction = INS_SFTLP64;
         return pvr_pds_disassemble_instruction_sftlp64(context,
                                                        err_callback,
                                                        error,
                                                        instruction);
      case PVR_ROGUE_PDSINST_OPCODEC_CMP:
         error.instruction = INS_CMP;
         return pvr_pds_disassemble_instruction_cmp(context,
                                                    err_callback,
                                                    error,
                                                    instruction);
      case PVR_ROGUE_PDSINST_OPCODEC_BRA:
         error.instruction = INS_BRA;
         return pvr_pds_disassemble_instruction_bra(instruction);
      case PVR_ROGUE_PDSINST_OPCODEC_SP:
         return pvr_pds_disassemble_instruction_sp(context,
                                                   err_callback,
                                                   error,
                                                   instruction);
      case PVR_ROGUE_PDSINST_OPCODEC_DDMAD:
         error.instruction = INS_DDMAD;
         return pvr_pds_disassemble_instruction_ddmad(context,
                                                      err_callback,
                                                      error,
                                                      instruction);
      case PVR_ROGUE_PDSINST_OPCODEC_DOUT:
         error.instruction = INS_DOUT;
         return pvr_pds_disassemble_instruction_dout(context,
                                                     err_callback,
                                                     error,
                                                     instruction);
      }
   } else if (instruction & PVR_TYPE_OPCODEB) {
      uint32_t opcode_B = (instruction >> PVR_TYPE_OPCODEB_SHIFT) &
                          PVR_ROGUE_PDSINST_OPCODEB_MASK;
      switch (opcode_B) {
      case PVR_ROGUE_PDSINST_OPCODEB_SFTLP32:
         error.instruction = INS_SFTLP32;
         return pvr_pds_disassemble_instruction_sftlp32(context,
                                                        err_callback,
                                                        error,
                                                        instruction);
      case PVR_ROGUE_PDSINST_OPCODEB_STM:
         error.instruction = INS_STM;
         return pvr_pds_disassemble_instruction_stm(context,
                                                    err_callback,
                                                    error,
                                                    instruction);
      }
   } else { /* Opcode A - MAD instruction. */
      error.instruction = INS_MAD;
      return pvr_pds_disassemble_instruction_mad(context,
                                                 err_callback,
                                                 error,
                                                 instruction);
   }
   return NULL;
}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "pvr_rogue_pds_encode.h"
+#include "util/log.h"
+
/* Stringified logical-op mnemonics, generated from the PVR_PDS_LOP X-macro
 * list and indexed by the lop enum value.
 */
#define X(lop, str) #str,
static const char *const LOP[] = { PVR_PDS_LOP };
#undef X
+
/* Format a single operand into instr_str (at most instr_len bytes,
 * NUL-terminated by snprintf). Literals print as "name (value)",
 * unresolved operands as "UNRESOLVED", and registers as "bank[addr].size",
 * with bank/size strings taken from the PVR_PDS_OPERAND_TYPES X-macro table.
 */
static void pvr_pds_disassemble_operand(struct pvr_operand *op,
                                        char *instr_str,
                                        size_t instr_len)
{
#define X(enum, str, size) { #str, #size },
   static const char *const regs[][2] = { PVR_PDS_OPERAND_TYPES };
#undef X

   if (op->type == LITERAL_NUM) {
      snprintf(instr_str,
               instr_len,
               "%s (%llu)",
               regs[op->type][0],
               (unsigned long long)op->literal);
   } else if (op->type == UNRESOLVED) {
      snprintf(instr_str, instr_len, "UNRESOLVED");
   } else {
      snprintf(instr_str,
               instr_len,
               "%s[%u].%s",
               regs[op->type][0],
               op->absolute_address,
               regs[op->type][1]);
   }
}
+
+static void pvr_pds_disassemble_instruction_add64(struct pvr_add *add,
+ char *instr_str,
+ size_t instr_len)
+{
+ char dst[32];
+ char src0[32];
+ char src1[32];
+
+ pvr_pds_disassemble_operand(add->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(add->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(add->dst, dst, sizeof(dst));
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %s %s %s %s",
+ "ADD64",
+ add->cc ? "? " : "",
+ dst,
+ src0,
+ add->sna ? "-" : "+",
+ src1,
+ add->alum ? "[signed]" : "");
+}
+
+static void pvr_pds_disassemble_instruction_add32(struct pvr_add *add,
+ char *instr_str,
+ size_t instr_len)
+{
+ char dst[32];
+ char src0[32];
+ char src1[32];
+
+ pvr_pds_disassemble_operand(add->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(add->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(add->dst, dst, sizeof(dst));
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %s %s %s %s",
+ "ADD32",
+ add->cc ? "? " : "",
+ dst,
+ src0,
+ add->sna ? "-" : "+",
+ src1,
+ add->alum ? "[signed]" : "");
+}
+
+static void
+pvr_pds_disassemble_instruction_sftlp32(struct pvr_sftlp *instruction,
+ char *instr_str,
+ size_t instr_len)
+{
+ char dst[32];
+ char src0[32];
+ char src1[32];
+ char src2[32];
+
+ pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(instruction->dst, dst, sizeof(dst));
+
+ if (instruction->IM)
+ snprintf(src2, sizeof(src2), "%u", (uint32_t)instruction->src2->literal);
+ else
+ pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2));
+
+ if (instruction->lop == LOP_NONE) {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %s %s %s",
+ "SFTLP32",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<",
+ src2);
+ } else if (instruction->lop == LOP_NOT) {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = (~%s) %s %s",
+ "SFTLP32",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<",
+ src2);
+ } else {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = (%s %s %s) %s %s",
+ "SFTLP32",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ LOP[instruction->lop],
+ src1,
+ instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<",
+ src2);
+ }
+}
+
+static void pvr_pds_disassemble_instruction_stm(struct pvr_stm *instruction,
+ char *instr_str,
+ size_t instr_len)
+{
+ char src0[32];
+ char src1[32];
+ char src2[32];
+ char src3[32];
+
+ char stm_pred[64];
+
+ pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2));
+ pvr_pds_disassemble_operand(instruction->src3, src3, sizeof(src3));
+
+ if (instruction->ccs_global)
+ snprintf(stm_pred, sizeof(stm_pred), "overflow_any");
+ else if (instruction->ccs_so)
+ snprintf(stm_pred, sizeof(stm_pred), "overflow_current");
+ else
+ stm_pred[0] = 0;
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s%s stm%u = %s, %s, %s, %s",
+ "STM",
+ instruction->cc ? "? " : "",
+ stm_pred,
+ instruction->tst ? " (TST only)" : "",
+ instruction->stream_out,
+ src0,
+ src1,
+ src2,
+ src3);
+}
+
+static void pds_disassemble_instruction_stmc(struct pvr_stmc *instruction,
+ char *instr_str,
+ size_t instr_len)
+{
+ char src0[32];
+
+ pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0));
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s %s",
+ "STMC",
+ instruction->cc ? "? " : "",
+ src0);
+}
+
+static void
+pvr_pds_disassemble_instruction_sftlp64(struct pvr_sftlp *instruction,
+ char *instr_str,
+ size_t instr_len)
+{
+ char dst[32];
+ char src0[32];
+ char src1[32];
+ char src2[32];
+
+ pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(instruction->dst, dst, sizeof(dst));
+
+ if (instruction->IM)
+ snprintf(src2, sizeof(src2), "%u", (uint32_t)instruction->src2->literal);
+ else
+ pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2));
+
+ if (instruction->lop == LOP_NONE) {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %s %s %s",
+ "SFTLP64",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<",
+ src2);
+ } else if (instruction->lop == LOP_NOT) {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = (~%s) %s %s",
+ "SFTLP64",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<",
+ src2);
+ } else {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = (%s %s %s) %s %s",
+ "SFTLP64",
+ instruction->cc ? "? " : "",
+ dst,
+ src0,
+ LOP[instruction->lop],
+ src1,
+ instruction->IM ? instruction->src2->negate ? ">>" : "<<" : "<<",
+ src2);
+ }
+}
+
+static void pvr_pds_disassemble_instruction_cmp(struct pvr_cmp *cmp,
+ char *instr_str,
+ size_t instr_len)
+{
+ char src0[32];
+ char src1[32];
+ static const char *const COP[] = { "=", ">", "<", "!=" };
+
+ pvr_pds_disassemble_operand(cmp->src0, src0, sizeof(src0));
+
+ if (cmp->IM) {
+ snprintf(src1,
+ sizeof(src1),
+ "%#04llx",
+ (unsigned long long)cmp->src1->literal);
+ } else {
+ pvr_pds_disassemble_operand(cmp->src1, src1, sizeof(src1));
+ }
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%sP0 = (%s %s %s)",
+ "CMP",
+ cmp->cc ? "? " : "",
+ src0,
+ COP[cmp->cop],
+ src1);
+}
+
+static void pvr_pds_disassemble_instruction_ldst(struct pvr_ldst *ins,
+ char *instr_str,
+ size_t instr_len)
+{
+ char src0[PVR_PDS_MAX_INST_STR_LEN];
+
+ pvr_pds_disassemble_operand(ins->src0, src0, sizeof(src0));
+
+ if (ins->st) {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s: mem(%s) <= src(%s)",
+ "ST",
+ ins->cc ? "? " : "",
+ src0,
+ "?",
+ "?");
+ } else {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s: dst(%s) <= mem(%s)",
+ "ld",
+ ins->cc ? "? " : "",
+ src0,
+ "?",
+ "?");
+ }
+}
+
/* Format an operand-less instruction: just the mnemonic, with the
 * conditional-execution marker appended when cc is set.
 */
static void pvr_pds_disassemble_simple(struct pvr_simple *simple,
                                       const char *type,
                                       char *instr_str,
                                       size_t instr_len)
{
   snprintf(instr_str, instr_len, "%-16s%s", type, simple->cc ? "? " : "");
}
+
+static void pvr_pds_disassemble_instruction_limm(struct pvr_limm *limm,
+ char *instr_str,
+ size_t instr_len)
+{
+ int32_t imm = (uint32_t)limm->src0->literal;
+ char dst[PVR_PDS_MAX_INST_STR_LEN];
+
+ pvr_pds_disassemble_operand(limm->dst, dst, sizeof(dst));
+
+ if (limm->GR) {
+ char *pchGReg;
+
+ switch (imm) {
+ case 0:
+ pchGReg = "cluster";
+ break;
+ case 1:
+ pchGReg = "instance";
+ break;
+ default:
+ pchGReg = "unknown";
+ }
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = G%d (%s)",
+ "LIMM",
+ limm->cc ? "? " : "",
+ dst,
+ imm,
+ pchGReg);
+ } else {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %#04x",
+ "LIMM",
+ limm->cc ? "? " : "",
+ dst,
+ imm);
+ }
+}
+
+static void pvr_pds_disassemble_instruction_ddmad(struct pvr_ddmad *ddmad,
+ char *instr_str,
+ size_t instr_len)
+{
+ char src0[PVR_PDS_MAX_INST_STR_LEN];
+ char src1[PVR_PDS_MAX_INST_STR_LEN];
+ char src2[PVR_PDS_MAX_INST_STR_LEN];
+ char src3[PVR_PDS_MAX_INST_STR_LEN];
+
+ pvr_pds_disassemble_operand(ddmad->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(ddmad->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(ddmad->src2, src2, sizeof(src2));
+ pvr_pds_disassemble_operand(ddmad->src3, src3, sizeof(src3));
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%sdoutd = (%s * %s) + %s, %s%s",
+ "DDMAD",
+ ddmad->cc ? "? " : "",
+ src0,
+ src1,
+ src2,
+ src3,
+ ddmad->END ? "; HALT" : "");
+}
+
+static void pvr_pds_disassemble_predicate(uint32_t predicate,
+ char *buffer,
+ size_t buffer_length)
+{
+ switch (predicate) {
+ case PVR_ROGUE_PDSINST_PREDICATE_P0:
+ snprintf(buffer, buffer_length, "%s", "p0");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_IF0:
+ snprintf(buffer, buffer_length, "%s", "if0");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_IF1:
+ snprintf(buffer, buffer_length, "%s", "if1");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_0:
+ snprintf(buffer, buffer_length, "%s", "so_overflow_0");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_1:
+ snprintf(buffer, buffer_length, "%s", "so_overflow_1");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_2:
+ snprintf(buffer, buffer_length, "%s", "so_overflow_2");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_3:
+ snprintf(buffer, buffer_length, "%s", "so_overflow_3");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_GLOBAL:
+ snprintf(buffer, buffer_length, "%s", "so_overflow_any");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_KEEP:
+ snprintf(buffer, buffer_length, "%s", "keep");
+ break;
+ case PVR_ROGUE_PDSINST_PREDICATE_OOB:
+ snprintf(buffer, buffer_length, "%s", "oob");
+ break;
+ default:
+ snprintf(buffer, buffer_length, "%s", "<ERROR>");
+ break;
+ }
+}
+
+static void pvr_pds_disassemble_instruction_bra(struct pvr_bra *bra,
+ char *instr_str,
+ size_t instr_len)
+{
+ char setc_pred[32];
+ char srcc_pred[32];
+
+ pvr_pds_disassemble_predicate(bra->srcc->predicate,
+ srcc_pred,
+ sizeof(srcc_pred));
+ pvr_pds_disassemble_predicate(bra->setc->predicate,
+ setc_pred,
+ sizeof(setc_pred));
+
+ if (bra->setc->predicate != PVR_ROGUE_PDSINST_PREDICATE_KEEP) {
+ snprintf(instr_str,
+ instr_len,
+ "%-16sif %s%s %d ( setc = %s )",
+ "BRA",
+ bra->srcc->negate ? "! " : "",
+ srcc_pred,
+ bra->address,
+ setc_pred);
+ } else {
+ snprintf(instr_str,
+ instr_len,
+ "%-16sif %s%s %d",
+ "BRA",
+ bra->srcc->negate ? "! " : "",
+ srcc_pred,
+ bra->address);
+ }
+}
+
+static void pvr_pds_disassemble_instruction_mad(struct pvr_mad *mad,
+ char *instr_str,
+ size_t instr_len)
+{
+ char src0[PVR_PDS_MAX_INST_STR_LEN];
+ char src1[PVR_PDS_MAX_INST_STR_LEN];
+ char src2[PVR_PDS_MAX_INST_STR_LEN];
+ char dst[PVR_PDS_MAX_INST_STR_LEN];
+
+ pvr_pds_disassemble_operand(mad->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(mad->src1, src1, sizeof(src1));
+ pvr_pds_disassemble_operand(mad->src2, src2, sizeof(src2));
+ pvr_pds_disassemble_operand(mad->dst, dst, sizeof(dst));
+
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = (%s * %s) %s %s%s",
+ "MAD",
+ mad->cc ? "? " : "",
+ dst,
+ src0,
+ src1,
+ mad->sna ? "-" : "+",
+ src2,
+ mad->alum ? " [signed]" : "");
+}
+
+static void pvr_pds_disassemble_instruction_dout(struct pvr_dout *dout,
+ char *instr_str,
+ size_t instr_len)
+{
+ char src0[PVR_PDS_MAX_INST_STR_LEN];
+ char src1[PVR_PDS_MAX_INST_STR_LEN];
+
+#define X(dout_dst, str) #str,
+ static const char *const dst[] = { PVR_PDS_DOUT_DSTS };
+#undef X
+
+ pvr_pds_disassemble_operand(dout->src0, src0, sizeof(src0));
+ pvr_pds_disassemble_operand(dout->src1, src1, sizeof(src1));
+
+ {
+ snprintf(instr_str,
+ instr_len,
+ "%-16s%s%s = %s, %s%s",
+ "DOUT",
+ dout->cc ? "? " : "",
+ dst[dout->dst],
+ src0,
+ src1,
+ dout->END ? "; HALT" : "");
+ }
+}
+
/* Render a decoded instruction into instr_str (at most instr_len bytes).
 * A NULL instruction (a failed decode) and unhandled types produce
 * diagnostic placeholder text instead.
 */
void pvr_pds_disassemble_instruction(char *instr_str,
                                     size_t instr_len,
                                     struct pvr_instruction *instruction)
{
   if (!instruction) {
      snprintf(instr_str,
               instr_len,
               "Instruction was not disassembled properly\n");
      return;
   }

   switch (instruction->type) {
   case INS_LIMM:
      pvr_pds_disassemble_instruction_limm((struct pvr_limm *)instruction,
                                           instr_str,
                                           instr_len);
      break;
   case INS_ADD64:
      pvr_pds_disassemble_instruction_add64((struct pvr_add *)instruction,
                                            instr_str,
                                            instr_len);
      break;
   case INS_ADD32:
      pvr_pds_disassemble_instruction_add32((struct pvr_add *)instruction,
                                            instr_str,
                                            instr_len);
      break;
   case INS_CMP:
      pvr_pds_disassemble_instruction_cmp((struct pvr_cmp *)instruction,
                                          instr_str,
                                          instr_len);
      break;
   case INS_MAD:
      pvr_pds_disassemble_instruction_mad((struct pvr_mad *)instruction,
                                          instr_str,
                                          instr_len);
      break;
   case INS_BRA:
      pvr_pds_disassemble_instruction_bra((struct pvr_bra *)instruction,
                                          instr_str,
                                          instr_len);
      break;
   case INS_DDMAD:
      pvr_pds_disassemble_instruction_ddmad((struct pvr_ddmad *)instruction,
                                            instr_str,
                                            instr_len);
      break;
   case INS_DOUT:
      pvr_pds_disassemble_instruction_dout((struct pvr_dout *)instruction,
                                           instr_str,
                                           instr_len);
      break;
   case INS_LD:
   case INS_ST:
      pvr_pds_disassemble_instruction_ldst((struct pvr_ldst *)instruction,
                                           instr_str,
                                           instr_len);
      break;
   case INS_WDF:
      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
                                 "WDF",
                                 instr_str,
                                 instr_len);
      break;
   case INS_LOCK:
      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
                                 "LOCK",
                                 instr_str,
                                 instr_len);
      break;
   case INS_RELEASE:
      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
                                 "RELEASE",
                                 instr_str,
                                 instr_len);
      break;
   case INS_HALT:
      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
                                 "HALT",
                                 instr_str,
                                 instr_len);
      break;
   case INS_NOP:
      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
                                 "NOP",
                                 instr_str,
                                 instr_len);
      break;
   case INS_SFTLP32:
      pvr_pds_disassemble_instruction_sftlp32((struct pvr_sftlp *)instruction,
                                              instr_str,
                                              instr_len);
      break;
   case INS_SFTLP64:
      pvr_pds_disassemble_instruction_sftlp64((struct pvr_sftlp *)instruction,
                                              instr_str,
                                              instr_len);
      break;
   case INS_STM:
      pvr_pds_disassemble_instruction_stm((struct pvr_stm *)instruction,
                                          instr_str,
                                          instr_len);
      break;
   case INS_STMC:
      pds_disassemble_instruction_stmc((struct pvr_stmc *)instruction,
                                       instr_str,
                                       instr_len);
      break;
   default:
      snprintf(instr_str, instr_len, "Printing not implemented\n");
      break;
   }
}
+
#if defined(DUMP_PDS)
/* Debug helper: decode one raw instruction word and log its disassembly
 * (or just the raw hex if decoding failed). Leaks the decoded node by
 * design? NOTE(review): consider pvr_pds_free_instruction(decoded) here.
 */
void pvr_pds_print_instruction(uint32_t instr)
{
   char instruction_str[1024];
   /* Use NULL, not 0, for the pointer-typed context and callback. */
   struct pvr_instruction *decoded =
      pvr_pds_disassemble_instruction2(NULL, NULL, instr);

   if (!decoded) {
      mesa_logd("%X\n", instr);
   } else {
      pvr_pds_disassemble_instruction(instruction_str,
                                      sizeof(instruction_str),
                                      decoded);
      mesa_logd("\t0x%08x, /* %s */\n", instr, instruction_str);
   }
}
#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS0_H
+#define PVR_DRAW_INDIRECTARRAYS0_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
/* PDS program binary: one 32-bit instruction word per entry; the trailing
 * comment on each entry is its disassembly. */
static const uint32_t pvr_draw_indirect_arrays0_code[15] = {
   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
   0xd1940000, /* LIMM temp[5].32 = 0000 */
   0x500c0804, /* SFTLP32 temp[4].32 = temp[1].32 << 0 */
   0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */
   0xd9800000, /* LIMM ? temp[0].32 = 0000 */
   0xd9840000, /* LIMM ? temp[1].32 = 0000 */
   0x04081023, /* MAD temp[6].64 = (temp[1].32 * const[2].32) +
                  const[4].64 */
   0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */
   0x912040c1, /* ADD32 temp[1].32 = temp[1].32 - const[3].32 */
   0xd0800003, /* ST const[6].64: mem(?) <= src(?) */
   0xd0000004, /* LD const[8].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xf40a4003, /* DOUT doutv = temp[0].64, const[10].32; HALT */
};

/* Ties the code above to its segment sizes. The constant mapping table is
 * deliberately zeroed here; the data segment is instead patched through the
 * pvr_write_* macros below. */
static const struct pvr_psc_program_output pvr_draw_indirect_arrays0_program = {
   pvr_draw_indirect_arrays0_code, /* code segment */
   0, /* constant mappings, zeroed since we use the macros below */
   4, /* number of constant mappings */

   12, /* size of data segment, in dwords, aligned to 4 */
   16, /* size of code segment, in dwords, aligned to 4 */
   12, /* size of temp segment, in dwords, aligned to 4 */
   11, /* size of data segment, in dwords */
   15, /* size of code segment, in dwords */
   10, /* size of temp segment, in dwords */
   NULL /* function pointer to write data segment */
};
+
/* Store the 64-bit (addr | 0x40000000000 | SLC/MCU cache-control bits for
 * device) word at dword 0 of the data segment. */
#define pvr_write_draw_indirect_arrays0_di_data(buffer, addr, device)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x40000000000ULL) |                        \
                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 0, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x30000000000) word at dword 6. */
#define pvr_write_draw_indirect_arrays0_write_vdm(buffer, addr)             \
   do {                                                                     \
      uint64_t data = ((addr) | (0x30000000000ULL));                        \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 6, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x1940000000000) word at dword 8. */
#define pvr_write_draw_indirect_arrays0_flush_vdm(buffer, addr)             \
   do {                                                                     \
      uint64_t data = ((addr) | (0x1940000000000ULL));                      \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 8, &data, sizeof(data));                            \
   } while (0)
/* Store the 32-bit view count at dword 2. */
#define pvr_write_draw_indirect_arrays0_num_views(buffer, value)            \
   do {                                                                     \
      uint32_t data = (value);                                              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 2, &data, sizeof(data));                            \
   } while (0)
/* Write the program's immediate constants: u64 0 at dword 4, u32 1 at
 * dword 3, u32 0 at dword 10. */
#define pvr_write_draw_indirect_arrays0_immediates(buffer)                  \
   do {                                                                     \
      {                                                                     \
         uint64_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 4, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x1;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 3, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 10, &data, sizeof(data));                        \
      }                                                                     \
   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS1_H
+#define PVR_DRAW_INDIRECTARRAYS1_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
/* PDS program binary: one 32-bit instruction word per entry; the trailing
 * comment on each entry is its disassembly. */
static const uint32_t pvr_draw_indirect_arrays1_code[15] = {
   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
   0xd1940000, /* LIMM temp[5].32 = 0000 */
   0x50141004, /* SFTLP32 temp[4].32 = temp[2].32 << 0 */
   0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */
   0xd9840000, /* LIMM ? temp[1].32 = 0000 */
   0xd9880000, /* LIMM ? temp[2].32 = 0000 */
   0x04101023, /* MAD temp[6].64 = (temp[2].32 * const[2].32) +
                  const[4].64 */
   0x50343002, /* SFTLP32 temp[2].32 = temp[6].32 << 0 */
   0x912080c2, /* ADD32 temp[2].32 = temp[2].32 - const[3].32 */
   0xd0800003, /* ST const[6].64: mem(?) <= src(?) */
   0xd0000004, /* LD const[8].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xf40a4003, /* DOUT doutv = temp[0].64, const[10].32; HALT */
};

/* Ties the code above to its segment sizes. The constant mapping table is
 * deliberately zeroed here; the data segment is instead patched through the
 * pvr_write_* macros below. */
static const struct pvr_psc_program_output pvr_draw_indirect_arrays1_program = {
   pvr_draw_indirect_arrays1_code, /* code segment */
   0, /* constant mappings, zeroed since we use the macros below */
   4, /* number of constant mappings */

   12, /* size of data segment, in dwords, aligned to 4 */
   16, /* size of code segment, in dwords, aligned to 4 */
   12, /* size of temp segment, in dwords, aligned to 4 */
   11, /* size of data segment, in dwords */
   15, /* size of code segment, in dwords */
   10, /* size of temp segment, in dwords */
   NULL /* function pointer to write data segment */
};
+
/* Store the 64-bit (addr | 0x40000000000 | SLC/MCU cache-control bits for
 * device) word at dword 0 of the data segment. */
#define pvr_write_draw_indirect_arrays1_di_data(buffer, addr, device)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x40000000000ULL) |                        \
                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 0, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x430000000000) word at dword 6. */
#define pvr_write_draw_indirect_arrays1_write_vdm(buffer, addr)             \
   do {                                                                     \
      uint64_t data = ((addr) | (0x430000000000ULL));                       \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 6, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x1940000000000) word at dword 8. */
#define pvr_write_draw_indirect_arrays1_flush_vdm(buffer, addr)             \
   do {                                                                     \
      uint64_t data = ((addr) | (0x1940000000000ULL));                      \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 8, &data, sizeof(data));                            \
   } while (0)
/* Store the 32-bit view count at dword 2. */
#define pvr_write_draw_indirect_arrays1_num_views(buffer, value)            \
   do {                                                                     \
      uint32_t data = (value);                                              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 2, &data, sizeof(data));                            \
   } while (0)
/* Write the program's immediate constants: u64 0 at dword 4, u32 1 at
 * dword 3, u32 0 at dword 10. */
#define pvr_write_draw_indirect_arrays1_immediates(buffer)                  \
   do {                                                                     \
      {                                                                     \
         uint64_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 4, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x1;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 3, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 10, &data, sizeof(data));                        \
      }                                                                     \
   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS2_H
+#define PVR_DRAW_INDIRECTARRAYS2_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
/* PDS program binary: one 32-bit instruction word per entry; the trailing
 * comment on each entry is its disassembly. */
static const uint32_t pvr_draw_indirect_arrays2_code[15] = {
   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
   0xd1840000, /* LIMM temp[1].32 = 0000 */
   0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */
   0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */
   0xd9880000, /* LIMM ? temp[2].32 = 0000 */
   0xd98c0000, /* LIMM ? temp[3].32 = 0000 */
   0x04181023, /* MAD temp[6].64 = (temp[3].32 * const[2].32) +
                  const[4].64 */
   0x50343003, /* SFTLP32 temp[3].32 = temp[6].32 << 0 */
   0x9120c0c3, /* ADD32 temp[3].32 = temp[3].32 - const[3].32 */
   0xd0800003, /* ST const[6].64: mem(?) <= src(?) */
   0xd0000004, /* LD const[8].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xf40a4003, /* DOUT doutv = temp[0].64, const[10].32; HALT */
};

/* Ties the code above to its segment sizes. The constant mapping table is
 * deliberately zeroed here; the data segment is instead patched through the
 * pvr_write_* macros below. */
static const struct pvr_psc_program_output pvr_draw_indirect_arrays2_program = {
   pvr_draw_indirect_arrays2_code, /* code segment */
   0, /* constant mappings, zeroed since we use the macros below */
   4, /* number of constant mappings */

   12, /* size of data segment, in dwords, aligned to 4 */
   16, /* size of code segment, in dwords, aligned to 4 */
   12, /* size of temp segment, in dwords, aligned to 4 */
   11, /* size of data segment, in dwords */
   15, /* size of code segment, in dwords */
   10, /* size of temp segment, in dwords */
   NULL /* function pointer to write data segment */
};
+
/* Store the 64-bit (addr | 0x60000000000 | SLC/MCU cache-control bits for
 * device) word at dword 0 of the data segment. */
#define pvr_write_draw_indirect_arrays2_di_data(buffer, addr, device)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x60000000000ULL) |                        \
                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 0, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x830000000000) word at dword 6. */
#define pvr_write_draw_indirect_arrays2_write_vdm(buffer, addr)             \
   do {                                                                     \
      uint64_t data = ((addr) | (0x830000000000ULL));                       \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 6, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x1940000000000) word at dword 8. */
#define pvr_write_draw_indirect_arrays2_flush_vdm(buffer, addr)             \
   do {                                                                     \
      uint64_t data = ((addr) | (0x1940000000000ULL));                      \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 8, &data, sizeof(data));                            \
   } while (0)
/* Store the 32-bit view count at dword 2. */
#define pvr_write_draw_indirect_arrays2_num_views(buffer, value)            \
   do {                                                                     \
      uint32_t data = (value);                                              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 2, &data, sizeof(data));                            \
   } while (0)
/* Write the program's immediate constants: u64 0 at dword 4, u32 1 at
 * dword 3, u32 0 at dword 10. */
#define pvr_write_draw_indirect_arrays2_immediates(buffer)                  \
   do {                                                                     \
      {                                                                     \
         uint64_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 4, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x1;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 3, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 10, &data, sizeof(data));                        \
      }                                                                     \
   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS3_H
+#define PVR_DRAW_INDIRECTARRAYS3_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
/* PDS program binary: one 32-bit instruction word per entry; the trailing
 * comment on each entry is its disassembly. */
static const uint32_t pvr_draw_indirect_arrays3_code[15] = {
   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
   0xd1840000, /* LIMM temp[1].32 = 0000 */
   0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */
   0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */
   0xd98c0000, /* LIMM ? temp[3].32 = 0000 */
   0xd9900000, /* LIMM ? temp[4].32 = 0000 */
   0x04201023, /* MAD temp[6].64 = (temp[4].32 * const[2].32) +
                  const[4].64 */
   0x50343004, /* SFTLP32 temp[4].32 = temp[6].32 << 0 */
   0x912100c4, /* ADD32 temp[4].32 = temp[4].32 - const[3].32 */
   0xd0800003, /* ST const[6].64: mem(?) <= src(?) */
   0xd0000004, /* LD const[8].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xf40a4003, /* DOUT doutv = temp[0].64, const[10].32; HALT */
};

/* Ties the code above to its segment sizes. The constant mapping table is
 * deliberately zeroed here; the data segment is instead patched through the
 * pvr_write_* macros below. */
static const struct pvr_psc_program_output pvr_draw_indirect_arrays3_program = {
   pvr_draw_indirect_arrays3_code, /* code segment */
   0, /* constant mappings, zeroed since we use the macros below */
   4, /* number of constant mappings */

   12, /* size of data segment, in dwords, aligned to 4 */
   16, /* size of code segment, in dwords, aligned to 4 */
   12, /* size of temp segment, in dwords, aligned to 4 */
   11, /* size of data segment, in dwords */
   15, /* size of code segment, in dwords */
   10, /* size of temp segment, in dwords */
   NULL /* function pointer to write data segment */
};
+
/* Store the 64-bit (addr | 0x60000000000 | SLC/MCU cache-control bits for
 * device) word at dword 0 of the data segment. */
#define pvr_write_draw_indirect_arrays3_di_data(buffer, addr, device)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x60000000000ULL) |                        \
                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 0, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0xc30000000000) word at dword 6. */
#define pvr_write_draw_indirect_arrays3_write_vdm(buffer, addr)             \
   do {                                                                     \
      uint64_t data = ((addr) | (0xc30000000000ULL));                       \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 6, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x1940000000000) word at dword 8. */
#define pvr_write_draw_indirect_arrays3_flush_vdm(buffer, addr)             \
   do {                                                                     \
      uint64_t data = ((addr) | (0x1940000000000ULL));                      \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 8, &data, sizeof(data));                            \
   } while (0)
/* Store the 32-bit view count at dword 2. */
#define pvr_write_draw_indirect_arrays3_num_views(buffer, value)            \
   do {                                                                     \
      uint32_t data = (value);                                              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 2, &data, sizeof(data));                            \
   } while (0)
/* Write the program's immediate constants: u64 0 at dword 4, u32 1 at
 * dword 3, u32 0 at dword 10. */
#define pvr_write_draw_indirect_arrays3_immediates(buffer)                  \
   do {                                                                     \
      {                                                                     \
         uint64_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 4, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x1;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 3, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 10, &data, sizeof(data));                        \
      }                                                                     \
   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE0_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE0_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
/* PDS program binary: one 32-bit instruction word per entry; the trailing
 * comment on each entry is its disassembly. */
static const uint32_t pvr_draw_indirect_arrays_base_instance0_code[18] = {
   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
   0xd1940000, /* LIMM temp[5].32 = 0000 */
   0x500c0804, /* SFTLP32 temp[4].32 = temp[1].32 << 0 */
   0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */
   0xd9800000, /* LIMM ? temp[0].32 = 0000 */
   0xd9840000, /* LIMM ? temp[1].32 = 0000 */
   0x04081023, /* MAD temp[6].64 = (temp[1].32 * const[2].32) +
                  const[4].64 */
   0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */
   0x912040c1, /* ADD32 temp[1].32 = temp[1].32 - const[3].32 */
   0x9001a0a0, /* ADD32 ptemp[0].32 = const[6].32 + temp[2].32 */
   0xd0800004, /* ST const[8].64: mem(?) <= src(?) */
   0x9001a0e1, /* ADD32 ptemp[1].32 = const[6].32 + temp[3].32 */
   0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */
   0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */
};

/* Ties the code above to its segment sizes. The constant mapping table is
 * deliberately zeroed here; the data segment is instead patched through the
 * pvr_write_* macros below. */
static const struct pvr_psc_program_output
   pvr_draw_indirect_arrays_base_instance0_program = {
      pvr_draw_indirect_arrays_base_instance0_code, /* code segment */
      0, /* constant mappings, zeroed since we use the macros below */
      4, /* number of constant mappings */

      12, /* size of data segment, in dwords, aligned to 4 */
      20, /* size of code segment, in dwords, aligned to 4 */
      12, /* size of temp segment, in dwords, aligned to 4 */
      12, /* size of data segment, in dwords */
      18, /* size of code segment, in dwords */
      10, /* size of temp segment, in dwords */
      NULL /* function pointer to write data segment */
   };
+
/* Store the 64-bit (addr | 0x40000000000 | SLC/MCU cache-control bits for
 * device) word at dword 0 of the data segment. */
#define pvr_write_draw_indirect_arrays_base_instance0_di_data(buffer,       \
                                                              addr,         \
                                                              device)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x40000000000ULL) |                        \
                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 0, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x30000000000) word at dword 8. */
#define pvr_write_draw_indirect_arrays_base_instance0_write_vdm(buffer,     \
                                                                addr)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x30000000000ULL));                        \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 8, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x1940000000000) word at dword 10. */
#define pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(buffer,     \
                                                                addr)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x1940000000000ULL));                      \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 10, &data, sizeof(data));                           \
   } while (0)
/* Store the 32-bit view count at dword 2. */
#define pvr_write_draw_indirect_arrays_base_instance0_num_views(buffer,     \
                                                                value)      \
   do {                                                                     \
      uint32_t data = (value);                                              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 2, &data, sizeof(data));                            \
   } while (0)
/* Write the program's immediate constants: u64 0 at dword 4, u32 1 at
 * dword 3, u32 0 at dword 6. */
#define pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer)    \
   do {                                                                     \
      {                                                                     \
         uint64_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 4, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x1;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 3, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 6, &data, sizeof(data));                         \
      }                                                                     \
   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE1_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE1_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
/* PDS program binary: one 32-bit instruction word per entry; the trailing
 * comment on each entry is its disassembly. */
static const uint32_t pvr_draw_indirect_arrays_base_instance1_code[18] = {
   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
   0xd19c0000, /* LIMM temp[7].32 = 0000 */
   0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */
   0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */
   0xd9840000, /* LIMM ? temp[1].32 = 0000 */
   0xd9880000, /* LIMM ? temp[2].32 = 0000 */
   0x04101024, /* MAD temp[8].64 = (temp[2].32 * const[2].32) +
                  const[4].64 */
   0x50444002, /* SFTLP32 temp[2].32 = temp[8].32 << 0 */
   0x912080c2, /* ADD32 temp[2].32 = temp[2].32 - const[3].32 */
   0x9001a0e0, /* ADD32 ptemp[0].32 = const[6].32 + temp[3].32 */
   0xd0800004, /* ST const[8].64: mem(?) <= src(?) */
   0x9001a121, /* ADD32 ptemp[1].32 = const[6].32 + temp[4].32 */
   0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */
   0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */
};

/* Ties the code above to its segment sizes. The constant mapping table is
 * deliberately zeroed here; the data segment is instead patched through the
 * pvr_write_* macros below. */
static const struct pvr_psc_program_output
   pvr_draw_indirect_arrays_base_instance1_program = {
      pvr_draw_indirect_arrays_base_instance1_code, /* code segment */
      0, /* constant mappings, zeroed since we use the macros below */
      4, /* number of constant mappings */

      12, /* size of data segment, in dwords, aligned to 4 */
      20, /* size of code segment, in dwords, aligned to 4 */
      12, /* size of temp segment, in dwords, aligned to 4 */
      12, /* size of data segment, in dwords */
      18, /* size of code segment, in dwords */
      12, /* size of temp segment, in dwords */
      NULL /* function pointer to write data segment */
   };
+
/* Store the 64-bit (addr | 0x60000000000 | SLC/MCU cache-control bits for
 * device) word at dword 0 of the data segment. */
#define pvr_write_draw_indirect_arrays_base_instance1_di_data(buffer,       \
                                                              addr,         \
                                                              device)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x60000000000ULL) |                        \
                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 0, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x430000000000) word at dword 8. */
#define pvr_write_draw_indirect_arrays_base_instance1_write_vdm(buffer,     \
                                                                addr)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x430000000000ULL));                       \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 8, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x2140000000000) word at dword 10. */
#define pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(buffer,     \
                                                                addr)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x2140000000000ULL));                      \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 10, &data, sizeof(data));                           \
   } while (0)
/* Store the 32-bit view count at dword 2. */
#define pvr_write_draw_indirect_arrays_base_instance1_num_views(buffer,     \
                                                                value)      \
   do {                                                                     \
      uint32_t data = (value);                                              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 2, &data, sizeof(data));                            \
   } while (0)
/* Write the program's immediate constants: u64 0 at dword 4, u32 1 at
 * dword 3, u32 0 at dword 6. */
#define pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer)    \
   do {                                                                     \
      {                                                                     \
         uint64_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 4, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x1;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 3, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 6, &data, sizeof(data));                         \
      }                                                                     \
   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE2_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE2_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
/* PDS program binary: one 32-bit instruction word per entry; the trailing
 * comment on each entry is its disassembly. */
static const uint32_t pvr_draw_indirect_arrays_base_instance2_code[18] = {
   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
   0xd1840000, /* LIMM temp[1].32 = 0000 */
   0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */
   0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */
   0xd9880000, /* LIMM ? temp[2].32 = 0000 */
   0xd98c0000, /* LIMM ? temp[3].32 = 0000 */
   0x04181023, /* MAD temp[6].64 = (temp[3].32 * const[2].32) +
                  const[4].64 */
   0x50343003, /* SFTLP32 temp[3].32 = temp[6].32 << 0 */
   0x9120c0c3, /* ADD32 temp[3].32 = temp[3].32 - const[3].32 */
   0x9001a120, /* ADD32 ptemp[0].32 = const[6].32 + temp[4].32 */
   0xd0800004, /* ST const[8].64: mem(?) <= src(?) */
   0x9001a161, /* ADD32 ptemp[1].32 = const[6].32 + temp[5].32 */
   0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */
   0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */
};

/* Ties the code above to its segment sizes. The constant mapping table is
 * deliberately zeroed here; the data segment is instead patched through the
 * pvr_write_* macros below. */
static const struct pvr_psc_program_output
   pvr_draw_indirect_arrays_base_instance2_program = {
      pvr_draw_indirect_arrays_base_instance2_code, /* code segment */
      0, /* constant mappings, zeroed since we use the macros below */
      4, /* number of constant mappings */

      12, /* size of data segment, in dwords, aligned to 4 */
      20, /* size of code segment, in dwords, aligned to 4 */
      12, /* size of temp segment, in dwords, aligned to 4 */
      12, /* size of data segment, in dwords */
      18, /* size of code segment, in dwords */
      10, /* size of temp segment, in dwords */
      NULL /* function pointer to write data segment */
   };
+
/* Store the 64-bit (addr | 0x60000000000 | SLC/MCU cache-control bits for
 * device) word at dword 0 of the data segment. */
#define pvr_write_draw_indirect_arrays_base_instance2_di_data(buffer,       \
                                                              addr,         \
                                                              device)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x60000000000ULL) |                        \
                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 0, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x830000000000) word at dword 8. */
#define pvr_write_draw_indirect_arrays_base_instance2_write_vdm(buffer,     \
                                                                addr)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x830000000000ULL));                       \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 8, &data, sizeof(data));                            \
   } while (0)
/* Store the 64-bit (addr | 0x1940000000000) word at dword 10. */
#define pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(buffer,     \
                                                                addr)       \
   do {                                                                     \
      uint64_t data = ((addr) | (0x1940000000000ULL));                      \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 10, &data, sizeof(data));                           \
   } while (0)
/* Store the 32-bit view count at dword 2. */
#define pvr_write_draw_indirect_arrays_base_instance2_num_views(buffer,     \
                                                                value)      \
   do {                                                                     \
      uint32_t data = (value);                                              \
      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                  \
      memcpy((buffer) + 2, &data, sizeof(data));                            \
   } while (0)
/* Write the program's immediate constants: u64 0 at dword 4, u32 1 at
 * dword 3, u32 0 at dword 6. */
#define pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer)    \
   do {                                                                     \
      {                                                                     \
         uint64_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 4, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x1;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 3, &data, sizeof(data));                         \
      }                                                                     \
      {                                                                     \
         uint32_t data = 0x0;                                               \
         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);               \
         memcpy((buffer) + 6, &data, sizeof(data));                         \
      }                                                                     \
   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE3_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE3_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
/* PDS program binary: one 32-bit instruction word per entry; the trailing
 * comment on each entry is its disassembly. */
static const uint32_t pvr_draw_indirect_arrays_base_instance3_code[18] = {
   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
   0xd1840000, /* LIMM temp[1].32 = 0000 */
   0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */
   0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */
   0xd98c0000, /* LIMM ? temp[3].32 = 0000 */
   0xd9900000, /* LIMM ? temp[4].32 = 0000 */
   0x04201024, /* MAD temp[8].64 = (temp[4].32 * const[2].32) +
                  const[4].64 */
   0x50444004, /* SFTLP32 temp[4].32 = temp[8].32 << 0 */
   0x912100c4, /* ADD32 temp[4].32 = temp[4].32 - const[3].32 */
   0x9001a160, /* ADD32 ptemp[0].32 = const[6].32 + temp[5].32 */
   0xd0800004, /* ST const[8].64: mem(?) <= src(?) */
   0x9001a1a1, /* ADD32 ptemp[1].32 = const[6].32 + temp[6].32 */
   0x9130f0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */
   0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */
   0xd1000000, /* WDF */
   0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */
};

/* Ties the code above to its segment sizes. The constant mapping table is
 * deliberately zeroed here; the data segment is instead patched through the
 * pvr_write_* macros below. */
static const struct pvr_psc_program_output
   pvr_draw_indirect_arrays_base_instance3_program = {
      pvr_draw_indirect_arrays_base_instance3_code, /* code segment */
      0, /* constant mappings, zeroed since we use the macros below */
      4, /* number of constant mappings */

      12, /* size of data segment, in dwords, aligned to 4 */
      20, /* size of code segment, in dwords, aligned to 4 */
      12, /* size of temp segment, in dwords, aligned to 4 */
      12, /* size of data segment, in dwords */
      18, /* size of code segment, in dwords */
      12, /* size of temp segment, in dwords */
      NULL /* function pointer to write data segment */
   };
+
+#define pvr_write_draw_indirect_arrays_base_instance3_di_data(buffer, \
+ addr, \
+ device) \
+ do { \
+ uint64_t data = ((addr) | (0x80000000000ULL) | \
+ ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+ memcpy(buffer + 0, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance3_write_vdm(buffer, addr) \
+ do { \
+ uint64_t data = ((addr) | (0xc30000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+ memcpy(buffer + 8, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(buffer, addr) \
+ do { \
+ uint64_t data = ((addr) | (0x2140000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+ memcpy(buffer + 10, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance3_num_views(buffer, value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+ memcpy(buffer + 2, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer) \
+ do { \
+ { \
+ uint64_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+ memcpy(buffer + 4, &data, sizeof(data)); \
+ } \
+ { \
+ uint32_t data = 0x1; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+ memcpy(buffer + 3, &data, sizeof(data)); \
+ } \
+ { \
+ uint32_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+ memcpy(buffer + 6, &data, sizeof(data)); \
+ } \
+ } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID0_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID0_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+/* PDS opcode words; generated (see header above) -- modify the .pds source
+ * and regenerate rather than hand-editing these values. */
+static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid0_code[18] = {
+   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+   0xd1940000, /* LIMM temp[5].32 = 0000 */
+   0x500c0804, /* SFTLP32 temp[4].32 = temp[1].32 << 0 */
+   0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */
+   0xd9800000, /* LIMM ? temp[0].32 = 0000 */
+   0xd9840000, /* LIMM ? temp[1].32 = 0000 */
+   0x04081023, /* MAD temp[6].64 = (temp[1].32 * const[2].32) +
+                  const[4].64 */
+   0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */
+   0x912040c1, /* ADD32 temp[1].32 = temp[1].32 - const[3].32 */
+   0x9001a0a0, /* ADD32 ptemp[0].32 = const[6].32 + temp[2].32 */
+   0xd0800004, /* ST const[8].64: mem(?) <= src(?) */
+   0x9001a0e1, /* ADD32 ptemp[1].32 = const[6].32 + temp[3].32 */
+   0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */
+   0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */
+};
+
+/* Program descriptor consumed by the PDS uploader; field meanings are given
+ * by the trailing comments (aligned sizes are the raw sizes rounded up to a
+ * multiple of 4 dwords). */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance_drawid0_program = {
+      pvr_draw_indirect_arrays_base_instance_drawid0_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      10, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/*
+ * Data-segment writers. The memcpy() offsets below are in 32-bit units, so
+ * `buffer` is presumably a uint32_t * -- confirm against callers.
+ * Layout written: [0..1] di_data descriptor, [2] num_views,
+ * [3] immediate 0x1, [4..5] immediate 0x0 (64-bit), [6] immediate 0x0,
+ * [8..9] write_vdm descriptor, [10..11] flush_vdm descriptor.
+ * NOTE(review): dword 7 is never written here -- presumably padding.
+ */
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(buffer, \
+                                                                     addr,   \
+                                                                     device) \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x40000000000ULL) |                      \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 0, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(buffer, \
+                                                                       addr)  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x30000000000ULL));                      \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 8, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(buffer, \
+                                                                       addr)  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x1940000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 10, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(buffer, \
+                                                                       value) \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 2, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(  \
+   buffer)                                                                \
+   do {                                                                   \
+      {                                                                   \
+         uint64_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 4, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x1;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 3, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 6, &data, sizeof(data));                         \
+      }                                                                   \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID1_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID1_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+/* PDS opcode words; generated (see header above) -- modify the .pds source
+ * and regenerate rather than hand-editing these values. */
+static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid1_code[18] = {
+   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM temp[7].32 = 0000 */
+   0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */
+   0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */
+   0xd9840000, /* LIMM ? temp[1].32 = 0000 */
+   0xd9880000, /* LIMM ? temp[2].32 = 0000 */
+   0x04101024, /* MAD temp[8].64 = (temp[2].32 * const[2].32) +
+                  const[4].64 */
+   0x50444002, /* SFTLP32 temp[2].32 = temp[8].32 << 0 */
+   0x912080c2, /* ADD32 temp[2].32 = temp[2].32 - const[3].32 */
+   0x9001a0e0, /* ADD32 ptemp[0].32 = const[6].32 + temp[3].32 */
+   0xd0800004, /* ST const[8].64: mem(?) <= src(?) */
+   0x9001a121, /* ADD32 ptemp[1].32 = const[6].32 + temp[4].32 */
+   0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */
+   0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */
+};
+
+/* Program descriptor consumed by the PDS uploader; field meanings are given
+ * by the trailing comments (aligned sizes are the raw sizes rounded up to a
+ * multiple of 4 dwords). */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance_drawid1_program = {
+      pvr_draw_indirect_arrays_base_instance_drawid1_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      12, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/*
+ * Data-segment writers. The memcpy() offsets below are in 32-bit units, so
+ * `buffer` is presumably a uint32_t * -- confirm against callers.
+ * Layout written: [0..1] di_data descriptor, [2] num_views,
+ * [3] immediate 0x1, [4..5] immediate 0x0 (64-bit), [6] immediate 0x0,
+ * [8..9] write_vdm descriptor, [10..11] flush_vdm descriptor.
+ * NOTE(review): dword 7 is never written here -- presumably padding.
+ */
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(buffer, \
+                                                                     addr,   \
+                                                                     device) \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                      \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 0, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(buffer, \
+                                                                       addr)  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x430000000000ULL));                     \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 8, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(buffer, \
+                                                                       addr)  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2140000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 10, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(buffer, \
+                                                                       value) \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 2, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(  \
+   buffer)                                                                \
+   do {                                                                   \
+      {                                                                   \
+         uint64_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 4, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x1;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 3, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 6, &data, sizeof(data));                         \
+      }                                                                   \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID2_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID2_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+/* PDS opcode words; generated (see header above) -- modify the .pds source
+ * and regenerate rather than hand-editing these values. */
+static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid2_code[18] = {
+   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM temp[1].32 = 0000 */
+   0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */
+   0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */
+   0xd9880000, /* LIMM ? temp[2].32 = 0000 */
+   0xd98c0000, /* LIMM ? temp[3].32 = 0000 */
+   0x04181023, /* MAD temp[6].64 = (temp[3].32 * const[2].32) +
+                  const[4].64 */
+   0x50343003, /* SFTLP32 temp[3].32 = temp[6].32 << 0 */
+   0x9120c0c3, /* ADD32 temp[3].32 = temp[3].32 - const[3].32 */
+   0x9001a120, /* ADD32 ptemp[0].32 = const[6].32 + temp[4].32 */
+   0xd0800004, /* ST const[8].64: mem(?) <= src(?) */
+   0x9001a161, /* ADD32 ptemp[1].32 = const[6].32 + temp[5].32 */
+   0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */
+   0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */
+};
+
+/* Program descriptor consumed by the PDS uploader; field meanings are given
+ * by the trailing comments (aligned sizes are the raw sizes rounded up to a
+ * multiple of 4 dwords). */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance_drawid2_program = {
+      pvr_draw_indirect_arrays_base_instance_drawid2_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      10, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/*
+ * Data-segment writers. The memcpy() offsets below are in 32-bit units, so
+ * `buffer` is presumably a uint32_t * -- confirm against callers.
+ * Layout written: [0..1] di_data descriptor, [2] num_views,
+ * [3] immediate 0x1, [4..5] immediate 0x0 (64-bit), [6] immediate 0x0,
+ * [8..9] write_vdm descriptor, [10..11] flush_vdm descriptor.
+ * NOTE(review): dword 7 is never written here -- presumably padding.
+ */
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(buffer, \
+                                                                     addr,   \
+                                                                     device) \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                      \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 0, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(buffer, \
+                                                                       addr)  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x830000000000ULL));                     \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 8, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(buffer, \
+                                                                       addr)  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x1940000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 10, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(buffer, \
+                                                                       value) \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 2, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(  \
+   buffer)                                                                \
+   do {                                                                   \
+      {                                                                   \
+         uint64_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 4, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x1;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 3, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 6, &data, sizeof(data));                         \
+      }                                                                   \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID3_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID3_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+/* PDS opcode words; generated (see header above) -- modify the .pds source
+ * and regenerate rather than hand-editing these values. */
+static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid3_code[18] = {
+   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM temp[1].32 = 0000 */
+   0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */
+   0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */
+   0xd98c0000, /* LIMM ? temp[3].32 = 0000 */
+   0xd9900000, /* LIMM ? temp[4].32 = 0000 */
+   0x04201024, /* MAD temp[8].64 = (temp[4].32 * const[2].32) +
+                  const[4].64 */
+   0x50444004, /* SFTLP32 temp[4].32 = temp[8].32 << 0 */
+   0x912100c4, /* ADD32 temp[4].32 = temp[4].32 - const[3].32 */
+   0x9001a160, /* ADD32 ptemp[0].32 = const[6].32 + temp[5].32 */
+   0xd0800004, /* ST const[8].64: mem(?) <= src(?) */
+   0x9001a1a1, /* ADD32 ptemp[1].32 = const[6].32 + temp[6].32 */
+   0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */
+   0xd0000005, /* LD const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xf4064003, /* DOUT doutv = temp[0].64, const[6].32; HALT */
+};
+
+/* Program descriptor consumed by the PDS uploader; field meanings are given
+ * by the trailing comments (aligned sizes are the raw sizes rounded up to a
+ * multiple of 4 dwords). */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance_drawid3_program = {
+      pvr_draw_indirect_arrays_base_instance_drawid3_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      12, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/*
+ * Data-segment writers. The memcpy() offsets below are in 32-bit units, so
+ * `buffer` is presumably a uint32_t * -- confirm against callers.
+ * Layout written: [0..1] di_data descriptor, [2] num_views,
+ * [3] immediate 0x1, [4..5] immediate 0x0 (64-bit), [6] immediate 0x0,
+ * [8..9] write_vdm descriptor, [10..11] flush_vdm descriptor.
+ * NOTE(review): dword 7 is never written here -- presumably padding.
+ */
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(buffer, \
+                                                                     addr,   \
+                                                                     device) \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x80000000000ULL) |                      \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 0, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(buffer, \
+                                                                       addr)  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0xc30000000000ULL));                     \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 8, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(buffer, \
+                                                                       addr)  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2140000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 10, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(buffer, \
+                                                                       value) \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                \
+      memcpy(buffer + 2, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(  \
+   buffer)                                                                \
+   do {                                                                   \
+      {                                                                   \
+         uint64_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 4, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x1;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 3, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);             \
+         memcpy(buffer + 6, &data, sizeof(data));                         \
+      }                                                                   \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS0_H
+#define PVR_DRAW_INDIRECTELEMENTS0_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* PDS opcode words; generated (see header above) -- modify the .pds source
+ * and regenerate rather than hand-editing these values. */
+static const uint32_t pvr_draw_indirect_elements0_code[21] = {
+   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0x9000a0e0, /* ADD32 ptemp[0].32 = const[2].32 + temp[3].32 */
+   0x04101822, /* MAD temp[4].64 = (temp[2].32 * const[3].32) +
+                  const[4].64 */
+   0x53283006, /* SFTLP32 temp[6].32 = (temp[5].32 | const[6].32) << 0
+                  */
+   0x50242007, /* SFTLP32 temp[7].32 = temp[4].32 << 0 */
+   0x04083842, /* MAD temp[4].64 = (temp[1].32 * const[7].32) +
+                  const[8].64 */
+   0x50242001, /* SFTLP32 temp[1].32 = temp[4].32 << 0 */
+   0x50040008, /* SFTLP32 temp[8].32 = temp[0].32 << 0 */
+   0x91204289, /* ADD32 temp[9].32 = temp[1].32 - const[10].32 */
+   0x501c180a, /* SFTLP32 temp[10].32 = temp[3].32 << 0 */
+   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+   0xd1940000, /* LIMM temp[5].32 = 0000 */
+   0x500c0804, /* SFTLP32 temp[4].32 = temp[1].32 << 0 */
+   0xb1880000, /* CMP P0 = (temp[4].64 = 0000) */
+   0xd9a00000, /* LIMM ? temp[8].32 = 0000 */
+   0xd9a40000, /* LIMM ? temp[9].32 = 0000 */
+   0xd0800006, /* ST const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Program descriptor consumed by the PDS uploader; field meanings are given
+ * by the trailing comments (aligned sizes are the raw sizes rounded up to a
+ * multiple of 4 dwords). */
+static const struct pvr_psc_program_output pvr_draw_indirect_elements0_program = {
+   pvr_draw_indirect_elements0_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   7, /* number of constant mappings */
+
+   16, /* size of data segment, in dwords, aligned to 4 */
+   24, /* size of code segment, in dwords, aligned to 4 */
+   20, /* size of temp segment, in dwords, aligned to 4 */
+   16, /* size of data segment, in dwords */
+   21, /* size of code segment, in dwords */
+   18, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/*
+ * Data-segment writers. The memcpy() offsets below are in 32-bit units, so
+ * `buffer` is presumably a uint32_t * -- confirm against callers.
+ * Layout written: [0..1] di_data descriptor, [2] immediate 0x0,
+ * [3] idx_stride, [4..5] idx_base (64-bit), [6] idx_header, [7] num_views,
+ * [8..9] immediate 0x0 (64-bit), [10] immediate 0x1,
+ * [12..13] write_vdm descriptor, [14..15] flush_vdm descriptor.
+ * NOTE(review): dword 11 is never written here -- presumably padding.
+ */
+#define pvr_write_draw_indirect_elements0_di_data(buffer, addr, device)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x40000000000ULL) |                      \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 0, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_write_vdm(buffer, addr)         \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x1850000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_flush_vdm(buffer, addr)         \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3160000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_idx_stride(buffer, value)       \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 3, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_idx_base(buffer, value)         \
+   do {                                                                   \
+      uint64_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 4, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_idx_header(buffer, value)       \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 6, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_num_views(buffer, value)        \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 7, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_immediates(buffer)              \
+   do {                                                                   \
+      {                                                                   \
+         uint32_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);           \
+         memcpy(buffer + 2, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint64_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);           \
+         memcpy(buffer + 8, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x1;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);           \
+         memcpy(buffer + 10, &data, sizeof(data));                        \
+      }                                                                   \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS1_H
+#define PVR_DRAW_INDIRECTELEMENTS1_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* PDS opcode words; generated (see header above) -- modify the .pds source
+ * and regenerate rather than hand-editing these values. */
+static const uint32_t pvr_draw_indirect_elements1_code[21] = {
+   0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0x9000a120, /* ADD32 ptemp[0].32 = const[2].32 + temp[4].32 */
+   0x04181823, /* MAD temp[6].64 = (temp[3].32 * const[3].32) +
+                  const[4].64 */
+   0x53383008, /* SFTLP32 temp[8].32 = (temp[7].32 | const[6].32) << 0
+                  */
+   0x50343009, /* SFTLP32 temp[9].32 = temp[6].32 << 0 */
+   0x04103843, /* MAD temp[6].64 = (temp[2].32 * const[7].32) +
+                  const[8].64 */
+   0x50343002, /* SFTLP32 temp[2].32 = temp[6].32 << 0 */
+   0x500c080a, /* SFTLP32 temp[10].32 = temp[1].32 << 0 */
+   0x9120828b, /* ADD32 temp[11].32 = temp[2].32 - const[10].32 */
+   0x5024200c, /* SFTLP32 temp[12].32 = temp[4].32 << 0 */
+   0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM temp[7].32 = 0000 */
+   0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */
+   0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */
+   0xd9a80000, /* LIMM ? temp[10].32 = 0000 */
+   0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */
+   0xd0800006, /* ST const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF */
+   0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Program descriptor consumed by the PDS uploader; field meanings are given
+ * by the trailing comments (aligned sizes are the raw sizes rounded up to a
+ * multiple of 4 dwords). */
+static const struct pvr_psc_program_output pvr_draw_indirect_elements1_program = {
+   pvr_draw_indirect_elements1_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   7, /* number of constant mappings */
+
+   16, /* size of data segment, in dwords, aligned to 4 */
+   24, /* size of code segment, in dwords, aligned to 4 */
+   20, /* size of temp segment, in dwords, aligned to 4 */
+   16, /* size of data segment, in dwords */
+   21, /* size of code segment, in dwords */
+   20, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/*
+ * Data-segment writers. The memcpy() offsets below are in 32-bit units, so
+ * `buffer` is presumably a uint32_t * -- confirm against callers.
+ * Layout written: [0..1] di_data descriptor, [2] immediate 0x0,
+ * [3] idx_stride, [4..5] idx_base (64-bit), [6] idx_header, [7] num_views,
+ * [8..9] immediate 0x0 (64-bit), [10] immediate 0x1,
+ * [12..13] write_vdm descriptor, [14..15] flush_vdm descriptor.
+ * NOTE(review): dword 11 is never written here -- presumably padding.
+ */
+#define pvr_write_draw_indirect_elements1_di_data(buffer, addr, device)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                      \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 0, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_write_vdm(buffer, addr)         \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2050000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_flush_vdm(buffer, addr)         \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3960000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_idx_stride(buffer, value)       \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 3, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_idx_base(buffer, value)         \
+   do {                                                                   \
+      uint64_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 4, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_idx_header(buffer, value)       \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 6, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_num_views(buffer, value)        \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 7, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_immediates(buffer)              \
+   do {                                                                   \
+      {                                                                   \
+         uint32_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);           \
+         memcpy(buffer + 2, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint64_t data = 0x0;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);           \
+         memcpy(buffer + 8, &data, sizeof(data));                         \
+      }                                                                   \
+      {                                                                   \
+         uint32_t data = 0x1;                                             \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);           \
+         memcpy(buffer + 10, &data, sizeof(data));                        \
+      }                                                                   \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS2_H
+#define PVR_DRAW_INDIRECTELEMENTS2_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Pre-assembled PDS program: each array entry is one encoded instruction
+ * dword; the trailing comment on each entry is its disassembly.  Generated
+ * code (see the file-header note) -- do not hand-edit the opcodes. */
+static const uint32_t pvr_draw_indirect_elements2_code[21] = {
+   0xd0000000, /* LD       const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0x9000a160, /* ADD32    ptemp[0].32 = const[2].32 + temp[5].32 */
+   0x04201820, /* MAD      temp[0].64 = (temp[4].32 * const[3].32) +
+                  const[4].64 */
+   0x53083006, /* SFTLP32  temp[6].32 = (temp[1].32 | const[6].32) << 0
+                */
+   0x50040007, /* SFTLP32  temp[7].32 = temp[0].32 << 0 */
+   0x04183840, /* MAD      temp[0].64 = (temp[3].32 * const[7].32) +
+                  const[8].64 */
+   0x50040003, /* SFTLP32  temp[3].32 = temp[0].32 << 0 */
+   0x50141008, /* SFTLP32  temp[8].32 = temp[2].32 << 0 */
+   0x9120c289, /* ADD32    temp[9].32 = temp[3].32 - const[10].32 */
+   0x502c280a, /* SFTLP32  temp[10].32 = temp[5].32 << 0 */
+   0xc8000001, /* BRA      if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM     temp[1].32 = 0000 */
+   0x501c1800, /* SFTLP32  temp[0].32 = temp[3].32 << 0 */
+   0xb1800000, /* CMP      P0 = (temp[0].64 = 0000) */
+   0xd9a00000, /* LIMM     ? temp[8].32 = 0000 */
+   0xd9a40000, /* LIMM     ? temp[9].32 = 0000 */
+   0xd0800006, /* ST       const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD       const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0xf4024003, /* DOUT     doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Loader metadata for the program above; field meanings are given by the
+ * per-field comments (layout defined by struct pvr_psc_program_output,
+ * declared elsewhere).  Sizes are consistent: 21 code dwords align up to
+ * 24, 18 temp dwords align up to 20. */
+static const struct pvr_psc_program_output pvr_draw_indirect_elements2_program = {
+   pvr_draw_indirect_elements2_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   7, /* number of constant mappings */
+
+   16, /* size of data segment, in dwords, aligned to 4 */
+   24, /* size of code segment, in dwords, aligned to 4 */
+   20, /* size of temp segment, in dwords, aligned to 4 */
+   16, /* size of data segment, in dwords */
+   21, /* size of code segment, in dwords */
+   18, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* Data-segment writers: each macro patches one value into the program's
+ * data segment at a fixed offset from `buffer`.  NOTE(review): offsets
+ * appear to be 32-bit dword offsets (buffer assumed uint32_t *) -- confirm
+ * against callers. */
+#define pvr_write_draw_indirect_elements2_di_data(buffer, addr, device) \
+   do { \
+      uint64_t data = ((addr) | (0x60000000000ULL) | \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 0, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_write_vdm(buffer, addr) \
+   do { \
+      uint64_t data = ((addr) | (0x1850000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 12, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_flush_vdm(buffer, addr) \
+   do { \
+      uint64_t data = ((addr) | (0x3160000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 14, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_idx_stride(buffer, value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 3, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_idx_base(buffer, value) \
+   do { \
+      uint64_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 4, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_idx_header(buffer, value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 6, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_num_views(buffer, value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 7, &data, sizeof(data)); \
+   } while (0)
+/* Fixed immediates: zeros at dwords 2 and 8-9, constant 1 at dword 10. */
+#define pvr_write_draw_indirect_elements2_immediates(buffer) \
+   do { \
+      { \
+         uint32_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data)); \
+      } \
+      { \
+         uint64_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data)); \
+      } \
+      { \
+         uint32_t data = 0x1; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data)); \
+      } \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS3_H
+#define PVR_DRAW_INDIRECTELEMENTS3_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Pre-assembled PDS program: each array entry is one encoded instruction
+ * dword; the trailing comment on each entry is its disassembly.  Generated
+ * code (see the file-header note) -- do not hand-edit the opcodes. */
+static const uint32_t pvr_draw_indirect_elements3_code[21] = {
+   0xd0000000, /* LD       const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0x9000a1a0, /* ADD32    ptemp[0].32 = const[2].32 + temp[6].32 */
+   0x04281820, /* MAD      temp[0].64 = (temp[5].32 * const[3].32) +
+                  const[4].64 */
+   0x53083007, /* SFTLP32  temp[7].32 = (temp[1].32 | const[6].32) << 0
+                */
+   0x50040008, /* SFTLP32  temp[8].32 = temp[0].32 << 0 */
+   0x04203840, /* MAD      temp[0].64 = (temp[4].32 * const[7].32) +
+                  const[8].64 */
+   0x50040004, /* SFTLP32  temp[4].32 = temp[0].32 << 0 */
+   0x501c1809, /* SFTLP32  temp[9].32 = temp[3].32 << 0 */
+   0x9121028a, /* ADD32    temp[10].32 = temp[4].32 - const[10].32 */
+   0x5034300b, /* SFTLP32  temp[11].32 = temp[6].32 << 0 */
+   0xc8000001, /* BRA      if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM     temp[1].32 = 0000 */
+   0x50242000, /* SFTLP32  temp[0].32 = temp[4].32 << 0 */
+   0xb1800000, /* CMP      P0 = (temp[0].64 = 0000) */
+   0xd9a40000, /* LIMM     ? temp[9].32 = 0000 */
+   0xd9a80000, /* LIMM     ? temp[10].32 = 0000 */
+   0xd0800006, /* ST       const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD       const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0xf4024003, /* DOUT     doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Loader metadata for the program above; field meanings are given by the
+ * per-field comments (layout defined by struct pvr_psc_program_output,
+ * declared elsewhere).  Sizes are consistent: 21 code dwords align up to
+ * 24, 18 temp dwords align up to 20. */
+static const struct pvr_psc_program_output pvr_draw_indirect_elements3_program = {
+   pvr_draw_indirect_elements3_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   7, /* number of constant mappings */
+
+   16, /* size of data segment, in dwords, aligned to 4 */
+   24, /* size of code segment, in dwords, aligned to 4 */
+   20, /* size of temp segment, in dwords, aligned to 4 */
+   16, /* size of data segment, in dwords */
+   21, /* size of code segment, in dwords */
+   18, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* Data-segment writers: each macro patches one value into the program's
+ * data segment at a fixed offset from `buffer`.  NOTE(review): offsets
+ * appear to be 32-bit dword offsets (buffer assumed uint32_t *) -- confirm
+ * against callers. */
+#define pvr_write_draw_indirect_elements3_di_data(buffer, addr, device) \
+   do { \
+      uint64_t data = ((addr) | (0x80000000000ULL) | \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 0, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_write_vdm(buffer, addr) \
+   do { \
+      uint64_t data = ((addr) | (0x1c50000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 12, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_flush_vdm(buffer, addr) \
+   do { \
+      uint64_t data = ((addr) | (0x3160000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 14, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_idx_stride(buffer, value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 3, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_idx_base(buffer, value) \
+   do { \
+      uint64_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 4, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_idx_header(buffer, value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 6, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_num_views(buffer, value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 7, &data, sizeof(data)); \
+   } while (0)
+/* Fixed immediates: zeros at dwords 2 and 8-9, constant 1 at dword 10. */
+#define pvr_write_draw_indirect_elements3_immediates(buffer) \
+   do { \
+      { \
+         uint32_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data)); \
+      } \
+      { \
+         uint64_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data)); \
+      } \
+      { \
+         uint32_t data = 0x1; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data)); \
+      } \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE0_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE0_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Pre-assembled PDS program (base-instance variant: note the extra ptemp
+ * writes at the top compared to the plain elements programs); the trailing
+ * comment on each entry is its disassembly.  Generated code -- do not
+ * hand-edit the opcodes. */
+static const uint32_t pvr_draw_indirect_elements_base_instance0_code[23] = {
+   0xd0000000, /* LD       const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0x9000a0e0, /* ADD32    ptemp[0].32 = const[2].32 + temp[3].32 */
+   0x9000a121, /* ADD32    ptemp[1].32 = const[2].32 + temp[4].32 */
+   0x9130f0e3, /* ADD32    ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */
+   0x04101823, /* MAD      temp[6].64 = (temp[2].32 * const[3].32) +
+                  const[4].64 */
+   0x53383008, /* SFTLP32  temp[8].32 = (temp[7].32 | const[6].32) << 0
+                */
+   0x50343009, /* SFTLP32  temp[9].32 = temp[6].32 << 0 */
+   0x04083843, /* MAD      temp[6].64 = (temp[1].32 * const[7].32) +
+                  const[8].64 */
+   0x50343001, /* SFTLP32  temp[1].32 = temp[6].32 << 0 */
+   0x5004000a, /* SFTLP32  temp[10].32 = temp[0].32 << 0 */
+   0x9120428b, /* ADD32    temp[11].32 = temp[1].32 - const[10].32 */
+   0x501c180c, /* SFTLP32  temp[12].32 = temp[3].32 << 0 */
+   0xc8000001, /* BRA      if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM     temp[7].32 = 0000 */
+   0x500c0806, /* SFTLP32  temp[6].32 = temp[1].32 << 0 */
+   0xb18c0000, /* CMP      P0 = (temp[6].64 = 0000) */
+   0xd9a80000, /* LIMM     ? temp[10].32 = 0000 */
+   0xd9ac0000, /* LIMM     ? temp[11].32 = 0000 */
+   0xd0800006, /* ST       const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD       const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0xf4024003, /* DOUT     doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Loader metadata for the program above; field meanings are given by the
+ * per-field comments (layout defined by struct pvr_psc_program_output,
+ * declared elsewhere).  Sizes are consistent: 23 code dwords align up to
+ * 24, 20 temp dwords are already 4-aligned. */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance0_program = {
+      pvr_draw_indirect_elements_base_instance0_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* Data-segment writers: each macro patches one value into the program's
+ * data segment at a fixed offset from `buffer`.  NOTE(review): offsets
+ * appear to be 32-bit dword offsets (buffer assumed uint32_t *) -- confirm
+ * against callers. */
+#define pvr_write_draw_indirect_elements_base_instance0_di_data(buffer, \
+                                                                addr, \
+                                                                device) \
+   do { \
+      uint64_t data = ((addr) | (0x60000000000ULL) | \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 0, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_write_vdm(buffer, \
+                                                                  addr) \
+   do { \
+      uint64_t data = ((addr) | (0x2050000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 12, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_flush_vdm(buffer, \
+                                                                  addr) \
+   do { \
+      uint64_t data = ((addr) | (0x3960000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 14, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_idx_stride(buffer, \
+                                                                   value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 3, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_idx_base(buffer, \
+                                                                 value) \
+   do { \
+      uint64_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 4, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_idx_header(buffer, \
+                                                                   value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 6, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_num_views(buffer, \
+                                                                  value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 7, &data, sizeof(data)); \
+   } while (0)
+/* Fixed immediates: zeros at dwords 2 and 8-9, constant 1 at dword 10. */
+#define pvr_write_draw_indirect_elements_base_instance0_immediates(buffer) \
+   do { \
+      { \
+         uint32_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data)); \
+      } \
+      { \
+         uint64_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data)); \
+      } \
+      { \
+         uint32_t data = 0x1; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data)); \
+      } \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE1_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE1_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Pre-assembled PDS program (base-instance variant); the trailing comment
+ * on each entry is its disassembly.  Generated code -- do not hand-edit
+ * the opcodes. */
+static const uint32_t pvr_draw_indirect_elements_base_instance1_code[23] = {
+   0xd0000000, /* LD       const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0x9000a120, /* ADD32    ptemp[0].32 = const[2].32 + temp[4].32 */
+   0x9000a161, /* ADD32    ptemp[1].32 = const[2].32 + temp[5].32 */
+   0x9130f0e3, /* ADD32    ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */
+   0x04181823, /* MAD      temp[6].64 = (temp[3].32 * const[3].32) +
+                  const[4].64 */
+   0x53383008, /* SFTLP32  temp[8].32 = (temp[7].32 | const[6].32) << 0
+                */
+   0x50343009, /* SFTLP32  temp[9].32 = temp[6].32 << 0 */
+   0x04103843, /* MAD      temp[6].64 = (temp[2].32 * const[7].32) +
+                  const[8].64 */
+   0x50343002, /* SFTLP32  temp[2].32 = temp[6].32 << 0 */
+   0x500c080a, /* SFTLP32  temp[10].32 = temp[1].32 << 0 */
+   0x9120828b, /* ADD32    temp[11].32 = temp[2].32 - const[10].32 */
+   0x5024200c, /* SFTLP32  temp[12].32 = temp[4].32 << 0 */
+   0xc8000001, /* BRA      if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM     temp[7].32 = 0000 */
+   0x50141006, /* SFTLP32  temp[6].32 = temp[2].32 << 0 */
+   0xb18c0000, /* CMP      P0 = (temp[6].64 = 0000) */
+   0xd9a80000, /* LIMM     ? temp[10].32 = 0000 */
+   0xd9ac0000, /* LIMM     ? temp[11].32 = 0000 */
+   0xd0800006, /* ST       const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD       const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0xf4024003, /* DOUT     doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Loader metadata for the program above; field meanings are given by the
+ * per-field comments (layout defined by struct pvr_psc_program_output,
+ * declared elsewhere). */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance1_program = {
+      pvr_draw_indirect_elements_base_instance1_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* Data-segment writers: each macro patches one value into the program's
+ * data segment at a fixed offset from `buffer`.  NOTE(review): offsets
+ * appear to be 32-bit dword offsets (buffer assumed uint32_t *) -- confirm
+ * against callers. */
+#define pvr_write_draw_indirect_elements_base_instance1_di_data(buffer, \
+                                                                addr, \
+                                                                device) \
+   do { \
+      uint64_t data = ((addr) | (0x60000000000ULL) | \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 0, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_write_vdm(buffer, \
+                                                                  addr) \
+   do { \
+      uint64_t data = ((addr) | (0x2050000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 12, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_flush_vdm(buffer, \
+                                                                  addr) \
+   do { \
+      uint64_t data = ((addr) | (0x3960000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 14, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_idx_stride(buffer, \
+                                                                   value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 3, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_idx_base(buffer, \
+                                                                 value) \
+   do { \
+      uint64_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 4, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_idx_header(buffer, \
+                                                                   value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 6, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_num_views(buffer, \
+                                                                  value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 7, &data, sizeof(data)); \
+   } while (0)
+/* Fixed immediates: zeros at dwords 2 and 8-9, constant 1 at dword 10. */
+#define pvr_write_draw_indirect_elements_base_instance1_immediates(buffer) \
+   do { \
+      { \
+         uint32_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data)); \
+      } \
+      { \
+         uint64_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data)); \
+      } \
+      { \
+         uint32_t data = 0x1; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data)); \
+      } \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE2_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE2_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Pre-assembled PDS program (base-instance variant); the trailing comment
+ * on each entry is its disassembly.  Generated code -- do not hand-edit
+ * the opcodes. */
+static const uint32_t pvr_draw_indirect_elements_base_instance2_code[23] = {
+   0xd0000000, /* LD       const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0x9000a160, /* ADD32    ptemp[0].32 = const[2].32 + temp[5].32 */
+   0x9000a1a1, /* ADD32    ptemp[1].32 = const[2].32 + temp[6].32 */
+   0x9130f0e3, /* ADD32    ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */
+   0x04201820, /* MAD      temp[0].64 = (temp[4].32 * const[3].32) +
+                  const[4].64 */
+   0x53083007, /* SFTLP32  temp[7].32 = (temp[1].32 | const[6].32) << 0
+                */
+   0x50040008, /* SFTLP32  temp[8].32 = temp[0].32 << 0 */
+   0x04183840, /* MAD      temp[0].64 = (temp[3].32 * const[7].32) +
+                  const[8].64 */
+   0x50040003, /* SFTLP32  temp[3].32 = temp[0].32 << 0 */
+   0x50141009, /* SFTLP32  temp[9].32 = temp[2].32 << 0 */
+   0x9120c28a, /* ADD32    temp[10].32 = temp[3].32 - const[10].32 */
+   0x502c280b, /* SFTLP32  temp[11].32 = temp[5].32 << 0 */
+   0xc8000001, /* BRA      if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM     temp[1].32 = 0000 */
+   0x501c1800, /* SFTLP32  temp[0].32 = temp[3].32 << 0 */
+   0xb1800000, /* CMP      P0 = (temp[0].64 = 0000) */
+   0xd9a40000, /* LIMM     ? temp[9].32 = 0000 */
+   0xd9a80000, /* LIMM     ? temp[10].32 = 0000 */
+   0xd0800006, /* ST       const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD       const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0xf4024003, /* DOUT     doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Loader metadata for the program above; field meanings are given by the
+ * per-field comments (layout defined by struct pvr_psc_program_output,
+ * declared elsewhere).  Note: temp segment is 18 dwords here (vs 20 in the
+ * other base_instance variants), matching this variant's lower maximum
+ * temp register usage (temp[11]); 18 aligns up to 20. */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance2_program = {
+      pvr_draw_indirect_elements_base_instance2_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      18, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* Data-segment writers: each macro patches one value into the program's
+ * data segment at a fixed offset from `buffer`.  NOTE(review): offsets
+ * appear to be 32-bit dword offsets (buffer assumed uint32_t *) -- confirm
+ * against callers. */
+#define pvr_write_draw_indirect_elements_base_instance2_di_data(buffer, \
+                                                                addr, \
+                                                                device) \
+   do { \
+      uint64_t data = ((addr) | (0x80000000000ULL) | \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 0, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_write_vdm(buffer, \
+                                                                  addr) \
+   do { \
+      uint64_t data = ((addr) | (0x1c50000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 12, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_flush_vdm(buffer, \
+                                                                  addr) \
+   do { \
+      uint64_t data = ((addr) | (0x3160000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 14, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_idx_stride(buffer, \
+                                                                   value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 3, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_idx_base(buffer, \
+                                                                 value) \
+   do { \
+      uint64_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 4, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_idx_header(buffer, \
+                                                                   value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 6, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_num_views(buffer, \
+                                                                  value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 7, &data, sizeof(data)); \
+   } while (0)
+/* Fixed immediates: zeros at dwords 2 and 8-9, constant 1 at dword 10. */
+#define pvr_write_draw_indirect_elements_base_instance2_immediates(buffer) \
+   do { \
+      { \
+         uint32_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data)); \
+      } \
+      { \
+         uint64_t data = 0x0; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data)); \
+      } \
+      { \
+         uint32_t data = 0x1; \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data)); \
+      } \
+   } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE3_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE3_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Pre-assembled PDS program (base-instance variant); the trailing comment
+ * on each entry is its disassembly.  Generated code -- do not hand-edit
+ * the opcodes. */
+static const uint32_t pvr_draw_indirect_elements_base_instance3_code[23] = {
+   0xd0000000, /* LD       const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0x9000a1a0, /* ADD32    ptemp[0].32 = const[2].32 + temp[6].32 */
+   0x9000a1e1, /* ADD32    ptemp[1].32 = const[2].32 + temp[7].32 */
+   0x9130f0e3, /* ADD32    ptemp[3].32 = ptemp[3].32 - ptemp[3].32 */
+   0x04281820, /* MAD      temp[0].64 = (temp[5].32 * const[3].32) +
+                  const[4].64 */
+   0x53083008, /* SFTLP32  temp[8].32 = (temp[1].32 | const[6].32) << 0
+                */
+   0x50040009, /* SFTLP32  temp[9].32 = temp[0].32 << 0 */
+   0x04203840, /* MAD      temp[0].64 = (temp[4].32 * const[7].32) +
+                  const[8].64 */
+   0x50040004, /* SFTLP32  temp[4].32 = temp[0].32 << 0 */
+   0x501c180a, /* SFTLP32  temp[10].32 = temp[3].32 << 0 */
+   0x9121028b, /* ADD32    temp[11].32 = temp[4].32 - const[10].32 */
+   0x5034300c, /* SFTLP32  temp[12].32 = temp[6].32 << 0 */
+   0xc8000001, /* BRA      if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM     temp[1].32 = 0000 */
+   0x50242000, /* SFTLP32  temp[0].32 = temp[4].32 << 0 */
+   0xb1800000, /* CMP      P0 = (temp[0].64 = 0000) */
+   0xd9a80000, /* LIMM     ? temp[10].32 = 0000 */
+   0xd9ac0000, /* LIMM     ? temp[11].32 = 0000 */
+   0xd0800006, /* ST       const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD       const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF      */
+   0xf4024003, /* DOUT     doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Loader metadata for the program above; field meanings are given by the
+ * per-field comments (layout defined by struct pvr_psc_program_output,
+ * declared elsewhere). */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance3_program = {
+      pvr_draw_indirect_elements_base_instance3_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* Data-segment writers: each macro patches one value into the program's
+ * data segment at a fixed offset from `buffer`.  NOTE(review): offsets
+ * appear to be 32-bit dword offsets (buffer assumed uint32_t *) -- confirm
+ * against callers. */
+#define pvr_write_draw_indirect_elements_base_instance3_di_data(buffer, \
+                                                                addr, \
+                                                                device) \
+   do { \
+      uint64_t data = ((addr) | (0x80000000000ULL) | \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 0, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_write_vdm(buffer, \
+                                                                  addr) \
+   do { \
+      uint64_t data = ((addr) | (0x2050000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 12, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_flush_vdm(buffer, \
+                                                                  addr) \
+   do { \
+      uint64_t data = ((addr) | (0x3960000000000ULL)); \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 14, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_idx_stride(buffer, \
+                                                                   value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 3, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_idx_base(buffer, \
+                                                                 value) \
+   do { \
+      uint64_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 4, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_idx_header(buffer, \
+                                                                   value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 6, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_num_views(buffer, \
+                                                                  value) \
+   do { \
+      uint32_t data = value; \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+      memcpy(buffer + 7, &data, sizeof(data)); \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_immediates(buffer) \
+ do { \
+ { \
+ uint32_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 2, &data, sizeof(data)); \
+ } \
+ { \
+ uint64_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 8, &data, sizeof(data)); \
+ } \
+ { \
+ uint32_t data = 0x1; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 10, &data, sizeof(data)); \
+ } \
+ } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID0_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID0_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+static const uint32_t
+ pvr_draw_indirect_elements_base_instance_drawid0_code[23] = {
+ 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+ 0xd1000000, /* WDF */
+ 0x9000a0e0, /* ADD32 ptemp[0].32 = const[2].32 + temp[3].32 */
+ 0x9000a121, /* ADD32 ptemp[1].32 = const[2].32 + temp[4].32 */
+ 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */
+ 0x04102033, /* MAD temp[6].64 = (temp[2].32 * const[4].32) +
+ const[6].64 */
+ 0x53382808, /* SFTLP32 temp[8].32 = (temp[7].32 | const[5].32) <<
+ * 0
+ */
+ 0x50343009, /* SFTLP32 temp[9].32 = temp[6].32 << 0 */
+ 0x04084053, /* MAD temp[6].64 = (temp[1].32 * const[8].32) +
+ const[10].64 */
+ 0x50343001, /* SFTLP32 temp[1].32 = temp[6].32 << 0 */
+ 0x5004000a, /* SFTLP32 temp[10].32 = temp[0].32 << 0 */
+ 0x912040cb, /* ADD32 temp[11].32 = temp[1].32 - const[3].32 */
+ 0x501c180c, /* SFTLP32 temp[12].32 = temp[3].32 << 0 */
+ 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+ 0xd19c0000, /* LIMM temp[7].32 = 0000 */
+ 0x500c0806, /* SFTLP32 temp[6].32 = temp[1].32 << 0 */
+ 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */
+ 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */
+ 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */
+ 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */
+ 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) */
+ 0xd1000000, /* WDF */
+ 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */
+ };
+
+static const struct pvr_psc_program_output
+ pvr_draw_indirect_elements_base_instance_drawid0_program = {
+ pvr_draw_indirect_elements_base_instance_drawid0_code, /* code segment
+ */
+ 0, /* constant mappings, zeroed since we use the macros below */
+ 7, /* number of constant mappings */
+
+ 16, /* size of data segment, in dwords, aligned to 4 */
+ 24, /* size of code segment, in dwords, aligned to 4 */
+ 20, /* size of temp segment, in dwords, aligned to 4 */
+ 16, /* size of data segment, in dwords */
+ 23, /* size of code segment, in dwords */
+ 20, /* size of temp segment, in dwords */
+ NULL /* function pointer to write data segment */
+ };
+
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(buffer, \
+ addr, \
+ device) \
+ do { \
+ uint64_t data = ((addr) | (0x60000000000ULL) | \
+ ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 0, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm( \
+ buffer, \
+ addr) \
+ do { \
+ uint64_t data = ((addr) | (0x2050000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 12, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm( \
+ buffer, \
+ addr) \
+ do { \
+ uint64_t data = ((addr) | (0x3960000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 14, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 4, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base( \
+ buffer, \
+ value) \
+ do { \
+ uint64_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 6, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 5, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_num_views( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 8, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_immediates( \
+ buffer) \
+ do { \
+ { \
+ uint32_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 2, &data, sizeof(data)); \
+ } \
+ { \
+ uint32_t data = 0x1; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 3, &data, sizeof(data)); \
+ } \
+ { \
+ uint64_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 10, &data, sizeof(data)); \
+ } \
+ } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID1_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID1_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+static const uint32_t
+ pvr_draw_indirect_elements_base_instance_drawid1_code[23] = {
+ 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+ 0xd1000000, /* WDF */
+ 0x9000a120, /* ADD32 ptemp[0].32 = const[2].32 + temp[4].32 */
+ 0x9000a161, /* ADD32 ptemp[1].32 = const[2].32 + temp[5].32 */
+ 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */
+ 0x04182033, /* MAD temp[6].64 = (temp[3].32 * const[4].32) +
+ const[6].64 */
+ 0x53382808, /* SFTLP32 temp[8].32 = (temp[7].32 | const[5].32) <<
+ * 0
+ */
+ 0x50343009, /* SFTLP32 temp[9].32 = temp[6].32 << 0 */
+ 0x04104053, /* MAD temp[6].64 = (temp[2].32 * const[8].32) +
+ const[10].64 */
+ 0x50343002, /* SFTLP32 temp[2].32 = temp[6].32 << 0 */
+ 0x500c080a, /* SFTLP32 temp[10].32 = temp[1].32 << 0 */
+ 0x912080cb, /* ADD32 temp[11].32 = temp[2].32 - const[3].32 */
+ 0x5024200c, /* SFTLP32 temp[12].32 = temp[4].32 << 0 */
+ 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+ 0xd19c0000, /* LIMM temp[7].32 = 0000 */
+ 0x50141006, /* SFTLP32 temp[6].32 = temp[2].32 << 0 */
+ 0xb18c0000, /* CMP P0 = (temp[6].64 = 0000) */
+ 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */
+ 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */
+ 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */
+ 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) */
+ 0xd1000000, /* WDF */
+ 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */
+ };
+
+static const struct pvr_psc_program_output
+ pvr_draw_indirect_elements_base_instance_drawid1_program = {
+ pvr_draw_indirect_elements_base_instance_drawid1_code, /* code segment
+ */
+ 0, /* constant mappings, zeroed since we use the macros below */
+ 7, /* number of constant mappings */
+
+ 16, /* size of data segment, in dwords, aligned to 4 */
+ 24, /* size of code segment, in dwords, aligned to 4 */
+ 20, /* size of temp segment, in dwords, aligned to 4 */
+ 16, /* size of data segment, in dwords */
+ 23, /* size of code segment, in dwords */
+ 20, /* size of temp segment, in dwords */
+ NULL /* function pointer to write data segment */
+ };
+
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(buffer, \
+ addr, \
+ device) \
+ do { \
+ uint64_t data = ((addr) | (0x60000000000ULL) | \
+ ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 0, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm( \
+ buffer, \
+ addr) \
+ do { \
+ uint64_t data = ((addr) | (0x2050000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 12, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm( \
+ buffer, \
+ addr) \
+ do { \
+ uint64_t data = ((addr) | (0x3960000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 14, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 4, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base( \
+ buffer, \
+ value) \
+ do { \
+ uint64_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 6, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 5, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_num_views( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 8, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_immediates( \
+ buffer) \
+ do { \
+ { \
+ uint32_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 2, &data, sizeof(data)); \
+ } \
+ { \
+ uint32_t data = 0x1; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 3, &data, sizeof(data)); \
+ } \
+ { \
+ uint64_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 10, &data, sizeof(data)); \
+ } \
+ } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID2_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID2_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+static const uint32_t
+ pvr_draw_indirect_elements_base_instance_drawid2_code[23] = {
+ 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+ 0xd1000000, /* WDF */
+ 0x9000a160, /* ADD32 ptemp[0].32 = const[2].32 + temp[5].32 */
+ 0x9000a1a1, /* ADD32 ptemp[1].32 = const[2].32 + temp[6].32 */
+ 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */
+ 0x04202030, /* MAD temp[0].64 = (temp[4].32 * const[4].32) +
+ const[6].64 */
+ 0x53082807, /* SFTLP32 temp[7].32 = (temp[1].32 | const[5].32) <<
+ * 0
+ */
+ 0x50040008, /* SFTLP32 temp[8].32 = temp[0].32 << 0 */
+ 0x04184050, /* MAD temp[0].64 = (temp[3].32 * const[8].32) +
+ const[10].64 */
+ 0x50040003, /* SFTLP32 temp[3].32 = temp[0].32 << 0 */
+ 0x50141009, /* SFTLP32 temp[9].32 = temp[2].32 << 0 */
+ 0x9120c0ca, /* ADD32 temp[10].32 = temp[3].32 - const[3].32 */
+ 0x502c280b, /* SFTLP32 temp[11].32 = temp[5].32 << 0 */
+ 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+ 0xd1840000, /* LIMM temp[1].32 = 0000 */
+ 0x501c1800, /* SFTLP32 temp[0].32 = temp[3].32 << 0 */
+ 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */
+ 0xd9a40000, /* LIMM ? temp[9].32 = 0000 */
+ 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */
+ 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */
+ 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) */
+ 0xd1000000, /* WDF */
+ 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */
+ };
+
+static const struct pvr_psc_program_output
+ pvr_draw_indirect_elements_base_instance_drawid2_program = {
+ pvr_draw_indirect_elements_base_instance_drawid2_code, /* code segment
+ */
+ 0, /* constant mappings, zeroed since we use the macros below */
+ 7, /* number of constant mappings */
+
+ 16, /* size of data segment, in dwords, aligned to 4 */
+ 24, /* size of code segment, in dwords, aligned to 4 */
+ 20, /* size of temp segment, in dwords, aligned to 4 */
+ 16, /* size of data segment, in dwords */
+ 23, /* size of code segment, in dwords */
+ 18, /* size of temp segment, in dwords */
+ NULL /* function pointer to write data segment */
+ };
+
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(buffer, \
+ addr, \
+ device) \
+ do { \
+ uint64_t data = ((addr) | (0x80000000000ULL) | \
+ ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 0, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm( \
+ buffer, \
+ addr) \
+ do { \
+ uint64_t data = ((addr) | (0x1c50000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 12, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm( \
+ buffer, \
+ addr) \
+ do { \
+ uint64_t data = ((addr) | (0x3160000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 14, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 4, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base( \
+ buffer, \
+ value) \
+ do { \
+ uint64_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 6, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 5, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_num_views( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 8, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_immediates( \
+ buffer) \
+ do { \
+ { \
+ uint32_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 2, &data, sizeof(data)); \
+ } \
+ { \
+ uint32_t data = 0x1; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 3, &data, sizeof(data)); \
+ } \
+ { \
+ uint64_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 10, &data, sizeof(data)); \
+ } \
+ } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID3_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID3_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+static const uint32_t
+ pvr_draw_indirect_elements_base_instance_drawid3_code[23] = {
+ 0xd0000000, /* LD const[0].64: dst(?) <= mem(?) */
+ 0xd1000000, /* WDF */
+ 0x9000a1a0, /* ADD32 ptemp[0].32 = const[2].32 + temp[6].32 */
+ 0x9000a1e1, /* ADD32 ptemp[1].32 = const[2].32 + temp[7].32 */
+ 0x9030c0e3, /* ADD32 ptemp[3].32 = ptemp[3].32 + const[3].32 */
+ 0x04282030, /* MAD temp[0].64 = (temp[5].32 * const[4].32) +
+ const[6].64 */
+ 0x53082808, /* SFTLP32 temp[8].32 = (temp[1].32 | const[5].32) <<
+ * 0
+ */
+ 0x50040009, /* SFTLP32 temp[9].32 = temp[0].32 << 0 */
+ 0x04204050, /* MAD temp[0].64 = (temp[4].32 * const[8].32) +
+ const[10].64 */
+ 0x50040004, /* SFTLP32 temp[4].32 = temp[0].32 << 0 */
+ 0x501c180a, /* SFTLP32 temp[10].32 = temp[3].32 << 0 */
+ 0x912100cb, /* ADD32 temp[11].32 = temp[4].32 - const[3].32 */
+ 0x5034300c, /* SFTLP32 temp[12].32 = temp[6].32 << 0 */
+ 0xc8000001, /* BRA if keep 1 ( setc = p0 ) */
+ 0xd1840000, /* LIMM temp[1].32 = 0000 */
+ 0x50242000, /* SFTLP32 temp[0].32 = temp[4].32 << 0 */
+ 0xb1800000, /* CMP P0 = (temp[0].64 = 0000) */
+ 0xd9a80000, /* LIMM ? temp[10].32 = 0000 */
+ 0xd9ac0000, /* LIMM ? temp[11].32 = 0000 */
+ 0xd0800006, /* ST const[12].64: mem(?) <= src(?) */
+ 0xd0000007, /* LD const[14].64: dst(?) <= mem(?) */
+ 0xd1000000, /* WDF */
+ 0xf4024003, /* DOUT doutv = temp[0].64, const[2].32; HALT */
+ };
+
+static const struct pvr_psc_program_output
+ pvr_draw_indirect_elements_base_instance_drawid3_program = {
+ pvr_draw_indirect_elements_base_instance_drawid3_code, /* code segment
+ */
+ 0, /* constant mappings, zeroed since we use the macros below */
+ 7, /* number of constant mappings */
+
+ 16, /* size of data segment, in dwords, aligned to 4 */
+ 24, /* size of code segment, in dwords, aligned to 4 */
+ 20, /* size of temp segment, in dwords, aligned to 4 */
+ 16, /* size of data segment, in dwords */
+ 23, /* size of code segment, in dwords */
+ 20, /* size of temp segment, in dwords */
+ NULL /* function pointer to write data segment */
+ };
+
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(buffer, \
+ addr, \
+ device) \
+ do { \
+ uint64_t data = ((addr) | (0x80000000000ULL) | \
+ ENABLE_SLC_MCU_CACHE_CONTROLS(device)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 0, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm( \
+ buffer, \
+ addr) \
+ do { \
+ uint64_t data = ((addr) | (0x2050000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 12, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm( \
+ buffer, \
+ addr) \
+ do { \
+ uint64_t data = ((addr) | (0x3960000000000ULL)); \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 14, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 4, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base( \
+ buffer, \
+ value) \
+ do { \
+ uint64_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 6, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 5, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_num_views( \
+ buffer, \
+ value) \
+ do { \
+ uint32_t data = value; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 8, &data, sizeof(data)); \
+ } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_immediates( \
+ buffer) \
+ do { \
+ { \
+ uint32_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 2, &data, sizeof(data)); \
+ } \
+ { \
+ uint32_t data = 0x1; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 3, &data, sizeof(data)); \
+ } \
+ { \
+ uint64_t data = 0x0; \
+ PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+ memcpy(buffer + 10, &data, sizeof(data)); \
+ } \
+ } while (0)
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_PDS_DEFS_H
+#define PVR_ROGUE_PDS_DEFS_H
+
+#include <stdint.h>
+
+/* Instruction type C */
+#define PVR_ROGUE_PDSINST_OPCODEC_MASK (0x0000000FU)
+/* 64 bit add*/
+#define PVR_ROGUE_PDSINST_OPCODEC_ADD64 UINT32_C(0x00000008)
+/* 32 bit add*/
+#define PVR_ROGUE_PDSINST_OPCODEC_ADD32 UINT32_C(0x00000009)
+/* Shift and/or Logic Operation (64 bit)*/
+#define PVR_ROGUE_PDSINST_OPCODEC_SFTLP64 UINT32_C(0x0000000a)
+/* Compare and set predicate*/
+#define PVR_ROGUE_PDSINST_OPCODEC_CMP UINT32_C(0x0000000b)
+/* Branch and/or select predicate*/
+#define PVR_ROGUE_PDSINST_OPCODEC_BRA UINT32_C(0x0000000c)
+/* Umbrella OpcodeSP instructions*/
+#define PVR_ROGUE_PDSINST_OPCODEC_SP UINT32_C(0x0000000d)
+/* Multiply Accumulate with DOUD*/
+#define PVR_ROGUE_PDSINST_OPCODEC_DDMAD UINT32_C(0x0000000e)
+/* DOUT Command*/
+#define PVR_ROGUE_PDSINST_OPCODEC_DOUT UINT32_C(0x0000000f)
+
+/* Logical Operation */
+#define PVR_ROGUE_PDSINST_LOP_MASK (0x00000007U)
+#define PVR_ROGUE_PDSINST_LOP_NONE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LOP_NOT (0x00000001U)
+#define PVR_ROGUE_PDSINST_LOP_AND (0x00000002U)
+#define PVR_ROGUE_PDSINST_LOP_OR (0x00000003U)
+#define PVR_ROGUE_PDSINST_LOP_XOR (0x00000004U)
+#define PVR_ROGUE_PDSINST_LOP_XNOR (0x00000005U)
+#define PVR_ROGUE_PDSINST_LOP_NAND (0x00000006U)
+#define PVR_ROGUE_PDSINST_LOP_NOR (0x00000007U)
+
+/* 64-bit Source Temps and Persistent Temps. */
+#define PVR_ROGUE_PDSINST_REGS64TP_MASK (0x0000001FU)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64_UPPER (15U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64 (1U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER (16U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_UPPER (31U)
+
+/* 32-bit Registers - 32-bit aligned. */
+#define PVR_ROGUE_PDSINST_REGS32_MASK (0x000000FFU)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER (127U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32 (1U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER (128U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32_UPPER (159U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32 (2U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER (192U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32_UPPER (223U)
+
+/* cc ? if im then
+ * cc ? dst = (*src0 lop *src1) << src2
+ * cc ? else
+ * cc ? dst = (*src0 lop *src1) << *src2
+ *
+ * Take the logical operation of the 2 sources, and shift to a 64 bit result.
+ * For unary operator NOT, *src0 is taken as the logical operand; for operator
+ * NONE, an unmodified *src0 is shifted. If IM is set use SFT as a direct shift
+ * value, otherwise use an address to obtain the shift value. The shift value
+ * (SRC2) is treated as a 2's complement encoded signed value. A negative value
+ * encodes a right shift. Values are clamped to the range [-63,63].
+ */
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_DEFAULT (0xA0000000U) /* SFTLP64 */
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_SFTLP64_IM_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_SFTLP64_IM_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT (0U)
+
+/* Instruction type B */
+#define PVR_ROGUE_PDSINST_OPCODEB_MASK (0x00000007U)
+/* Shift and/or Logic Operation (32 bit) */
+#define PVR_ROGUE_PDSINST_OPCODEB_SFTLP32 UINT32_C(0x00000002)
+/* Vertex Stream Out DMA Command */
+#define PVR_ROGUE_PDSINST_OPCODEB_STM UINT32_C(0x00000003)
+
+/* 32-bit Source Temps. */
+#define PVR_ROGUE_PDSINST_REGS32T_MASK (0x0000001FU)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32_UPPER (31U)
+
+/* 32-bit Source Temps and Persistent Temps. */
+#define PVR_ROGUE_PDSINST_REGS32TP_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32_UPPER (31U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32 (1U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER (32U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_UPPER (63U)
+
+/* cc ? if im then
+ * cc ? dst = (*src0 lop *src1) << src2
+ * cc ? else
+ * cc ? dst = (*src0 lop *src1) << *src2
+ *
+ * Take the logical operation of the 2 sources, and shift to a 32 bit result.
+ * For unary operator NOT, *src0 is taken as the logical operand; for operator
+ * NONE, an unmodified *src0 is shifted. If IM is set, use the shift value SFT
+ * (SRC2) as a direct shift value, otherwise use an address to obtain the shift
+ * value. SFT (SRC2) is treated as a 2's complement encoded signed value. A
+ * negative value encodes a right shift. Values are clamped to the range
+ * [-31,31].
+ */
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_CLRMSK (0x1FFFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_DEFAULT (0x40000000U) /* SFTLP32 */
+#define PVR_ROGUE_PDSINST_SFTLP32_IM_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_SFTLP32_IM_ENABLE (0x10000000U)
+#define PVR_ROGUE_PDSINST_SFTLP32_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_SFTLP32_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT (19U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT (11U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT (0U)
+
+/* The stream being processed within the vertex, selects 1 of 4 streams. */
+#define PVR_ROGUE_PDSINST_SO_MASK (0x00000003U)
+
+/* An instruction to enable the 'Streaming Out' of data to memory.
+ *
+ * This instruction can only be used when called from a Stream Output Program
+ * (see the
+ * stream output configuration words), as it reads its source data from the
+ * unified
+ * vertex store within the TA.
+ *
+ * Stream Out programs use the vertex data master, but are called from the TA.
+ * They do not execute on the USC. If synchronization is required with the
+ * control stream to the next draw call, a DOUTV command must be used when
+ * stream out finishes for the current draw call. The VDM must have a
+ * corresponding entry in the control stream indicating when it should wait for
+ * the PDS.
+ *
+ * As SRC0, SRC1 needs to be held from program to program it is assumed these
+ * are in persistent temps. There are 32 (dword) persistent temps, 8 of which
+ * are required to support 4 streams. The driver needs to manage the allocation
+ * of these. If the value needs to be carried from one geometry job to another,
+ * it will need to be loaded from memory at the start of the geometry job, and
+ * stored at the end of it (using a state program in the input control stream).
+ *
+ * When a new buffer is altered which was in use, the driver will need to fence
+ * in order to make sure that the preceding operation have completed before the
+ * persistent temps are updated.
+ *
+ * It is assumed that the USC compiler will optimize the stream order to keep
+ * data which is contiguous in the output vertex (going to memory)
+ * together. This will enable multiple words to be streamed out in a single
+ * DMA. This will reduce the processing load on the TA.
+ *
+ * The sources are read from within the constant, temporary stores of the PDS,
+ * and have the following meaning.
+ *
+ * If the buffer is being appended to then persistent constants need to be
+ * stored to memory at the end of the geometry job, and reloaded at the start
+ * of the next job (as another context may be run).
+ *
+ * ccs ? if (so_address + (so_vosize * so_primtype)) <= so_limit then
+ *
+ * dma the data from the vbg, and write it into memory. so_vioff is
+ * an offset into the current vertex.
+ * ccs ? for (so_vertex=0 ; so_vertex < so_primtype; so_vertex++)
+ * ccs ? for (i=0 ; i < so_dmasize; i++)
+ * ccs ? *(so_address + so_vooff + i + (so_vertex * so_vosize)) =
+ * readvertexvbg(so_vioff + i + (so_vertex * stream_size))
+ *
+ * ccs ? if so_eop then
+ * ccs ? so_address = so_address + (so_vosize * so_primtype)
+ * ccs ? so_primwritten = so_primwritten + 1
+ * ccs ? end if
+ *
+ *
+ * ccs ? else
+ *
+ * ccs ? setp(so_overflow_predicate[so])
+ * ccs ? [so_overflow_predicate[global]]
+ *
+ * ccs ? end if
+ *
+ * if so_eop then
+ * so_primneeded = so_primneeded + 1
+ * end if
+ *
+ * The VBG presents a stream when outputted from the shader. A bit is set in the
+ * input register indicating which stream is present. The PDS is called on a per
+ * primitive basis. In simple geometry this is per input triangle, strip etc.,
+ * in geometry shader land this is per output primitive from the geometry
+ * shader. Primitives are unraveled to remove vertex sharing. The PDS is called
+ * in submission order. The PDS program needs to be written for the primitive
+ * which is being emitted.
+ *
+ * Example
+ *
+ * Data is actually going into three buffers (this is defined elsewhere).
+ * SO_VERTEX0.Pos.XY -> buffer0
+ * SO_VERTEX0.Mult.XY -> buffer0
+ * SO_VERTEX1.Add.XY -> buffer1
+ *
+ * SO_VERTEX0.Pos.ZW -> buffer2
+ *
+ * Persistent temps:
+ * pt0 = Buffer0 start address;
+ * pt1 = Buffer1 start address;
+ * pt2 = Buffer2 start address;
+ * pt3 = 0 (buffer0 primwritten/needed)
+ * pt4 = 0 (buffer1 primwritten/needed)
+ * pt5 = 0 (buffer2 primwritten/needed)
+ *
+ * Constants:
+ * c0 = Buffer 0 top
+ * c1 = Buffer 1 top
+ * c2 = Buffer 2 top
+ * c3 = SRC2,3 for Pos.XY: VOOFF=0, DMASIZE=2, SO_VIOFF=0, EOP=0
+ * c4 = SRC2,3 for Mult: VOSIZE=4, VOOFF=2, DMASIZE=2, SO_VIOFF=2, EOP=1
+ * c5 = SRC2,3 for Pos.ZW: VOSIZE=2, VOOFF=0, DMASIZE=2, SO_VIOFF=0, EOP=1
+ * c6 = SRC2,3 for Add: VOSIZE=2, VOOFF=0, DMASIZE=2, SO_VIOFF=0, EOP=1
+ *
+ *
+ * ifstream0 {
+ *
+ * # Write Pos.XY
+ * STM SO=0, SRC3=c0, SRC2=c3, SRC1=pt3, SRC0=pt0
+ * STM SO=0, SRC3=c0, SRC2=c4, SRC1=pt3, SRC0=pt0
+ * # Write Pos.ZW to buffer 2 and advance
+ * STM SO=0, SRC3=c2, SRC2=c5, SRC1=pt5, SRC0=pt2
+ *
+ * }
+ *
+ * else if stream1 {
+ *
+ * #Write Add to buffer 1 and advance
+ * STM SO=1, SRC3=c1, SRC2=c6, SRC1=pt4, SRC0=pt1
+ *
+ * }
+ */
+#define PVR_ROGUE_PDSINST_STM_OPCODE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT (25U)
+#define PVR_ROGUE_PDSINST_STM_SO_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT (0U)
+
+/* Multiple Accumulate */
+#define PVR_ROGUE_PDSINST_OPCODEA_MAD UINT32_C(0x00000000)
+
+/* ALU Mode */
+
+/* ALU will perform unsigned math.*/
+#define PVR_ROGUE_PDSINST_ALUM_UNSIGNED (0x00000000U)
+
+/* 64-bit Registers - 64-bit aligned */
+#define PVR_ROGUE_PDSINST_REGS64_MASK (0x0000007FU)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER (63U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64 (1U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER (64U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64_UPPER (79U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64 (2U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER (96U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64_UPPER (111U)
+
+/* 64-bit Temps 0-15 Destination */
+#define PVR_ROGUE_PDSINST_REGS64T_MASK (0x0000000FU)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64_UPPER (15U)
+
+/* cc ? dst = (src0 * src1) + (src2 * -1sna) + cin
+ *
+ * Multiply 2 source 32 bit numbers to generate a 64 bit result, then add or
+ * subtract a third source. Conditionally takes in a carry in. Always generates
+ * a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_MAD_OPCODE_SHIFT (30U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_ADD (0x00000000U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_SUB (0x20000000U)
+#define PVR_ROGUE_PDSINST_MAD_ALUM_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_MAD_ALUM_SIGNED (0x10000000U)
+#define PVR_ROGUE_PDSINST_MAD_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_MAD_CC_ENABLE (0x08000000U)
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT (19U)
+/* 32-bit source to multiply - 32-bit range */
+#define PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT (11U)
+/* 64-bit source to add - 64-bit range */
+#define PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT (4U)
+#define PVR_ROGUE_PDSINST_MAD_DST_SHIFT (0U)
+
+/* cc ? dst = src0 + (src1 * -1sna) + cin
+ *
+ * Add or subtract 2 64 bit numbers. Conditionally takes in a carry in. Always
+ * generates a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_ADD64_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_ADD64_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_ADD64_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_ADD64_ALUM_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_ADD64_ALUM_SIGNED (0x04000000U)
+#define PVR_ROGUE_PDSINST_ADD64_SNA_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_ADD64_SNA_SUB (0x01000000U)
+
+/* 64-bit source to add. */
+#define PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT (12U)
+
+/* 64-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT (5U)
+
+/* 64-bit temp or persistent temp */
+#define PVR_ROGUE_PDSINST_ADD64_DST_SHIFT (0U)
+/* cc ? dst = src0 + (src1 * -1sna) + cin
+ *
+ * Add or subtract 2 32 bit numbers. Conditionally takes in a carry in. Always
+ * generates a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_ADD32_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_ADD32_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_ADD32_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_ADD32_ALUM_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_ADD32_ALUM_SIGNED (0x04000000U)
+#define PVR_ROGUE_PDSINST_ADD32_SNA_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_ADD32_SNA_SUB (0x01000000U)
+/* 32-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT (14U)
+#define PVR_ROGUE_PDSINST_ADD32_SRC0_CLRMSK (0xFFC03FFFU)
+/* 32-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT (6U)
+#define PVR_ROGUE_PDSINST_ADD32_SRC1_CLRMSK (0xFFFFC03FU)
+/* 32-bit temp or persistent temp */
+#define PVR_ROGUE_PDSINST_ADD32_DST_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_ADD32_DST_CLRMSK (0xFFFFFFC0U)
+
+/* Comparison Operation */
+#define PVR_ROGUE_PDSINST_COP_MASK (0x00000003U)
+
+/* = */
+#define PVR_ROGUE_PDSINST_COP_EQ (0x00000000U)
+
+/* > */
+#define PVR_ROGUE_PDSINST_COP_GT (0x00000001U)
+
+/* < */
+#define PVR_ROGUE_PDSINST_COP_LT (0x00000002U)
+
+/* != */
+#define PVR_ROGUE_PDSINST_COP_NE (0x00000003U)
+
+/* Compare Instruction with 2 sources (IM=0)
+ *
+ * im = 0;
+ * cc ? dst = src0 op src1
+ *
+ * Test source 0 against source 1. The result is written to the destination
+ * predicate (P0). All arguments are treated as unsigned.
+ */
+#define PVR_ROGUE_PDSINST_CMP_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_CMP_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_OPCODE_DEFAULT (0xB0000000U) /* CMP */
+#define PVR_ROGUE_PDSINST_CMP_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_CMP_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMP_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_CMP_COP_SHIFT (25U)
+#define PVR_ROGUE_PDSINST_CMP_COP_CLRMSK (0xF9FFFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_COP_EQ (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMP_COP_GT (0x02000000U)
+#define PVR_ROGUE_PDSINST_CMP_COP_LT (0x04000000U)
+#define PVR_ROGUE_PDSINST_CMP_COP_NE (0x06000000U)
+#define PVR_ROGUE_PDSINST_CMP_SETCP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_CMP_SETCP_CLRMSK (0xFEFFFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_SETCP_EN (0x01000000U)
+#define PVR_ROGUE_PDSINST_CMP_IM_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_CMP_IM_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_IM_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMP_IM_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_CMP_SRC0_CLRMSK (0xFF83FFFFU)
+#define PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_CMP_SRC1_CLRMSK (0xFFFFFE03U)
+
+/* 16-bit signed immediate. */
+#define PVR_ROGUE_PDSINST_IMM16_MASK (0x0000FFFFU)
+
+/* Compare Instruction with Immediate (IM=1)
+ *
+ * im = 1;
+ * cc ? dst = src0 op imm16
+ *
+ * Test source 0 against an immediate. The result is written to the destination
+ * predicate (P0). All arguments are treated as unsigned.
+ */
+#define PVR_ROGUE_PDSINST_CMPI_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_CMPI_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_OPCODE_DEFAULT (0xB0000000U) /* CMP */
+#define PVR_ROGUE_PDSINST_CMPI_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_CMPI_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMPI_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_SHIFT (25U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_CLRMSK (0xF9FFFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_COP_EQ (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_GT (0x02000000U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_LT (0x04000000U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_NE (0x06000000U)
+#define PVR_ROGUE_PDSINST_CMPI_SETCP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_CMPI_SETCP_CLRMSK (0xFEFFFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_SETCP_EN (0x01000000U)
+#define PVR_ROGUE_PDSINST_CMPI_IM_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_CMPI_IM_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_IM_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMPI_IM_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_CMPI_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_CMPI_SRC0_CLRMSK (0xFF83FFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_IM16_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_CMPI_IM16_CLRMSK (0xFFFC0003U)
+
+/* Condition codes */
+#define PVR_ROGUE_PDSINST_PREDICATE_MASK (0x0000000FU)
+
+/* Use programmable predicate 0 */
+#define PVR_ROGUE_PDSINST_PREDICATE_P0 (0x00000000U)
+/* Input Predicate 0 - When DM Pixel Start/End Program End of Tile, When DM
+ * Pixel State Program indicates load Uniforms, When DM Vertex Last Vertex In
+ * Task, When DM Compute indicates shared or kernel task (compute thread barrier
+ * mode) or Last In Task (normal mode), When DM Tessellator TBD.
+ */
+#define PVR_ROGUE_PDSINST_PREDICATE_IF0 (0x00000001U)
+/* Input Predicate 1 - When DM Pixel Start/End Program End Render, When DM Pixel
+ * State Program indicates load Texture, When DM vertex First In Task, When DM
+ * Compute indicates synchronization task (compute thread barrier mode) or First
+ * In Task (normal mode), When DM Tessellator TBD.
+ */
+#define PVR_ROGUE_PDSINST_PREDICATE_IF1 (0x00000002U)
+/* Stream 0 Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_0 (0x00000003U)
+/* Stream 1 Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_1 (0x00000004U)
+/* Stream 2 Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_2 (0x00000005U)
+/* Stream 3 Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_3 (0x00000006U)
+/* A Stream Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_GLOBAL (0x00000007U)
+/* For SETC Don't set a new predicate, KEEP the existing one. For BRA
+ * instruction where this is the source predicate, KEEP the instruction, don't
+ * predicate it out.
+ */
+#define PVR_ROGUE_PDSINST_PREDICATE_KEEP (0x00000008U)
+/* DMA Out of Bounds predicate - set by DDMAT instruction when DMA is out of
+ * bounds.
+ */
+#define PVR_ROGUE_PDSINST_PREDICATE_OOB (0x00000009U)
+
+/* Negate condition. */
+
+/* Do not negate condition. */
+#define PVR_ROGUE_PDSINST_NEG_DISABLE (0x00000000U)
+/* Negate condition. */
+#define PVR_ROGUE_PDSINST_NEG_ENABLE (0x00000001U)
+
+/* Branch Address. */
+#define PVR_ROGUE_PDSINST_BRAADDR_MASK (0x0007FFFFU)
+
+/* Branch and Set Selected Predicate Instruction
+ *
+ * im = 1;
+ * cc xor neg ? pc = dst;
+ *
+ * Conditionally branch to an address (ADDR), depending upon the predicate. The
+ * meaning of the predicate can be negated using NEG. This instruction also
+ * allows the current predicate referenced by other instructions to be set by
+ * the SETC field. The current predicate is available by all instructions. This
+ * is a signed offset from the current PC. BRA ADDR=0 would be an infinite loop
+ * of the instruction.
+ */
+
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_DEFAULT (0xC0000000U) /* BRA */
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_CLRMSK (0xF0FFFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_P0 (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_IF0 (0x01000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_IF1 (0x02000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_0 (0x03000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_1 (0x04000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_2 (0x05000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_3 (0x06000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_GLOBAL (0x07000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_KEEP (0x08000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_OOB (0x09000000U)
+#define PVR_ROGUE_PDSINST_BRA_NEG_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_BRA_NEG_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_NEG_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_NEG_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SHIFT (19U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_CLRMSK (0xFF87FFFFU)
+#define PVR_ROGUE_PDSINST_BRA_SETC_P0 (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_IF0 (0x00080000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_IF1 (0x00100000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_0 (0x00180000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_1 (0x00200000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_2 (0x00280000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_3 (0x00300000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_GLOBAL (0x00380000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_KEEP (0x00400000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_OOB (0x00480000U)
+#define PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_BRA_ADDR_CLRMSK (0xFFF80000U)
+
+/* SLC_MODE_LD SLC Cache Policy for loads. */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_MASK (0x00000003U)
+/* Bypass Policy */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_BYPASS (0x00000000U)
+/* Standard Cached Read */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_CACHED (0x00000001U)
+/* Cached Read no allocate */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_CACHED_RD_NA (0x00000003U)
+
+/* CMODE_LD MCU (SLC) Cache Mode for Loads. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_MASK (0x00000003U)
+
+/* Normal cache operation. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_CACHED (0x00000000U)
+
+/* Bypass L0 and L1. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_BYPASS (0x00000001U)
+
+/* Force line fill of L0 and L1. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_FORCE_LINE_FILL (0x00000002U)
+
+/* ld: Number of 64 bit words to load. */
+#define PVR_ROGUE_PDSINST_LD_COUNT8_MASK (0x00000007U)
+
+/* Source Base Address for memory fetch in DWORDS - MUST BE 128 BIT ALIGNED. */
+#define PVR_ROGUE_PDSINST_LD_SRCADD_MASK (UINT64_C(0x0000003FFFFFFFFF))
+
+/* Load Instruction DMA : Src0 */
+
+/* SLC cache policy. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CLRMSK \
+ (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \
+ (UINT64_C(0x4000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED_RD_NA \
+ (UINT64_C(0xc000000000000000))
+
+/* The destination address in the temps (persistent or not) for the read data -
+ * MUST BE 128 BIT ALIGNED.
+ */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT (47U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_CLRMSK (UINT64_C(0xFFF07FFFFFFFFFFF))
+
+/* Cache Mode */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_SHIFT (44U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CLRMSK (UINT64_C(0xFFFFCFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS (UINT64_C(0x0000100000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_FORCE_LINE_FILL \
+ (UINT64_C(0x0000200000000000))
+
+/* ld: Number of 64 bit words to load. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT (41U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_CLRMSK \
+ (UINT64_C(0xFFFFF1FFFFFFFFFF))
+
+/* Source Base Address for memory fetch - MUST BE 128 BIT ALIGNED. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_CLRMSK \
+ (UINT64_C(0xFFFFFF0000000003))
+
+/* Special Instructions Op-code. */
+#define PVR_ROGUE_PDSINST_OPCODESP_MASK (0x0000000FU)
+
+/* Data Load from memory. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LD UINT32_C(0x00000000)
+
+/* Data Store to memory. */
+#define PVR_ROGUE_PDSINST_OPCODESP_ST UINT32_C(0x00000001)
+
+/* Wait read or write data operations to complete. */
+#define PVR_ROGUE_PDSINST_OPCODESP_WDF UINT32_C(0x00000002)
+
+/* Load 16 bit immediate. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LIMM UINT32_C(0x00000003)
+
+/* Lock the execute so only this instance can execute for this data master. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LOCK UINT32_C(0x00000004)
+
+/* Release the lock taken by lock. */
+#define PVR_ROGUE_PDSINST_OPCODESP_RELEASE UINT32_C(0x00000005)
+
+/* Halt execution (program termination). */
+#define PVR_ROGUE_PDSINST_OPCODESP_HALT UINT32_C(0x00000006)
+
+/* Clear stream out predicate. */
+#define PVR_ROGUE_PDSINST_OPCODESP_STMC UINT32_C(0x00000007)
+
+/* Parallel Stream Out. */
+#define PVR_ROGUE_PDSINST_OPCODESP_STMP UINT32_C(0x00000008)
+
+/* Integer Divide. */
+#define PVR_ROGUE_PDSINST_OPCODESP_IDIV UINT32_C(0x00000009)
+
+/* Atomic Access. */
+#define PVR_ROGUE_PDSINST_OPCODESP_AA UINT32_C(0x0000000a)
+
+/* Issue Data Fence. */
+#define PVR_ROGUE_PDSINST_OPCODESP_IDF UINT32_C(0x0000000b)
+
+/* Poll - presumed from mnemonic; original comment duplicated IDF's. Verify. */
+#define PVR_ROGUE_PDSINST_OPCODESP_POL (0x0000000cU)
+
+/* No Operation. */
+#define PVR_ROGUE_PDSINST_OPCODESP_NOP (0x0000000fU)
+
+/* Data Load Instruction (Opcode SP)
+ *
+ * for (i=0; i < count;i++) {
+ * cc ? *(src0 + i) = mem(src1 + i)
+ * }
+ *
+ * Load count words (32-bit here, but COUNT8 says 64-bit - verify) from memory
+ * to the temporaries, reading from the address in memory pointed to by
+ * SRCADD. If the final destination address (DEST + COUNT - 1) exceeds the
+ * amount of temps available the entire load is discarded.
+ */
+#define PVR_ROGUE_PDSINST_LD_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_LD_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_LD_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_LD_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_LD_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_LD_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LD_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_LD_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_LD_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_LD_OP_DEFAULT (0x00000000U) /* ld */
+#define PVR_ROGUE_PDSINST_LD_SRC0_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_LD_SRC0_CLRMSK (0xFFFFFF80U)
+
+/* CMODE_ST MCU (SLC) Cache Mode for stores. */
+#define PVR_ROGUE_PDSINST_CMODE_ST_MASK (0x00000003U)
+
+/* Write-through Policy */
+#define PVR_ROGUE_PDSINST_CMODE_ST_WRITE_THROUGH (0x00000000U)
+
+/* Write-back Policy. */
+#define PVR_ROGUE_PDSINST_CMODE_ST_WRITE_BACK (0x00000001U)
+
+/* Lazy write-back policy. */
+#define PVR_ROGUE_PDSINST_CMODE_ST_LAZY_WRITE_BACK (0x00000002U)
+
+/* ST: Number of 32 bit Words to store. */
+#define PVR_ROGUE_PDSINST_ST_COUNT4_MASK (0x0000000FU)
+
+/* Source Base Address for memory fetch in DWORDS. */
+#define PVR_ROGUE_PDSINST_ST_SRCADD_MASK (UINT64_C(0x0000003FFFFFFFFF))
+
+/* Store Instruction DMA : Src0 */
+
+/* SLC cache policy. */
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_CLRMSK \
+ (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK \
+ (UINT64_C(0x4000000000000000))
+
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT (46U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_CLRMSK (UINT64_C(0xFFF03FFFFFFFFFFF))
+
+/* Cache Mode. */
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_SHIFT (44U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_CLRMSK (UINT64_C(0xFFFFCFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK \
+ (UINT64_C(0x0000100000000000))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_LAZY_WRITE_BACK \
+ (UINT64_C(0x0000200000000000))
+
+/* ST: Number of 32 bit Words to store. */
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT (40U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_CLRMSK \
+ (UINT64_C(0xFFFFF0FFFFFFFFFF))
+
+/* Destination Base Address for memory write. */
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_CLRMSK \
+ (UINT64_C(0xFFFFFF0000000003))
+
+/* Data Store Instruction (Opcode SP)
+ *
+ * for (i=0; i < count;i++) {
+ * cc ? mem(src1 + i) = *(src0 + i)
+ * }
+ *
+ * Store count words (64-bit here, but COUNT4 says 32-bit - verify) from
+ * temporaries to memory (memory address starts at src1). If the instruction
+ * attempts to read data (in temps) outside of its allocated region the
+ * entire store is discarded.
+#define PVR_ROGUE_PDSINST_ST_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_ST_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_ST_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_ST_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_ST_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_ST_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_ST_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_ST_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_ST_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_ST_OP_DEFAULT (0x00800000U) /* ST */
+#define PVR_ROGUE_PDSINST_ST_SRC0_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_ST_SRC0_CLRMSK (0xFFFFFF80U)
+
+/* Data Fence Instruction (Opcode SP)
+ *
+ * cc ? wdf
+ *
+ * The data fence instruction gives the ability to track the return of dependent
+ * read data and to determine when data written from the core has made it to the
+ * MCU. This is required on reads as there is no implicit synchronization
+ * between read accesses to the primary attribute bank and data returned by
+ * dependent reads. For writes it is required where the program is enforcing
+ * synchronization with another program (which could be on the PDS or any other
+ * processor in the system). Note, this only guarantees order within the
+ * PDS. For order elsewhere reads need to be issued, and flush commands may have
+ * to be issued to the MCU.
+ *
+ * The fence mechanism takes the form of a counter that is incremented whenever
+ * a read (ld) or write (ST) instruction is encountered by the instruction fetch
+ * decoder. When the read or write instruction returns, or writes all its data
+ * the counter is decremented. There is 1 counter per thread. Prior to accessing
+ * return data a WDF instruction must be issued, when this is seen by the
+ * instruction decoder it will check the current count value and will suspend
+ * execution if it is currently non zero, execution being resumed as soon as the
+ * counter reaches zero, and a slot is available.
+ *
+ * Example
+ * Do a dependent read for data
+ *
+ * ldr0,#2,r3 Issue read
+ * ... Try and do some other stuff
+ * wdf Make sure read data has come back
+ * add32 r2,r1,r0 And use the returned result
+ *
+ */
+#define PVR_ROGUE_PDSINST_WDF_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_WDF_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_WDF_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_WDF_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_WDF_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_WDF_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_WDF_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_WDF_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_WDF_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_WDF_OP_DEFAULT (0x01000000U) /* WDF */
+
+/* PDS Global Register access control */
+
+/* Disable global register access */
+#define PVR_ROGUE_PDSINST_GR_DISABLE (0x00000000U)
+
+/* Enable global register access, global register specified by IMM16.*/
+#define PVR_ROGUE_PDSINST_GR_ENABLE (0x00000001U)
+
+/* Load Immediate (Opcode SP)
+ *
+ * cc ? GR = DISABLE : *src1 = src0
+ * cc ? GR = ENABLE : *src1 = greg[IMM16]
+ *
+ * Load an immediate value (src0) into the temporary registers. If the GR flag
+ * is set, the PDS global register specified by IMM16 will be loaded instead.
+ * greg[0] = cluster number greg[1] = instance number
+ *
+ */
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_LIMM_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_LIMM_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LIMM_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_LIMM_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_LIMM_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_OP_DEFAULT (0x01800000U) /* LIMM */
+#define PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_LIMM_SRC1_CLRMSK (0xFF83FFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_LIMM_SRC0_CLRMSK (0xFFFC0003U)
+#define PVR_ROGUE_PDSINST_LIMM_GR_SHIFT (1U)
+#define PVR_ROGUE_PDSINST_LIMM_GR_CLRMSK (0xFFFFFFFDU)
+#define PVR_ROGUE_PDSINST_LIMM_GR_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LIMM_GR_ENABLE (0x00000002U)
+
+/* Lock Instruction (Opcode SP)
+ *
+ * cc ? lock
+ *
+ * The hardware contains an internal mutex per data master. When the lock
+ * instruction is issued, the thread will attempt to take control of the mutex
+ * (for the current data master). If it is already taken by another thread, then
+ * the thread is descheduled until it is available.
+ *
+ * The purpose of the lock (and release) instructions is to allow critical
+ * sections of code to execute serially to other code for the same data
+ * master. This is particularly useful when accessing the persistent (cross
+ * thread) temporaries. Note that there is no communication possible across data
+ * masters.
+ *
+ * It is illegal to place a DOUT instruction inside a LOCK, RELEASE section of
+ * code.
+ */
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_LOCK_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_LOCK_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LOCK_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_LOCK_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_LOCK_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_OP_DEFAULT (0x02000000U) /* LOCK */
+
+/* Release Lock (Opcode SP)
+ *
+ * cc ? release
+ *
+ * The hardware contains an internal mutex per data master. If a thread has
+ * issued a lock instruction, then a release instruction must be issued to
+ * release the lock. See the corresponding lock instruction for more details
+ *
+ * It is illegal to place a DOUT instruction inside a LOCK, RELEASE section of
+ * code.
+ */
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_RELEASE_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_DEFAULT (0x02800000U) /* RELEASE */
+
+/* Special instruction - Halt
+ * Halt Execution (Opcode SP)
+ *
+ * cc ? halt
+ *
+ * The last instruction in a program must always be a halt instruction, or a
+ * DOUT/DDMAD instruction with the END flag set. This is required in order to
+ * indicate the end of the program.
+ */
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_HALT_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_HALT_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_HALT_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_HALT_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_HALT_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_OP_DEFAULT (0x03000000U) /* HALT */
+
+/* Special instruction - Nop
+ * No Operation (Opcode SP)
+ *
+ * cc ? NOP
+ *
+ * This instruction does no operation, and introduces a wait cycle into the
+ * pipeline.
+ *
+ */
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_NOP_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_NOP_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_NOP_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_NOP_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_NOP_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_OP_DEFAULT (0x07800000U) /* NOP */
+
+/* The SO bits to clear 0-3 streams 0-3, bit 4-global */
+#define PVR_ROGUE_PDSINST_SOMASK_MASK (0x0000001FU)
+
+/* Special instruction - Stream out predicate clear
+ * (Opcode SP)
+ *
+ * cc ? stmc
+ *
+ * This instruction clears the stream out predicates to 0, according to the
+ * clear bits.
+ *
+ */
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_STMC_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_STMC_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_STMC_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_STMC_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_STMC_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_OP_DEFAULT (0x03800000U) /* STMC */
+#define PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_STMC_SOMASK_CLRMSK (0xFFFFFFE0U)
+
+/* A 1 TB address, with byte granularity. Address must be dword aligned when
+ * repeat is 0.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_ADDRESS_MASK \
+ (UINT64_C(0x000000FFFFFFFFFF))
+
+/* SLC cache policy */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CLRMSK \
+ (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_BYPASS \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CACHED \
+ (UINT64_C(0x4000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CACHED_RD_NA \
+ (UINT64_C(0xc000000000000000))
+
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_ADDRESS_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_ADDRESS_CLRMSK \
+ (UINT64_C(0xFFFFFF0000000000))
+
+/* Size of external memory buffer in bytes (0 is 0 bytes) */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_MSIZE_MASK (0x7FFFFFFFU)
+
+/* When repeat is enabled the size of the DMA in bytes */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_MASK (0x00000003U)
+/* DMA of 1 byte */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_ONE (0x00000000U)
+/* DMA of 2 byte */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_TWO (0x00000001U)
+/* DMA of 3 byte */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_THREE (0x00000002U)
+/* DMA of 4 byte */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_FOUR (0x00000003U)
+
+/* DMA to unified store */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_DEST_UNIFIED_STORE (0x00000000U)
+
+/* DMA to common store */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_AO_MASK (0x00001FFFU)
+
+/* Only applies to unified store DMAs, must be clear for common store.
+ *
+ * DMA is issued natively, in its entirety.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_REPEAT_NOREPEAT (0x00000000U)
+/* BSIZE is the number of times the DMA is repeated. Word size is the size of
+ * the DMA. The DMA is expanded into BSIZE DMAs.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_REPEAT_REPEAT (0x00000001U)
+
+/* Size of fetch in dwords (0 is 0 dwords). */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_MASK (0x00000FFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_RANGE (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_LOWER (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER (255U)
+
+/* Size of external buffer in bytes. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT (33U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK \
+ (UINT64_C(0x00000001FFFFFFFF))
+
+/* Perform OOB checking. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_SHIFT (32U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_CLRMSK \
+ (UINT64_C(0xFFFFFFFEFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN \
+ (UINT64_C(0x0000000100000000))
+
+/* Last DMA in program. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_CLRMSK \
+ (UINT64_C(0xFFFFFFFF7FFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN \
+ (UINT64_C(0x0000000080000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_CLRMSK \
+ (UINT64_C(0xFFFFFFFF9FFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO \
+ (UINT64_C(0x0000000020000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE \
+ (UINT64_C(0x0000000040000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR \
+ (UINT64_C(0x0000000060000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_CLRMSK \
+ (UINT64_C(0xFFFFFFFFEFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_COMMON_STORE \
+ (UINT64_C(0x0000000010000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CLRMSK \
+ (UINT64_C(0xFFFFFFFFF3FFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_BYPASS \
+ (UINT64_C(0x0000000004000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_FORCE_LINE_FILL \
+ (UINT64_C(0x0000000008000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_CLRMSK \
+ (UINT64_C(0xFFFFFFFFFC001FFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_SHIFT (12U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_CLRMSK \
+ (UINT64_C(0xFFFFFFFFFFFFEFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_NOREPEAT \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT \
+ (UINT64_C(0x0000000000001000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_CLRMSK \
+ (UINT64_C(0xFFFFFFFFFFFFF000))
+
+/* Stop execution flag
+ *
+ * Continue execution after this instruction.
+ */
+#define PVR_ROGUE_PDSINST_END_DISABLE (0x00000000U)
+
+/* Halt execution after this instruction. */
+#define PVR_ROGUE_PDSINST_END_ENABLE (0x00000001U)
+
+/* 64-bit Consts 0-63 Destination. */
+#define PVR_ROGUE_PDSINST_REGS64C_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64_UPPER (63U)
+
+/* Multiply-add then send to DOUTD (Opcode SP). Optionally perform
+ * out-of-bounds checking (DDMAD(T)).
+ *
+ * cc ? if (test == 1) then
+ * cc ?   if (((src0 * src1) + src2)[39:0] + (src3[11:0] << 2) <=
+ * cc ?       src2[39:0] + src3[63:33]) then
+ * cc ?     OOB = 0
+ * cc ?     doutd = (src0 * src1) + src2, src3
+ * cc ?   else
+ * cc ?     OOB = 1
+ * cc ?   endif
+ * cc ? else
+ * cc ?   doutd = (src0 * src1) + src2, src3
+ * cc ? endif
+ *
+ * cc ? doutd = (src0 * src1) + src2, src3
+ *
+ * This instruction performs a 32 bit multiply, followed by a 64 bit add. This
+ * result is combined with a 4th source and used to create the data for an DOUTD
+ * emit. A DOUTD is a command to a DMA engine, which reads data from memory and
+ * writes it into the USC Unified or Common Store.
+ *
+ * Additionally the DDMAD performs an out-of-bounds check on the DMA when the
+ * test flag is set. If a buffer overflow is predicted, the DMA is skipped and
+ * the OOB (DMA out of bounds) predicate is set.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_DEFAULT (0xE0000000U) /* DDMAD */
+#define PVR_ROGUE_PDSINST_DDMAD_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_END_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_ENABLE (0x04000000U)
+
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC0_CLRMSK (0xFC03FFFFU)
+
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC1_CLRMSK (0xFFFC1FFFU)
+
+/* 64-bit source to add - 64-bit range */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT (6U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC2_CLRMSK (0xFFFFE03FU)
+
+/* 64-bit constant register destination */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC3_CLRMSK (0xFFFFFFC0U)
+
+/* When DOUTU_SAMPLE_RATE is INSTANCE or SELECTIVE - 32 bit temps per instance
+ * at 4 word granularity. When DOUTU_SAMPLE_RATE is FULL - 32 bit temps per
+ * sample at 4 word granularity.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_ALIGNSIZE (4U)
+
+/* Sample rate */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_MASK (0x00000003U)
+
+/* Instance rate */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_INSTANCE (0x00000000U)
+
+/* Selective sample rate */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_SELECTIVE (0x00000001U)
+
+/* Full sample rate */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_FULL (0x00000002U)
+
+/* Code base address (4 byte alignment). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_MASK (0x3FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_ALIGNSIZE (4U)
+
+/* Use Interface doutu : Src0 */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_SHIFT (41U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_CLRMSK \
+ (UINT64_C(0xFFFFFDFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN \
+ (UINT64_C(0x0000020000000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT (35U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_CLRMSK \
+ (UINT64_C(0xFFFFFE07FFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE (4U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT (33U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_CLRMSK \
+ (UINT64_C(0xFFFFFFF9FFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_INSTANCE \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SELECTIVE \
+ (UINT64_C(0x0000000200000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_FULL \
+ (UINT64_C(0x0000000400000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK \
+ (UINT64_C(0xFFFFFFFF00000003))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSIZE (4U)
+
+/* Use Interface doutu : Src1. */
+
+/* Secondary instance data offset in 32 bit words (offset of the instance). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DOFFSET_MASK (0x00001FFFU)
+
+/* Source Base Address for memory fetch. Address must be dword aligned when
+ * repeat is 0.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SBASE_MASK \
+ (UINT64_C(0x000000FFFFFFFFFF))
+
+/* DMA Interface DOutD : Src0 */
+
+/* SLC cache policy */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CLRMSK \
+ (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_BYPASS \
+ (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED \
+ (UINT64_C(0x4000000000000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED_RD_NA \
+ (UINT64_C(0xc000000000000000))
+
+/* Secondary instance data offset in 32 bit words (offset of the instance). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_DOFFSET_SHIFT (40U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_DOFFSET_CLRMSK \
+ (UINT64_C(0xFFE000FFFFFFFFFF))
+
+/* Source Base Address for memory fetch. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SBASE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SBASE_CLRMSK \
+ (UINT64_C(0xFFFFFF0000000000))
+
+/* When repeat is enabled the size of the DMA in bytes. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_MASK (0x00000003U)
+
+/* DMA of 1 byte */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_ONE (0x00000000U)
+
+/* DMA of 2 byte */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_TWO (0x00000001U)
+
+/* DMA of 3 byte */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_THREE (0x00000002U)
+
+/* DMA of 4 byte */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_FOUR (0x00000003U)
+
+/* Unified Store */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DEST_UNIFIED_STORE (0x00000000U)
+
+/* Common Store */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_AO_MASK (0x00001FFFU)
+
+/* Only applies to unified store DMAs, ignore for common store. */
+
+/* DMA is issued natively, in its entirety. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_REPEAT_NOREPEAT (0x00000000U)
+
+/* BSIZE is the number of times the DMA is repeated. Word size is the size of
+ * the DMA. The DMA is expanded into BSIZE DMAs.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_REPEAT_REPEAT (0x00000001U)
+
+/* Size of fetch in dwords (0 means don't DMA, 1=1 etc.) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_BSIZE_MASK (0x00000FFFU)
+
+/* DMA Interface DOutD : Src1 */
+
+/* Last Write or DMA in program (this needs to be set only once, with the last
+ * DMA or last direct write, whichever comes last).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_CLRMSK (0x7FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN (0x80000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_CLRMSK (0x9FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_ONE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_TWO (0x20000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_THREE (0x40000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_FOUR (0x60000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_CLRMSK (0xEFFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_UNIFIED_STORE \
+ (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE (0x10000000U)
+
+/* CMODE Cache Mode */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CLRMSK (0xF3FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_BYPASS (0x04000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_FORCE_LINE_FILL \
+ (0x08000000U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_CLRMSK (0xFC001FFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_SHIFT (12U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_CLRMSK (0xFFFFEFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_NOREPEAT (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_REPEAT (0x00001000U)
+
+/* Size of fetch in dwords (0 means don't DMA, 1=1 etc.) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK (0xFFFFF000U)
+
+/* Lower 64-bit (63:0) data to be written. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SBASE0_MASK \
+ (UINT64_C(0xFFFFFFFFFFFFFFFF))
+
+/* Direct Write Interface doutw : Src0. */
+
+/* Lower 64-bit (63:0) data to be written */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC0_DATA_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC0_DATA_CLRMSK \
+ (UINT64_C(0x0000000000000000))
+
+/* Unified Store */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_DEST_UNIFIED_STORE (0x00000000U)
+
+/* Common Store */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 128 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_AO_MASK (0x00001FFFU)
+
+/* Direct Write Interface doutw : Src1. */
+
+/* Last Write or DMA in program (this needs to be set only once, with the last
+ * DMA or last direct write, whichever comes last).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_CLRMSK (0x7FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN (0x80000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_CLRMSK (0xEFFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE \
+ (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE (0x10000000U)
+
+/* CMODE Cache Mode */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CLRMSK (0xF3FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS (0x04000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_FORCE_LINE_FILL \
+ (0x08000000U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance). For 64 bit writes the address needs to be 64 bit aligned.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK (0xFC001FFFU)
+
+/* 2-bit dword write mask. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_CLRMSK (0xFFFFFFFCU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER (0x00000001U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64 (0x00000002U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_NONE (0x00000003U)
+
+/* VDM Writeback Interface Doutv : Src0 */
+
+/* Number of Indices to use in Draw Indirect (0 = 0) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SBASE_MASK (0xFFFFFFFFU)
+
+/* VDM Writeback Interface Doutv : Src1 */
+
+/* Number of Indices to use in Draw Indirect (0 = 0) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SRC1_SBASE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SRC1_SBASE_CLRMSK (0x00000000U)
+
+/* Shade Model Control */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_MASK (0x00000003U)
+
+/* Vertex 0 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX0 \
+ (0x00000000U)
+
+/* Vertex 1 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX1 \
+ (0x00000001U)
+
+/* Vertex 2 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX2 \
+ (0x00000002U)
+
+/* Gouraud shaded. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_GOURAUD (0x00000003U)
+
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_MASK (0x00000003U)
+
+/* 1 Dimension (U) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_1D (0x00000000U)
+
+/* 2 Dimension (UV) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_2D (0x00000001U)
+
+/* 3 Dimension (UVS) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_3D (0x00000002U)
+
+/* 4 Dimension (UVST) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_4D (0x00000003U)
+
+/* This issue is perspective correct. */
+
+/* No W */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_PERSPECTIVE_DISABLE (0x00000000U)
+
+/* Use W */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_PERSPECTIVE_ENABLE (0x00000001U)
+
+/* The offset within the vertex if all data is treated as F32 (even if submitted
+ * as F16).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_F32_OFFSET_MASK (0x000000FFU)
+
+/* The offset within vertex taking into account the F16s and F32s present. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_F16_OFFSET_MASK (0x000000FFU)
+
+/* TSP Parameter Fetch Interface DOutI, This command is only legal in a
+ * coefficient loading program.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_MASK (0x1FFFFFFFU)
+
+/* Apply depth bias to this layer. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_EN (0x08000000U)
+
+/* Ignore the F16 and F32 offsets, and the WMODE and send the primitive id
+ * instead.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_EN (0x04000000U)
+
+/* Shade Model for Layer. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_CLRMSK (0xFCFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX0 \
+ (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX1 \
+ (0x01000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX2 \
+ (0x02000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_GOURAUD (0x03000000U)
+
+/* Point sprite Forced. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_EN (0x00800000U)
+
+/* Wrap S Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_SHIFT (22U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_CLRMSK (0xFFBFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_EN (0x00400000U)
+
+/* Wrap V Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_SHIFT (21U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_CLRMSK (0xFFDFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_EN (0x00200000U)
+
+/* Wrap U Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_SHIFT (20U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_CLRMSK (0xFFEFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_EN (0x00100000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_CLRMSK (0xFFF3FFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_1D (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_2D (0x00040000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_3D (0x00080000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_4D (0x000C0000U)
+
+/* Issue is for F16 precision values. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_SHIFT (17U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_CLRMSK (0xFFFDFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_EN (0x00020000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_SHIFT (16U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_CLRMSK (0xFFFEFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_DISABLE \
+ (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_ENABLE (0x00010000U)
+/* The offset within the vertex if all data is treated as F32 (even if submitted
+ * as F16).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F32_OFFSET_SHIFT (8U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F32_OFFSET_CLRMSK (0xFFFF00FFU)
+
+/* The offset within vertex taking into account the F16s and F32s present. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_OFFSET_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_OFFSET_CLRMSK (0xFFFFFF00U)
+
+/* The starting address to write the data into the common store allocation, in
+ * 128 bit words. Each 32 bit value consumes 128 bit words in the common store.
+ * The issues are pack, Issue 0, followed by Issue 1.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_DEST_MASK (0x000000FFU)
+
+/* TSP Parameter Fetch Interface DOutI : Src0 */
+
+/* This is the last issue for the triangle. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_SHIFT (63U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_CLRMSK \
+ (UINT64_C(0x7FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN \
+ (UINT64_C(0x8000000000000000))
+
+/* The starting address to write the data into the common store allocation, in
+ * 128 bit words. Each 32 bit value consumes 128 bit words in the common store.
+ * The issues are pack, Issue 0, followed by Issue 1.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT (54U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_CLRMSK \
+ (UINT64_C(0xC03FFFFFFFFFFFFF))
+
+/* Issue 0 */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_CLRMSK \
+ (UINT64_C(0xFFFFFFFFE0000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_DEPTHBIAS_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_DEPTHBIAS_CLRMSK \
+ (UINT64_C(0xfffffffff7ffffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PRIMITIVEID_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PRIMITIVEID_CLRMSK \
+ (UINT64_C(0xfffffffffbffffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHADEMODEL_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHADEMODEL_CLRMSK \
+ (UINT64_C(0xfffffffffcffffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_POINTSPRITE_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_POINTSPRITE_CLRMSK \
+ (UINT64_C(0xffffffffff7fffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPS_SHIFT (22U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPS_CLRMSK \
+ (UINT64_C(0xffffffffffbfffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPV_SHIFT (21U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPV_CLRMSK \
+ (UINT64_C(0xffffffffffdfffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPU_SHIFT (20U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPU_CLRMSK \
+ (UINT64_C(0xffffffffffefffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SIZE_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SIZE_CLRMSK \
+ (UINT64_C(0xfffffffffff3ffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_SHIFT (17U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_CLRMSK \
+ (UINT64_C(0xfffffffffffdffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PERSPECTIVE_SHIFT (16U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PERSPECTIVE_CLRMSK \
+ (UINT64_C(0xfffffffffffeffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F32_OFFSET_SHIFT (8U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F32_OFFSET_CLRMSK \
+ (UINT64_C(0xffffffffffff00ff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_OFFSET_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_OFFSET_CLRMSK \
+ (UINT64_C(0xffffffffffffff00))
+
+/* TSP Parameter Fetch Interface DOutI : Src1 */
+
+/* 32-bit Temp or DOUT. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_MASK (0x00000007U)
+
+/* DMA data from memory to the USC. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTD (0x00000000U)
+
+/* Write a value directly to the USC. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTW (0x00000001U)
+
+/* Start a USC program. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTU (0x00000002U)
+
+/* Issue a fence back to the VDM (with value). */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTV (0x00000003U)
+
+/* Issue a command to the TSP Parameter Fetch and FPU to calculate and load
+ * coefficients to USC.
+ */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTI (0x00000004U)
+
+/* Issue a fence back to the CDM. Used if compute is enabled. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTC (0x00000005U)
+
+/* Issue DOUT to external devices (Opcode SP)
+ *
+ * cc ? dst = src0, src1
+ *
+ * PDS programs have to send data somewhere. This is the primary function of
+ * the PDS. All programs must therefore execute one of the DOUT, DDMAD or STM
+ * commands. There are the following program types:
+ *
+ * Vertex Shader, Geometry Shader, Hull Shader Programs These programs load data
+ * into memory. These will use the DOUTD or DDMAD commands. Ideally the DDMAD
+ * command is used as the most typical operation Src Address = Index * Stride +
+ * Base, and then DMA from this address. They also schedule the execution of the
+ * USSE and will issue a DOUTU command. This would normally be the last
+ * instruction in the program.
+ *
+ * Obviously the shader programs must not overflow their allocated memory.
+ * However, the USC will do cache look-aheads and so could attempt to fetch
+ * shader code from beyond the end of the program. This could cause a page fault
+ * if the last program instructions are very close to the end of the last valid
+ * memory page.
+ *
+ * To avoid this happening always ensure that the start address of the last
+ * instruction of a shader program does not occur in the last 26 bytes of a
+ * page.
+ *
+ * State/Uniform Loading Programs
+ * These programs load data into memory. These will typically use the DOUTD
+ * command.
+ *
+ * Coefficient Loading Programs
+ * These programs run once per triangle. They load the A,B,C Coefficient for the
+ * iteration of the varyings into the USC. These programs issue DOUTI
+ * commands. These programs must not do any other sort of DOUT command
+ * (DOUTW/DOUTD/DOUTU).
+ *
+ * Pixel Shader Programs
+ * These programs run once per group of pixels and schedule the execution of a
+ * pixel shader on the USC for that group. This program issues a DOUTU (and
+ * that is all).
+ */
+
+/* DOUT instruction word layout. Per the masks below: [31:28] opcode,
+ * [27] cc (conditional execution), [26] end (last instruction of program),
+ * [23:16] src1 (32-bit register), [14:8] src0 (64-bit register),
+ * [2:0] dst (DOUT destination selector).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_DEFAULT (0xF0000000U) /* DOUT */
+#define PVR_ROGUE_PDSINST_DOUT_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DOUT_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_DOUT_END_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_END_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_END_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_END_ENABLE (0x04000000U)
+
+/* 32-bit source */
+#define PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT (16U)
+#define PVR_ROGUE_PDSINST_DOUT_SRC1_CLRMSK (0xFF00FFFFU)
+
+/* 64-bit source */
+#define PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT (8U)
+#define PVR_ROGUE_PDSINST_DOUT_SRC0_CLRMSK (0xFFFF80FFU)
+
+/* DOUT Destination */
+#define PVR_ROGUE_PDSINST_DOUT_DST_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_CLRMSK (0xFFFFFFF8U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTD (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTW (0x00000001U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTU (0x00000002U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTV (0x00000003U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTI (0x00000004U)
+/* DOUTC issues a fence to the CDM; only present on compute-capable cores. */
+#if defined(ROGUE_FEATURE_COMPUTE)
+# define PVR_ROGUE_PDSINST_DOUT_DST_DOUTC (0x00000005U)
+#endif /* ROGUE_FEATURE_COMPUTE */
+
+/* Shift */
+
+#endif /* PVR_ROGUE_PDS_DEFS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_PDS_DISASM_H
+#define PVR_ROGUE_PDS_DISASM_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "util/log.h"
+
+/* Type of operand for an instruction.
+ *
+ * X-macro table: X(enum-name, printable-name, bit-width). Expanded below to
+ * build enum pvr_operand_type; re-expandable for string/size lookup tables.
+ */
+#define PVR_PDS_OPERAND_TYPES \
+   X(TEMP32, temp, 32) \
+   X(PTEMP32, ptemp, 32) \
+   X(CONST32, const, 32) \
+   X(TEMP64, temp, 64) \
+   X(PTEMP64, ptemp, 64) \
+   X(CONST64, const, 64) \
+   X(UNRESOLVED, UNRESOLVED, 0) \
+   X(LITERAL_NUM, literal, 0)
+
+#define X(enum, str, size) enum,
+enum pvr_operand_type { PVR_PDS_OPERAND_TYPES };
+#undef X
+
+#if defined(DUMP_PDS)
+# define PVR_PDS_PRINT_INST(X) pvr_pds_print_instruction(X)
+/* PRIX64 keeps the format specifier matched to the uint64_t argument on every
+ * ABI; "%lX" is only correct where unsigned long is 64 bits wide, and the
+ * mismatch is undefined behavior on 32-bit and LLP64 (Windows) targets.
+ */
+# define PVR_PDS_PRINT_DATA(X, Y, Z) \
+   mesa_logd("\t%s : DATA = 0x%" PRIX64 " ADDRESS = 0x%X\n", X, (uint64_t)(Y), Z)
+#else
+# define PVR_PDS_PRINT_INST(X)
+# define PVR_PDS_PRINT_DATA(X, Y, Z)
+#endif
+
+/* Feature-dependent instruction hooks: these expand to nothing here, so the
+ * corresponding entries are simply absent from PVR_INSTRUCTIONS on this
+ * configuration.
+ * NOTE(review): presumably defined as X(...) entries on cores/builds with the
+ * matching feature — confirm against other build configurations.
+ */
+#define PVR_INSTRUCTION_STMP
+#define PVR_INSTRUCTION_IDIV
+#define PVR_INSTRUCTION_AA
+#define PVR_INSTRUCTION_POL
+#define PVR_INSTRUCTION_IDF
+
+/* X-macro table of every disassemblable PDS instruction; expanded below into
+ * enum pvr_instruction_type (INS_* constants).
+ */
+#define PVR_INSTRUCTIONS \
+   X(STM) \
+   PVR_INSTRUCTION_STMP \
+   PVR_INSTRUCTION_IDIV \
+   PVR_INSTRUCTION_AA \
+   PVR_INSTRUCTION_IDF \
+   PVR_INSTRUCTION_POL \
+   X(STMC) \
+   X(LD) \
+   X(ST) \
+   X(ADD32) \
+   X(ADD64) \
+   X(MAD) \
+   X(DDMAD) \
+   X(DOUT) \
+   X(CMP) \
+   X(BRA) \
+   X(LIMM) \
+   X(SFTLP32) \
+   X(SFTLP64) \
+   X(WDF) \
+   X(LOCK) \
+   X(RELEASE) \
+   X(HALT) \
+   X(NOP)
+
+#define X(a) INS_##a,
+enum pvr_instruction_type { PVR_INSTRUCTIONS };
+#undef X
+
+/* A (possibly negated) predicate register reference, as used by BRA. */
+struct pvr_predicate {
+   uint32_t predicate;
+   bool negate;
+};
+
+struct pvr_instruction;
+
+/* Operands are either sources or dst of an instruction. */
+struct pvr_operand {
+   enum pvr_operand_type type;
+
+   struct pvr_instruction *instruction; /* Owning instruction. */
+   uint64_t literal; /* Literal value if type == LITERAL_NUM */
+   int address; /* Address in word-sizes. */
+   unsigned absolute_address; /* Absolute address within the data segment. */
+   unsigned index; /* Index within instruction, 0 = dst, 1 = src0 .. */
+   bool negate; /* True if the literal is negative. */
+};
+
+/* Logical ops of the SFTLP instructions: X(enum-name, printable-name). */
+#define PVR_PDS_LOP \
+   X(LOP_NONE, none) \
+   X(LOP_NOT, ~) \
+   X(LOP_AND, &) \
+   X(LOP_OR, |) \
+   X(LOP_XOR, xor) \
+   X(LOP_XNOR, xnor) \
+   X(LOP_NAND, nand) \
+   X(LOP_NOR, nor)
+
+#define X(lop, str) lop,
+enum pvr_pds_lop { PVR_PDS_LOP };
+#undef X
+
+/* DOUT destinations: X(enum-name, printable-name). Mirrors the hardware
+ * PVR_ROGUE_PDSINST_DSTDOUT_* selectors, plus disassembler-only entries.
+ */
+#define PVR_PDS_DOUT_DSTS \
+   X(DOUT_D, doutd) \
+   X(DOUT_W, doutw) \
+   X(DOUT_U, doutu) \
+   X(DOUT_V, doutv) \
+   X(DOUT_I, douti) \
+   X(DOUT_C, doutc) \
+   X(DOUT_R, doutr) \
+   X(DOUT_INVALID0, invalid)
+
+#define X(dout_dst, str) dout_dst,
+enum pvr_dout_type { PVR_PDS_DOUT_DSTS };
+#undef X
+
+/* Upper bound for one disassembled instruction's textual form. */
+#define PVR_PDS_MAX_INST_STR_LEN 256
+
+/* Comparison ops of the CMP instruction: ==, >, <, !=. */
+enum pvr_cop { COP_EQ, COP_GT, COP_LT, COP_NE };
+
+/* Base of every decoded instruction; instructions form a singly-linked list
+ * via `next`. Concrete instruction structs embed this as their first member
+ * so they can be downcast from a struct pvr_instruction * using `type`.
+ */
+struct pvr_instruction {
+   enum pvr_instruction_type type;
+   struct pvr_instruction *next;
+};
+
+/* ADD32/ADD64: dst = src0 (+/-) src1; sna selects subtract, alum the ALU
+ * mode, cc conditional execution.
+ */
+struct pvr_add {
+   struct pvr_instruction instruction;
+   struct pvr_operand *dst;
+   struct pvr_operand *src1;
+   struct pvr_operand *src0;
+   bool cc;
+   bool sna;
+   bool alum;
+};
+
+/* Operand-less instructions (WDF, LOCK, RELEASE, HALT, NOP): only a
+ * conditional-execution flag.
+ */
+struct pvr_simple {
+   struct pvr_instruction instruction;
+   bool cc;
+};
+
+/* LD/ST: st distinguishes store (true) from load (false). */
+struct pvr_ldst {
+   struct pvr_instruction instruction;
+   bool cc;
+   struct pvr_operand *src0;
+   bool st;
+};
+
+/* MAD: dst = src0 * src1 (+/-) src2. */
+struct pvr_mad {
+   struct pvr_instruction instruction;
+   struct pvr_operand *dst;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   struct pvr_operand *src2;
+   bool cc;
+   bool sna;
+   bool alum;
+};
+
+/* STM: stream out to `stream_out` with four sources and CCS control flags. */
+struct pvr_stm {
+   struct pvr_instruction instruction;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   struct pvr_operand *src2;
+   struct pvr_operand *src3;
+   unsigned stream_out;
+   bool tst;
+   bool cc;
+   bool ccs_global;
+   bool ccs_so;
+};
+
+/* STMC: stream-out mask control. */
+struct pvr_stmc {
+   struct pvr_instruction instruction;
+   struct pvr_operand *src0;
+   bool cc;
+};
+
+/* BRA: conditional branch on predicate srcc, optionally setting setc. */
+struct pvr_bra {
+   struct pvr_instruction instruction;
+   struct pvr_predicate *srcc;
+   struct pvr_predicate *setc; /* negate ignored */
+   char *target; /* Symbolic label, when known. */
+   signed address; /* signed relative address */
+};
+
+/* DOUT: emit src0/src1 to the data-output unit selected by dst; END marks the
+ * last instruction of the program.
+ */
+struct pvr_dout {
+   struct pvr_instruction instruction;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   enum pvr_dout_type dst;
+   bool cc;
+   bool END;
+};
+
+/* DDMAD: multiply-add address calculation fused with a DMA issue. */
+struct pvr_ddmad {
+   struct pvr_instruction instruction;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   struct pvr_operand *src2;
+   struct pvr_operand *src3;
+   bool cc;
+   bool END;
+};
+
+/* SFTLP32/SFTLP64: shift and/or logical op; IM selects immediate shift. */
+struct pvr_sftlp {
+   struct pvr_instruction instruction;
+   enum pvr_pds_lop lop;
+   struct pvr_operand *dst;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   struct pvr_operand *src2;
+   bool cc;
+   bool IM;
+};
+
+/* LIMM: load immediate src0 into register dst; GR is the encoding's GR bit. */
+struct pvr_limm {
+   struct pvr_instruction instruction;
+   bool cc;
+   bool GR;
+   struct pvr_operand *dst;
+   struct pvr_operand *src0;
+};
+
+/* CMP: compare src0 against src1 (or an immediate when IM is set) with op
+ * cop, setting the predicate.
+ */
+struct pvr_cmp {
+   struct pvr_instruction instruction;
+   enum pvr_cop cop;
+   bool IM;
+   bool cc;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+};
+
+#define PVR_PDS_ERR_PARAM_RANGE 0 /* Error when register is out of range. */
+#define PVR_PDS_ERR_SP_UNKNOWN \
+   1 /* Error when opcode for sp instruction is unknown. */
+
+/* Describes one decode failure, handed to the caller's PVR_ERR_CALLBACK.
+ * (Note: the "dissassembler" spelling is part of the public type name; do not
+ * rename without updating all callers.)
+ */
+struct pvr_dissassembler_error {
+   uint32_t type; /* One of PDS_ERR_* */
+   enum pvr_instruction_type instruction; /* The type of instruction where
+                                             the error occurred. */
+   char *text; /* A string representation of the error. */
+   uint32_t parameter; /* The parameter of the instruction, 0 = dst,
+                          1 = src0.. */
+   uint32_t raw; /* The raw value that caused the error. */
+
+   void *context; /* The passed in context. */
+};
+
+/* Callback when an error happens. */
+typedef void (*PVR_ERR_CALLBACK)(struct pvr_dissassembler_error);
+
+/* Recursively frees an instruction (and, per its allocation scheme, anything
+ * it owns). Ownership: caller releases what the disassembler returned.
+ */
+void pvr_pds_free_instruction(struct pvr_instruction *inst);
+/* Decode one raw 32-bit instruction word; reports problems through
+ * error_call_back with `context` echoed back in the error struct.
+ */
+struct pvr_instruction *
+pvr_pds_disassemble_instruction2(void *context,
+                                 PVR_ERR_CALLBACK error_call_back,
+                                 uint32_t instruction);
+/* Render a decoded instruction as text into buffer (capacity instr_len). */
+void pvr_pds_disassemble_instruction(char *buffer,
+                                     size_t instr_len,
+                                     struct pvr_instruction *instruction);
+
+#if defined(DUMP_PDS)
+void pvr_pds_print_instruction(uint32_t instr);
+#endif
+
+#endif /* PVR_ROGUE_PDS_DISASM_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_PDS_ENCODE_H
+#define PVR_ROGUE_PDS_ENCODE_H
+
+#include <stdint.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "util/macros.h"
+
+/* Classify a REGS64TP register index into its bank: returns
+ * PVR_ROGUE_PDSINST_REGS64TP_TEMP64 for the 64-bit temp range,
+ * PVR_ROGUE_PDSINST_REGS64TP_PTEMP64 for the persistent-temp range,
+ * and 2 for anything outside both.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs64tp(uint32_t value)
+{
+   const bool is_temp = value <= PVR_ROGUE_PDSINST_REGS64TP_TEMP64_UPPER;
+   const bool is_ptemp = value >= PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER &&
+                         value <= PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_UPPER;
+
+   if (is_temp)
+      return PVR_ROGUE_PDSINST_REGS64TP_TEMP64;
+
+   if (is_ptemp)
+      return PVR_ROGUE_PDSINST_REGS64TP_PTEMP64;
+
+   return 2;
+}
+
+/* Classify a REGS32 register index into its bank (const / temp / ptemp);
+ * returns 3 for an index outside every known range.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32(uint32_t value)
+{
+   const bool is_temp = value >= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER &&
+                        value <= PVR_ROGUE_PDSINST_REGS32_TEMP32_UPPER;
+   const bool is_ptemp = value >= PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER &&
+                         value <= PVR_ROGUE_PDSINST_REGS32_PTEMP32_UPPER;
+
+   if (value <= PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER)
+      return PVR_ROGUE_PDSINST_REGS32_CONST32;
+
+   if (is_temp)
+      return PVR_ROGUE_PDSINST_REGS32_TEMP32;
+
+   if (is_ptemp)
+      return PVR_ROGUE_PDSINST_REGS32_PTEMP32;
+
+   return 3;
+}
+
+/* Encode a SFTLP64 (64-bit shift/logical) instruction word. Each field is
+ * masked to its width and placed at the shift defined in
+ * pvr_rogue_pds_defs.h; src0/src1/dst are REGS64TP indices, src2 a REGS32
+ * index, im the immediate-mode bit, cc the conditional-execution bit.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stflp64(uint32_t cc,
+                                                          uint32_t lop,
+                                                          uint32_t im,
+                                                          uint32_t src0,
+                                                          uint32_t src1,
+                                                          uint32_t src2,
+                                                          uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SFTLP64
+              << PVR_ROGUE_PDSINST_SFTLP64_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT);
+   encoded |= ((im & 1U) << PVR_ROGUE_PDSINST_SFTLP64_IM_SHIFT);
+   encoded |= ((lop & PVR_ROGUE_PDSINST_LOP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_SFTLP64_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Classify a REGS32T register index: the temp bank, or 1 when out of range. */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32t(uint32_t value)
+{
+   return (value <= PVR_ROGUE_PDSINST_REGS32T_TEMP32_UPPER)
+             ? PVR_ROGUE_PDSINST_REGS32T_TEMP32
+             : 1;
+}
+
+/* Classify a REGS32TP register index into its bank (temp / ptemp); returns 2
+ * for an index outside both ranges.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32tp(uint32_t value)
+{
+   const bool is_ptemp = value >= PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER &&
+                         value <= PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_UPPER;
+
+   if (value <= PVR_ROGUE_PDSINST_REGS32TP_TEMP32_UPPER)
+      return PVR_ROGUE_PDSINST_REGS32TP_TEMP32;
+
+   return is_ptemp ? PVR_ROGUE_PDSINST_REGS32TP_PTEMP32 : 2;
+}
+
+/* Encode a SFTLP32 (32-bit shift/logical) instruction word; fields masked and
+ * shifted per pvr_rogue_pds_defs.h. src0/dst are REGS32T, src2 REGS32TP,
+ * src1 REGS32 indices.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stflp32(uint32_t im,
+                                                          uint32_t cc,
+                                                          uint32_t lop,
+                                                          uint32_t src0,
+                                                          uint32_t src1,
+                                                          uint32_t src2,
+                                                          uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEB_SFTLP32
+              << PVR_ROGUE_PDSINST_SFTLP32_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT);
+   encoded |= ((lop & PVR_ROGUE_PDSINST_LOP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_SFTLP32_CC_SHIFT);
+   encoded |= ((im & 1U) << PVR_ROGUE_PDSINST_SFTLP32_IM_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a STM (stream out) instruction word. Parameter names follow the
+ * hardware field names (CCS_* flags, SO stream index, SO_SRC0..3 sources).
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stm(uint32_t CCS_CCS_GLOBAL,
+                                                      uint32_t CCS_CCS_SO,
+                                                      uint32_t CCS_CCS_CC,
+                                                      uint32_t SO_TST,
+                                                      uint32_t SO,
+                                                      uint32_t SO_SRC0,
+                                                      uint32_t SO_SRC1,
+                                                      uint32_t SO_SRC2,
+                                                      uint32_t SO_SRC3)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEB_STM
+              << PVR_ROGUE_PDSINST_STM_OPCODE_SHIFT;
+   encoded |= ((SO_SRC3 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT);
+   encoded |= ((SO_SRC2 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT);
+   encoded |= ((SO_SRC1 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT);
+   encoded |= ((SO_SRC0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT);
+   encoded |=
+      ((SO & PVR_ROGUE_PDSINST_SO_MASK) << PVR_ROGUE_PDSINST_STM_SO_SHIFT);
+   encoded |= ((SO_TST & 1U) << PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT);
+   encoded |= ((CCS_CCS_CC & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT);
+   encoded |= ((CCS_CCS_SO & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT);
+   encoded |=
+      ((CCS_CCS_GLOBAL & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Classify a REGS64 register index into its bank (const / temp / ptemp);
+ * returns 3 for an index outside every known range.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs64(uint32_t value)
+{
+   const bool is_temp = value >= PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER &&
+                        value <= PVR_ROGUE_PDSINST_REGS64_TEMP64_UPPER;
+   const bool is_ptemp = value >= PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER &&
+                         value <= PVR_ROGUE_PDSINST_REGS64_PTEMP64_UPPER;
+
+   if (value <= PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER)
+      return PVR_ROGUE_PDSINST_REGS64_CONST64;
+
+   if (is_temp)
+      return PVR_ROGUE_PDSINST_REGS64_TEMP64;
+
+   if (is_ptemp)
+      return PVR_ROGUE_PDSINST_REGS64_PTEMP64;
+
+   return 3;
+}
+
+/* Encode a MAD (multiply-add) instruction word: dst (REGS64T) from
+ * src0 * src1 (+/- per sna) src2. alum selects the ALU mode, cc conditional
+ * execution.
+ */
+static ALWAYS_INLINE uint32_t pvr_rogue_inst_encode_mad(uint32_t sna,
+                                                        uint32_t alum,
+                                                        uint32_t cc,
+                                                        uint32_t src0,
+                                                        uint32_t src1,
+                                                        uint32_t src2,
+                                                        uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEA_MAD
+              << PVR_ROGUE_PDSINST_MAD_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64T_MASK)
+               << PVR_ROGUE_PDSINST_MAD_DST_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_MAD_CC_SHIFT);
+   encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_MAD_ALUM_SHIFT);
+   encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_MAD_SNA_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode an ADD64 instruction word: dst (REGS64TP) = src0 (+/- per sna) src1,
+ * both REGS64 operands.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_add64(uint32_t cc,
+                                                        uint32_t alum,
+                                                        uint32_t sna,
+                                                        uint32_t src0,
+                                                        uint32_t src1,
+                                                        uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_ADD64
+              << PVR_ROGUE_PDSINST_ADD64_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_ADD64_DST_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT);
+   encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_ADD64_SNA_SHIFT);
+   encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_ADD64_ALUM_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_ADD64_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode an ADD32 instruction word: 32-bit counterpart of ADD64 with REGS32
+ * sources and a REGS32TP destination.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_add32(uint32_t cc,
+                                                        uint32_t alum,
+                                                        uint32_t sna,
+                                                        uint32_t src0,
+                                                        uint32_t src1,
+                                                        uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_ADD32
+              << PVR_ROGUE_PDSINST_ADD32_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+               << PVR_ROGUE_PDSINST_ADD32_DST_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT);
+   encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_ADD32_SNA_SHIFT);
+   encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_ADD32_ALUM_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_ADD32_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a register-register CMP instruction word: compares src0 (REGS64TP)
+ * with src1 (REGS64) using cop; IM is cleared (register form), SETCP set.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_cmp(uint32_t cc,
+                                                      uint32_t cop,
+                                                      uint32_t src0,
+                                                      uint32_t src1)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_CMP
+              << PVR_ROGUE_PDSINST_CMP_OPCODE_SHIFT;
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT);
+   encoded |= UINT32_C(0x0) << PVR_ROGUE_PDSINST_CMP_IM_SHIFT;
+   encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMP_SETCP_SHIFT;
+   encoded |=
+      ((cop & PVR_ROGUE_PDSINST_COP_MASK) << PVR_ROGUE_PDSINST_CMP_COP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_CMP_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode the immediate form of CMP: IM is set and a 16-bit immediate (im16)
+ * is packed in place of the src1 register.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_cmpi(uint32_t cc,
+                                                       uint32_t cop,
+                                                       uint32_t src0,
+                                                       uint32_t im16)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_CMP
+              << PVR_ROGUE_PDSINST_CMPI_OPCODE_SHIFT;
+   encoded |= ((im16 & PVR_ROGUE_PDSINST_IMM16_MASK)
+               << PVR_ROGUE_PDSINST_CMPI_IM16_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_CMPI_SRC0_SHIFT);
+   encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMPI_IM_SHIFT;
+   encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMPI_SETCP_SHIFT;
+   encoded |=
+      ((cop & PVR_ROGUE_PDSINST_COP_MASK) << PVR_ROGUE_PDSINST_CMPI_COP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_CMPI_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a BRA instruction word: branch by the signed relative address `addr`
+ * when predicate srcc (negated when neg is set) passes; setc selects the
+ * predicate to update.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_bra(uint32_t srcc,
+                                                      uint32_t neg,
+                                                      uint32_t setc,
+                                                      uint32_t addr)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_BRA
+              << PVR_ROGUE_PDSINST_BRA_OPCODE_SHIFT;
+   encoded |= ((addr & PVR_ROGUE_PDSINST_BRAADDR_MASK)
+               << PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT);
+   encoded |= ((setc & PVR_ROGUE_PDSINST_PREDICATE_MASK)
+               << PVR_ROGUE_PDSINST_BRA_SETC_SHIFT);
+   encoded |= ((neg & 1U) << PVR_ROGUE_PDSINST_BRA_NEG_SHIFT);
+   encoded |= ((srcc & PVR_ROGUE_PDSINST_PREDICATE_MASK)
+               << PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a LD instruction word (OPCODEC_SP group, LD sub-opcode) with a
+ * REGS64 source operand.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_ld(uint32_t cc, uint32_t src0)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP << PVR_ROGUE_PDSINST_LD_OPCODE_SHIFT;
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_LD_SRC0_SHIFT);
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_LD << PVR_ROGUE_PDSINST_LD_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_LD_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a ST instruction word (OPCODEC_SP group, ST sub-opcode) with a
+ * REGS64 source operand.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_st(uint32_t cc, uint32_t src0)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP << PVR_ROGUE_PDSINST_ST_OPCODE_SHIFT;
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_ST_SRC0_SHIFT);
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_ST << PVR_ROGUE_PDSINST_ST_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_ST_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a WDF (wait for DMA fence) instruction word; only the cc bit is
+ * configurable.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_wdf(uint32_t cc)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP
+              << PVR_ROGUE_PDSINST_WDF_OPCODE_SHIFT;
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_WDF << PVR_ROGUE_PDSINST_WDF_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_WDF_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a LIMM (load immediate) instruction word: packs the 16-bit immediate
+ * src0 and destination register src1 (REGS32T); gr is the encoding's GR bit.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_limm(uint32_t cc,
+                                                       uint32_t src1,
+                                                       uint32_t src0,
+                                                       uint32_t gr)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP
+              << PVR_ROGUE_PDSINST_LIMM_OPCODE_SHIFT;
+   encoded |= ((gr & 1U) << PVR_ROGUE_PDSINST_LIMM_GR_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_IMM16_MASK)
+               << PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT);
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_LIMM
+              << PVR_ROGUE_PDSINST_LIMM_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_LIMM_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a LOCK instruction word; only the cc bit is configurable. */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_lock(uint32_t cc)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP
+              << PVR_ROGUE_PDSINST_LOCK_OPCODE_SHIFT;
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_LOCK
+              << PVR_ROGUE_PDSINST_LOCK_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_LOCK_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a RELEASE instruction word; only the cc bit is configurable. */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_release(uint32_t cc)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP
+              << PVR_ROGUE_PDSINST_RELEASE_OPCODE_SHIFT;
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_RELEASE
+              << PVR_ROGUE_PDSINST_RELEASE_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_RELEASE_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a HALT instruction word; only the cc bit is configurable. */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_halt(uint32_t cc)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP
+              << PVR_ROGUE_PDSINST_HALT_OPCODE_SHIFT;
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_HALT
+              << PVR_ROGUE_PDSINST_HALT_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_HALT_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a STMC (stream-out mask control) instruction word with the given
+ * stream-out mask.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stmc(uint32_t cc,
+                                                       uint32_t so_mask)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP
+              << PVR_ROGUE_PDSINST_STMC_OPCODE_SHIFT;
+   encoded |= ((so_mask & PVR_ROGUE_PDSINST_SOMASK_MASK)
+               << PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT);
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_STMC
+              << PVR_ROGUE_PDSINST_STMC_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_STMC_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Classify a REGS64C register index: the 64-bit constant bank, or 1 when out
+ * of range.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_rogue_pds_inst_decode_field_range_regs64c(uint32_t value)
+{
+   return (value <= PVR_ROGUE_PDSINST_REGS64C_CONST64_UPPER)
+             ? PVR_ROGUE_PDSINST_REGS64C_CONST64
+             : 1;
+}
+
+/* Encode a DDMAD instruction word (multiply-add address calculation fused
+ * with a DMA issue): src0 (REGS32) and src1 (REGS32T) multiply terms, src2
+ * (REGS64) addend/base, src3 (REGS64C) DMA control constant; end flags the
+ * last instruction of the program.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_ddmad(uint32_t cc,
+                                                        uint32_t end,
+                                                        uint32_t src0,
+                                                        uint32_t src1,
+                                                        uint32_t src2,
+                                                        uint32_t src3)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_DDMAD
+              << PVR_ROGUE_PDSINST_DDMAD_OPCODE_SHIFT;
+   encoded |= ((src3 & PVR_ROGUE_PDSINST_REGS64C_MASK)
+               << PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT);
+   encoded |= ((end & 1U) << PVR_ROGUE_PDSINST_DDMAD_END_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_DDMAD_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Encode a DOUT instruction word: emit src0 (REGS64) / src1 (REGS32) to the
+ * data-output unit selected by dst (PVR_ROGUE_PDSINST_DSTDOUT_*); end flags
+ * the last instruction of the program.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_dout(uint32_t cc,
+                                                       uint32_t end,
+                                                       uint32_t src1,
+                                                       uint32_t src0,
+                                                       uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_DOUT
+              << PVR_ROGUE_PDSINST_DOUT_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_DSTDOUT_MASK)
+               << PVR_ROGUE_PDSINST_DOUT_DST_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT);
+   encoded |= ((end & 1U) << PVR_ROGUE_PDSINST_DOUT_END_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_DOUT_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+#endif /* PVR_ROGUE_PDS_ENCODE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "pvr_device_info.h"
+#include "pvr_pds.h"
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "pvr_rogue_pds_encode.h"
+#include "util/log.h"
+#include "util/macros.h"
+
+/* Map a bank-relative register index onto the flat encoding ranges from
+ * pvr_rogue_pds_defs.h: _C = constants, _T = temps, _P = persistent temps,
+ * for the 32-bit (R32*) and 64-bit (R64*) register files.
+ */
+#define R32_C(x) ((x) + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)
+#define R32_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER)
+#define R32_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER)
+
+#define R32TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER)
+#define R32TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER)
+
+#define R64_C(x) ((x) + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)
+#define R64_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER)
+#define R64_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER)
+
+#define R64TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER)
+#define R64TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER)
+
+/* 32-bit PTemp index for draw indirect base instance. */
+#define PVR_INDIRECT_BASE_INSTANCE_PTEMP 1U
+
+/* Number of constants to reserve per DDMAD instruction in the PDS Vertex. */
+#define PVR_PDS_DDMAD_NUM_CONSTS 8
+
+#if defined(TRACE_PDS)
+/* Some macros for a pretty printing. Compiled out entirely (empty
+ * definitions below) unless TRACE_PDS is defined.
+ */
+
+# define pvr_debug_pds_const(reg, size, annotation) \
+   mesa_logd("const[%d] @ (%dbits) %s", reg, size, annotation)
+# define pvr_debug_pds_temp(reg, size, annotation) \
+   mesa_logd("temp[%d] @ (%dbits) %s", reg, size, annotation)
+# define pvr_debug_pds_note(...) mesa_logd("   // " __VA_ARGS__)
+# define pvr_debug_pds_flag(flags, flag) \
+   { \
+      if ((flags & flag) == flag) \
+         mesa_logd("     > " #flag); \
+   }
+# define pvr_debug(annotation) mesa_logd(annotation)
+
+#else
+# define pvr_debug_pds_const(reg, size, annotation)
+# define pvr_debug_pds_temp(reg, size, annotation)
+# define pvr_debug_pds_note(...)
+# define pvr_debug_pds_flag(flags, flag)
+# define pvr_debug(annotation)
+#endif
+
+/* Cursor for appending variable-size entries to a pvr_pds_info's const-map
+ * entry buffer; tracks the last entry's size so the next write lands
+ * immediately after it.
+ */
+struct pvr_pds_const_map_entry_write_state {
+   const struct pvr_pds_info *PDS_info; /* Buffer owner (capacity source). */
+   struct pvr_const_map_entry *entry; /* Most recently prepared entry. */
+   size_t size_of_last_entry_in_bytes;
+   uint32_t entry_count; /* Entries written so far. */
+   size_t entries_size_in_bytes; /* Running total, checked vs. capacity. */
+};
+
+/* Reset a const-map entry write cursor to the start of PDS_info's entry
+ * buffer, with all counters zeroed.
+ */
+static void pvr_init_pds_const_map_entry_write_state(
+   struct pvr_pds_info *PDS_info,
+   struct pvr_pds_const_map_entry_write_state *entry_write_state)
+{
+   *entry_write_state = (struct pvr_pds_const_map_entry_write_state){
+      .PDS_info = PDS_info,
+      .entry = PDS_info->entries,
+      .size_of_last_entry_in_bytes = 0,
+      .entry_count = 0,
+      .entries_size_in_bytes = 0,
+   };
+}
+
+/* Returns a pointer to the next struct pvr_const_map_entry. */
+static void *pvr_prepare_next_pds_const_map_entry(
+ struct pvr_pds_const_map_entry_write_state *entry_write_state,
+ size_t size_of_next_entry_in_bytes)
+{
+ /* Move on to the next entry. */
+ uint8_t *next_entry = ((uint8_t *)entry_write_state->entry +
+ entry_write_state->size_of_last_entry_in_bytes);
+ entry_write_state->entry = (struct pvr_const_map_entry *)next_entry;
+
+ entry_write_state->size_of_last_entry_in_bytes = size_of_next_entry_in_bytes;
+ entry_write_state->entry_count++;
+ entry_write_state->entries_size_in_bytes += size_of_next_entry_in_bytes;
+
+ /* Check if we can write into the next entry. */
+ assert(entry_write_state->entries_size_in_bytes <=
+ entry_write_state->PDS_info->entries_size_in_bytes);
+
+ return entry_write_state->entry;
+}
+
+/* Append a vertex-attribute-address const-map entry describing DMA for one
+ * vertex binding at const offset const_val. When use_robust_vertex_fetch is
+ * set, the larger "robust" entry variant is emitted, carrying the extra
+ * component size and robustness-buffer offset needed for clamped fetches.
+ */
+static void pvr_write_pds_const_map_entry_vertex_attribute_address(
+   struct pvr_pds_const_map_entry_write_state *entry_write_state,
+   const struct pvr_pds_vertex_dma *DMA,
+   uint32_t const_val,
+   bool use_robust_vertex_fetch)
+{
+   pvr_debug_pds_note("DMA %d dwords, stride %d, offset %d, bindingIdx %d",
+                      DMA->size_in_dwords,
+                      DMA->stride,
+                      DMA->offset,
+                      DMA->binding_index);
+
+   if (use_robust_vertex_fetch) {
+      struct pvr_const_map_entry_robust_vertex_attribute_address
+         *robust_attribute_entry;
+
+      robust_attribute_entry =
+         pvr_prepare_next_pds_const_map_entry(entry_write_state,
+                                              sizeof(*robust_attribute_entry));
+      robust_attribute_entry->type =
+         PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS;
+      robust_attribute_entry->const_offset = const_val;
+      robust_attribute_entry->binding_index = DMA->binding_index;
+      robust_attribute_entry->component_size_in_bytes =
+         DMA->component_size_in_bytes;
+      robust_attribute_entry->offset = DMA->offset;
+      robust_attribute_entry->stride = DMA->stride;
+      robust_attribute_entry->size_in_dwords = DMA->size_in_dwords;
+      robust_attribute_entry->robustness_buffer_offset =
+         DMA->robustness_buffer_offset;
+   } else {
+      struct pvr_const_map_entry_vertex_attribute_address *attribute_entry;
+
+      attribute_entry =
+         pvr_prepare_next_pds_const_map_entry(entry_write_state,
+                                              sizeof(*attribute_entry));
+      attribute_entry->type =
+         PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS;
+      attribute_entry->const_offset = const_val;
+      attribute_entry->binding_index = DMA->binding_index;
+      attribute_entry->offset = DMA->offset;
+      attribute_entry->stride = DMA->stride;
+      attribute_entry->size_in_dwords = DMA->size_in_dwords;
+   }
+}
+
+/* Encode a DOUT instruction with destination DOUTU; src1 is unused for this
+ * destination and is therefore fixed at 0.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc,
+                                   end,
+                                   0 /* src1 */,
+                                   src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
+}
+
+/* Encode a DOUTD burst of 'dma_size_in_dwords' dwords to 'destination', and
+ * emit the literal const map entry holding the instruction's SRC1 control
+ * word at constant 'const32'.  Returns the encoded DOUTD instruction.
+ */
+static uint32_t
+pvr_encode_burst(struct pvr_pds_const_map_entry_write_state *entry_write_state,
+                 bool last_DMA,
+                 bool halt,
+                 unsigned int const32,
+                 unsigned int const64,
+                 unsigned int dma_size_in_dwords,
+                 unsigned int destination,
+                 unsigned int store)
+{
+   struct pvr_const_map_entry_literal32 *literal_entry;
+   uint32_t src1_word;
+
+   /* Assemble the DOUTD SRC1 control word. */
+   src1_word = dma_size_in_dwords
+               << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
+   src1_word |= destination
+                << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
+   src1_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED | store;
+
+   if (last_DMA)
+      src1_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
+
+   /* Publish the control word to the driver via the const map. */
+   literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
+                                                        sizeof(*literal_entry));
+   literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+   literal_entry->const_offset = const32;
+   literal_entry->literal_value = src1_word;
+
+   /* Encode the DOUTD itself. */
+   return pvr_pds_inst_encode_dout(0,
+                                   halt,
+                                   R32_C(const32),
+                                   R64_C(const64),
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
+}
+
+/* Convenience wrapper for pvr_encode_burst() when the burst destination is
+ * the common store.
+ *
+ * NOTE(review): the store argument reuses the DOUTW destination define
+ * (..._DOUTW_SRC1_DEST_COMMON_STORE) inside a DOUTD SRC1 word — presumably
+ * the field encodings are shared; confirm against the PDS instruction spec.
+ */
+#define pvr_encode_burst_cs(psDataEntry,                            \
+                            last_DMA,                               \
+                            halt,                                   \
+                            const32,                                \
+                            const64,                                \
+                            dma_size_in_dwords,                     \
+                            destination)                            \
+   pvr_encode_burst(                                                \
+      psDataEntry,                                                  \
+      last_DMA,                                                     \
+      halt,                                                         \
+      const32,                                                      \
+      const64,                                                      \
+      dma_size_in_dwords,                                           \
+      destination,                                                  \
+      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE)
+
+/* Encode a DOUTW that writes the 64-bit source at 'const64' directly to the
+ * register 'destination', and emit the literal const map entry holding the
+ * instruction's SRC1 control word at 'const32'.
+ *
+ * data_mask selects what is written: 0x1 the lower 32 bits, 0x2 the upper
+ * 32 bits, anything else all 64 bits.  Returns the encoded instruction.
+ */
+static uint32_t pvr_encode_direct_write(
+   struct pvr_pds_const_map_entry_write_state *entry_write_state,
+   bool last_DMA,
+   bool halt,
+   unsigned int const32,
+   unsigned int const64,
+   uint32_t data_mask,
+   unsigned int destination,
+   uint32_t destination_store,
+   const struct pvr_device_info *dev_info)
+{
+   struct pvr_const_map_entry_literal32 *literal_entry;
+
+   /* Note: const32/const64 are used as-is here (unlike pvr_encode_burst()),
+    * so callers pass already-encoded register selectors, e.g. R64_C(...).
+    */
+   uint32_t instruction =
+      pvr_pds_inst_encode_dout(0,
+                               halt,
+                               const32,
+                               const64,
+                               PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
+
+   /* Build the SRC1 control word incrementally in the const map entry. */
+   literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
+                                                        sizeof(*literal_entry));
+   literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+   literal_entry->const_offset = const32;
+   literal_entry->literal_value = destination_store;
+
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED;
+   }
+
+   literal_entry->literal_value |=
+      destination << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
+
+   /* Select which half (or both) of the 64-bit source is written. */
+   if (data_mask == 0x1) {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER;
+   } else if (data_mask == 0x2) {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER;
+   } else {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64;
+   }
+
+   if (last_DMA) {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+   }
+
+   return instruction;
+}
+
+/* Constant and Temporary register allocation
+ * - reserve space for a 32-bit register or a 64-bit register
+ * - returned indices are offsets to 32-bit register locations
+ * - 64-bit registers need to be aligned to even indices.
+ */
+#define RESERVE_32BIT 1U
+#define RESERVE_64BIT 2U
+
+/* In non-DEBUG builds the human-readable register names are dropped.  The
+ * expansions must stay plain expressions (no trailing semicolon) so the
+ * macros can be used in expression context, e.g. as an initializer.
+ */
+#if defined(DEBUG)
+#   define pvr_find_constant(usage, words, name) \
+      pvr_find_constant2(usage, words, name)
+#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, name)
+#else
+#   define pvr_find_constant(usage, words, name) \
+      pvr_find_constant2(usage, words, NULL)
+#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, NULL)
+#endif
+
+/* Allocate 'words' consecutive 32-bit constant registers (words must be 1 or
+ * 2; 2-word allocations land on even offsets because the inner search steps
+ * by 'words') and return the 32-bit register index of the first one.
+ *
+ * const_usage holds one byte per bank; a set bit marks a register in use.
+ * NOTE(review): the 'i * 8' below assumes each bank covers 8 registers,
+ * i.e. PVR_PDS_DDMAD_NUM_CONSTS == 8 (consistent with the uint8_t usage
+ * mask) — confirm if that constant ever changes.
+ */
+static uint32_t
+pvr_find_constant2(uint8_t *const_usage, uint8_t words, const char *const_name)
+{
+   uint32_t const_index = ~0U;
+   uint32_t step = words;
+   uint8_t mask = (1 << words) - 1;
+
+   assert(words == 1 || words == 2);
+
+   /* Find a register at 'step' alignment that satisfies the mask. */
+   for (uint32_t i = 0; i < PVR_MAX_VERTEX_ATTRIB_DMAS; i++) {
+      for (uint32_t b = 0; b < PVR_PDS_DDMAD_NUM_CONSTS; b += step) {
+         if ((const_usage[i] & (mask << b)) != 0)
+            continue;
+         /* Claim the register(s) and hand back their index. */
+         const_usage[i] |= (mask << b);
+         const_index = i * 8 + b;
+         pvr_debug_pds_const(const_index, words * 32, const_name);
+         return const_index;
+      }
+   }
+
+   unreachable("Unexpected: Space cannot be found for constant");
+   return ~0U;
+}
+
+#define PVR_MAX_PDS_TEMPS 32
+/* PDS temp register allocator state. */
+struct pvr_temp_usage {
+   uint32_t temp_usage; /* Bitmask: bit N set => temp N is in use. */
+   uint8_t temp_used; /* Running count of temps allocated. */
+   uint8_t temps_needed; /* High-water mark: highest in-use temp index + 1. */
+};
+
+#define PVR_INVALID_TEMP UINT8_C(~0)
+
+/* Allocate 'temps_needed' consecutive temp registers (must be 1 or 2; 2-temp
+ * allocations land on even indices because the search steps by
+ * 'temps_needed') and return the index of the first one.  Also updates the
+ * allocator's high-water mark in temps->temps_needed.
+ */
+static uint8_t pvr_get_temps2(struct pvr_temp_usage *temps,
+                              uint8_t temps_needed,
+                              const char *temp_name)
+{
+   const uint8_t step = temps_needed;
+   /* Build the mask in a 32-bit unsigned type: the original uint8_t mask
+    * promoted to (signed) int, so shifting it to bit 30/31 below was
+    * undefined behavior.
+    */
+   const uint32_t mask = ((uint32_t)1 << temps_needed) - 1;
+
+   assert(temps_needed == 1 || temps_needed == 2);
+   assert(temps->temp_used + temps_needed <= PVR_MAX_PDS_TEMPS);
+
+   for (uint8_t i = 0; i < PVR_MAX_PDS_TEMPS; i += step) {
+      const uint32_t mask_at_i = mask << i;
+
+      if ((temps->temp_usage & mask_at_i) != 0)
+         continue;
+
+      const size_t clzBits = 8 * sizeof(unsigned int);
+
+      temps->temp_usage |= mask_at_i;
+      temps->temp_used += temps_needed;
+      /* High-water mark: index of the highest set usage bit, plus one. */
+      temps->temps_needed =
+         clzBits - __builtin_clz((unsigned int)temps->temp_usage);
+
+      pvr_debug_pds_temp(i, temps_needed * 32, temp_name);
+
+      return i;
+   }
+
+   unreachable("Unexpected: Space cannot be found for temps");
+   return PVR_INVALID_TEMP;
+}
+
+/**
+ * Wrapper macro to add a toggle for "data mode", allowing us to calculate the
+ * size of a PDS program without actually attempting to store it.
+ *
+ * \param dest The array/memory pointer where the PDS program should be stored.
+ * If the given code is NULL, automatically switch to count mode
+ * instead of attempting to fill in unallocated memory.
+ * \param counter The local counter that holds the total instruction count.
+ * \param statement What function call/value should be stored at dest[counter]
+ * when condition is false.
+ */
+
+#define PVR_PDS_MODE_TOGGLE(dest, counter, statement) \
+ if (!dest) { \
+ counter++; \
+ } else { \
+ dest[counter++] = statement; \
+ PVR_PDS_PRINT_INST(statement); \
+ }
+
+/**
+ * Generates the PDS vertex primary program for the dma's listed in the input
+ * structure. Produces the constant map for the Vulkan driver based upon the
+ * requirements of the instructions added to the program.
+ *
+ * PDS Data Layout
+ * ---------------
+ *
+ * The PDS data is optimized for the DDMAD layout, with the data for those
+ * instructions laid out first. The data required for other instructions is laid
+ * out in the entries unused by the DDMADs.
+ *
+ * DDMAD layout
+ * \verbatim
+ * bank | index | usage
+ * 0 | 0:1 | temps (current index)[-]
+ * 2 | 2:3 | stride[32]
+ * 1 | 4:5 | base address[64]
+ * 3 | 6:7 | ctrl[64]
+ * \endverbatim
+ *
+ * Each DMA whose stride > 0 requires one entry, laid out as above. We stride
+ * over the banks to ensure that each ddmad reads each of its operands from a
+ * different bank (i.e. remove bank clashes)
+ *
+ * Note: This is "wasting" const[0:1] and const[2], however these free
+ * registers will be used by other, non-ddmad instructions.
+ *
+ * The const register usage is maintained in the const_usage array; the
+ * DDMAD instructions, for example, will utilize the top 5 registers in each
+ * block of 8, hence a 'usage mask' of 0xF8 (0b11111000).
+ *
+ * Constant Map
+ * ------------
+ *
+ * The constant map is built up as we add PDS instructions and passed back
+ * for the driver to fill in the PDS data section with the correct parameters
+ * for each draw call.
+ *
+ * \param input_program PDS Program description.
+ * \param code Buffer to be filled in with the PDS program. If NULL is provided,
+ * automatically switch to count-mode, preventing writes to
+ * unallocated memory.
+ * \param info PDS info structure filled in for the driver, contains the
+ * constant map.
+ * \param use_robust_vertex_fetch Do vertex fetches apply range checking.
+ * \param dev_info pvr device information struct.
+ */
+void pvr_pds_generate_vertex_primary_program(
+ struct pvr_pds_vertex_primary_program_input *input_program,
+ uint32_t *code,
+ struct pvr_pds_info *info,
+ bool use_robust_vertex_fetch,
+ const struct pvr_device_info *dev_info)
+{
+ struct pvr_pds_const_map_entry_write_state entry_write_state;
+ struct pvr_const_map_entry_doutu_address *doutu_address_entry;
+
+ uint32_t instruction = 0; /* index into code */
+ uint32_t index; /* index used for current attribute, either vertex or
+ * instance.
+ */
+
+ uint32_t total_dma_count = 0;
+ uint32_t running_dma_count = 0;
+
+ uint32_t write_instance_control = ~0;
+ uint32_t write_vertex_control = ~0;
+ uint32_t write_base_instance_control = ~0;
+ uint32_t write_base_vertex_control = ~0;
+ uint32_t pvr_write_draw_index_control = ~0;
+
+ uint32_t ddmad_count = 0;
+ uint32_t doutw_count = 0;
+
+ uint32_t base_instance = 0;
+ uint32_t base_vertex = 0;
+ uint32_t draw_index = 0;
+
+ uint8_t const_usage[PVR_MAX_VERTEX_ATTRIB_DMAS] = { 0 };
+
+ struct pvr_temp_usage temp_usage = { 0 };
+
+ uint32_t zero_temp = PVR_INVALID_TEMP;
+
+ uint32_t max_index_temp = PVR_INVALID_TEMP;
+ uint32_t current_index_temp = PVR_INVALID_TEMP;
+
+ uint32_t index_id_temp = PVR_INVALID_TEMP;
+ uint32_t base_instance_ID_temp = PVR_INVALID_TEMP;
+ uint32_t instance_ID_temp = PVR_INVALID_TEMP;
+
+ /* Debug tracing of program flags. */
+ pvr_debug("pvr_pds_generate_vertex_primary_program");
+ pvr_debug("=================================================");
+ pvr_debug_pds_flag(input_program->flags,
+ PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED);
+ pvr_debug_pds_flag(input_program->flags,
+ PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED);
+ pvr_debug_pds_flag(input_program->flags,
+ PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT);
+ pvr_debug_pds_flag(input_program->flags,
+ PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT);
+ pvr_debug_pds_flag(input_program->flags,
+ PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED);
+ pvr_debug_pds_flag(input_program->flags,
+ PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
+ pvr_debug_pds_flag(input_program->flags,
+ PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
+ pvr_debug(" ");
+
+ pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
+
+ /* At a minimum we need 2 dwords for the DOUTU, but since we allocate in
+ * blocks of 4 we can reserve dwords for the instance/vertex DOUTW.
+ */
+ info->data_size_in_dwords = 4;
+
+ /* Reserve 2 temps - these are automatically filled in by the VDM
+ *
+ * For instanced draw calls we manually increment the instance id by the
+ * base-instance offset which is either provided as a constant, or in a
+ * ptemp (for draw indirect)
+ *
+ * temp - contents
+ * ---------------
+ * 0 - index id (pre-filled)
+ * 1 - base instance + instance id
+ */
+ index_id_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Index id");
+ instance_ID_temp =
+ pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Instance id");
+
+ /* Reserve the lowest 2 dwords for DOUTU.
+ * [------XX]
+ */
+ const_usage[0] = 0x03;
+
+ /* Reserve consts for all the DDMAD's. */
+ for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
+ /* Mark the consts required by this ddmad "in-use".
+ * [XXXXX---]
+ */
+ const_usage[ddmad_count++] |= 0xf8;
+ }
+
+ /* Start off by assuming we can fit everything in the 8 dwords/ddmad
+ * footprint, if any DOUTD/DOUTW falls outside we will increase this
+ * counter.
+ */
+ if (ddmad_count)
+ info->data_size_in_dwords = PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count;
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
+ doutw_count++;
+ write_vertex_control =
+ pvr_find_constant(const_usage, RESERVE_32BIT, "Vertex id DOUTW Ctrl");
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
+ doutw_count++;
+ write_instance_control = pvr_find_constant(const_usage,
+ RESERVE_32BIT,
+ "Instance id DOUTW Ctrl");
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
+ doutw_count++;
+ write_base_instance_control =
+ pvr_find_constant(const_usage,
+ RESERVE_32BIT,
+ "Base Instance DOUTW Ctrl");
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
+ doutw_count++;
+ write_base_vertex_control = pvr_find_constant(const_usage,
+ RESERVE_32BIT,
+ "Base Vertex DOUTW Ctrl");
+
+ /* Load base vertex from constant for non-indirect variants. */
+ if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
+ 0) {
+ struct pvr_const_map_entry_base_vertex *psBaseVertexEntry =
+ (struct pvr_const_map_entry_base_vertex *)entry_write_state.entry;
+
+ base_vertex =
+ pvr_find_constant(const_usage, RESERVE_32BIT, "base_vertex");
+
+ psBaseVertexEntry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*psBaseVertexEntry));
+ psBaseVertexEntry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX;
+ psBaseVertexEntry->const_offset = base_vertex;
+ }
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
+ doutw_count++;
+ pvr_write_draw_index_control =
+ pvr_find_constant(const_usage, RESERVE_32BIT, "Draw Index DOUTW Ctrl");
+
+ /* Set draw index to 0 for non-indirect variants. */
+ if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
+ 0) {
+ struct pvr_const_map_entry_literal32 *literal_entry;
+
+ draw_index =
+ pvr_find_constant(const_usage, RESERVE_32BIT, "draw_index");
+
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = draw_index;
+ literal_entry->literal_value = 0;
+ }
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+ /* Load absolute instance id into uiInstanceIdTemp. */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 0,
+ /* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
+ /* src1 */ R32_T(instance_ID_temp),
+ /* dst */ R32TP_T(instance_ID_temp)));
+ } else if (input_program->flags &
+ PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
+ struct pvr_const_map_entry_base_instance *base_instance_entry =
+ (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
+
+ base_instance =
+ pvr_find_constant(const_usage, RESERVE_32BIT, "base_instance");
+
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 0,
+ /* src0 */ R32_C(base_instance),
+ /* src1 */ R32_T(instance_ID_temp),
+ /* dst */ R32TP_T(instance_ID_temp)));
+
+ base_instance_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*base_instance_entry));
+ base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
+ base_instance_entry->const_offset = base_instance;
+ } else if (input_program->flags &
+ PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
+ struct pvr_const_map_entry_base_instance *base_instance_entry =
+ (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
+
+ base_instance = pvr_find_constant(const_usage,
+ RESERVE_32BIT,
+ "base_instance (Driver Const)");
+
+ /* Base instance provided by the driver. */
+ base_instance_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*base_instance_entry));
+ base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
+ base_instance_entry->const_offset = base_instance;
+ }
+
+ total_dma_count = ddmad_count;
+
+ total_dma_count += doutw_count;
+
+ if (use_robust_vertex_fetch) {
+ pvr_debug_pds_note("RobustBufferVertexFetch Initialization");
+
+ if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+ zero_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "zero_temp");
+
+ /* Load 0 into instance_ID_temp. */
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_limm(0, /* cc */
+ zero_temp, /* SRC1 */
+ 0, /* SRC0 */
+ 0 /* GR */
+ ));
+ } else {
+ zero_temp = pvr_get_temps(&temp_usage, RESERVE_64BIT, "zero_temp");
+
+ max_index_temp =
+ pvr_get_temps(&temp_usage, RESERVE_64BIT, "uMaxIndex");
+ current_index_temp =
+ pvr_get_temps(&temp_usage, RESERVE_64BIT, "uCurrentIndex");
+
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_stflp64(
+ 0, /* cc */
+ PVR_ROGUE_PDSINST_LOP_XOR, /* LOP */
+ 1, /* IM */
+ R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
+ */
+ R64TP_T(zero_temp >> 1), /* SRC1 (REGS64TP)
+ */
+ 0, /* SRC2 (REGS32) */
+ R64TP_T(zero_temp >> 1) /* DST (REG64TP) */
+ ));
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_stflp64(
+ 0, /* cc */
+ PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
+ 1, /* IM */
+ R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
+ */
+ 0, /* SRC1 (REGS64TP) */
+ 0, /* SRC2 (REGS32) */
+ R64TP_T(current_index_temp >> 1) /* DST */
+ /* (REG64TP) */
+ ));
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_stflp64(
+ 0, /* cc */
+ PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
+ 1, /* IM */
+ R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
+ */
+ 0, /* SRC1 (REGS64TP) */
+ 0, /* SRC2 (REGS32) */
+ R64TP_T(max_index_temp >> 1) /* DST */
+ /* (REG64TP) */
+ ));
+ }
+ }
+
+ if (input_program->dma_count && use_robust_vertex_fetch) {
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCC */
+ 0, /* Neg */
+ PVR_HAS_FEATURE(dev_info, pds_ddmadt)
+ ? PVR_ROGUE_PDSINST_PREDICATE_OOB
+ : PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETC */
+ 1 /* Addr */
+ ));
+ }
+
+ for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
+ uint32_t const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS;
+ uint32_t control_word;
+ struct pvr_const_map_entry_literal32 *literal_entry;
+
+ struct pvr_pds_vertex_dma *vertex_dma = &input_program->dma_list[dma];
+ bool last_DMA = (++running_dma_count == total_dma_count);
+
+ pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_DMA);
+
+ /* The id we use to index into this dma. */
+ if (vertex_dma->flags & PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE) {
+ pvr_debug_pds_note("Instance Rate (divisor = %d)",
+ vertex_dma->divisor);
+
+ /* 4 - madd 0 - needs to be 64-bit aligned
+ * 5 - madd 1
+ */
+ if (vertex_dma->divisor > 1) {
+ const uint32_t adjusted_instance_ID_temp =
+ pvr_get_temps(&temp_usage,
+ RESERVE_64BIT,
+ "adjusted_instance_ID_temp");
+ const uint32_t MADD_temp =
+ pvr_get_temps(&temp_usage, RESERVE_64BIT, "MADD_temp");
+
+ /* 1. Remove base instance value from temp 1 to get instance id
+ * 2. Divide the instance id by the divisor - Iout = (Iin *
+ * Multiplier) >> (shift+31)
+ * 3. Add the base instance back on.
+ *
+ * Need two zero temps for the add part of the later MAD.
+ */
+
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_add64(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 1,
+ /* src0 */ R64_T(MADD_temp >> 1),
+ /* src1 */ R64_T(MADD_temp >> 1),
+ /* dst */ R64TP_T(MADD_temp >> 1)));
+
+ if (input_program->flags &
+ PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+ /* Subtract base instance from temp 1, put into
+ * adjusted_instance_ID_temp.
+ */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 1,
+ /* src0 */ R32_T(instance_ID_temp),
+ /* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
+ /* dst */ R32TP_T(adjusted_instance_ID_temp)));
+ } else if (input_program->flags &
+ PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
+ /* Subtract base instance from temp 1, put into
+ * adjusted_instance_ID_temp.
+ */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 1,
+ /* src0 */ R32_T(instance_ID_temp),
+ /* src1 */ R32_C(base_instance),
+ /* dst */ R32TP_T(adjusted_instance_ID_temp)));
+ } else {
+ /* Copy instance from temp 1 to adjusted_instance_ID_temp.
+ */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 0,
+ /* src0 */ R32_T(instance_ID_temp),
+ /* src1 */ R32_T(MADD_temp), /* MADD_temp is set
+ * to 0 at this point.
+ */
+ /* dst */ R32TP_T(adjusted_instance_ID_temp)));
+ }
+
+ /* shift = the bit of the next highest power of two. */
+ uint32_t shift_unsigned =
+ (31 - __builtin_clz(vertex_dma->divisor - 1)) + 1;
+ int32_t shift = (int32_t)shift_unsigned;
+ uint32_t shift_2s_comp;
+
+ pvr_debug_pds_note(
+ "Perform instance rate divide (as integer multiply and rshift)");
+
+ const uint32_t multipier_constant =
+ pvr_find_constant(const_usage,
+ RESERVE_32BIT,
+ "MultiplierConstant (for InstanceDivisor)");
+
+ /* multiplier = ( 2^(shift + 31) + (divisor - 1) ) / divisor,
+ note: the division above is integer division. */
+ uint64_t multipier64 =
+ (uint64_t)((((uint64_t)1 << ((uint64_t)shift_unsigned + 31)) +
+ ((uint64_t)vertex_dma->divisor - (uint64_t)1)) /
+ (uint64_t)vertex_dma->divisor);
+ uint32_t multiplier = (uint32_t)multipier64;
+
+ pvr_debug_pds_note(" - Value of MultiplierConstant = %u",
+ multiplier);
+ pvr_debug_pds_note(" - Value of Shift = %d", shift);
+
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = multipier_constant;
+ literal_entry->literal_value = multiplier;
+
+ /* (Iin * Multiplier) */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_rogue_inst_encode_mad(0, /* Sign of add is positive */
+ 0, /* Unsigned ALU mode */
+ 0, /* Unconditional */
+ R32_C(multipier_constant),
+ R32_T(adjusted_instance_ID_temp),
+ R64_T(MADD_temp / 2),
+ R64TP_T(MADD_temp / 2)));
+
+ /* >> (shift + 31) */
+ shift += 31;
+ shift *= -1;
+
+ if (shift < -31) {
+ /* >> (31) */
+ shift_2s_comp = 0xFFFE1;
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_stflp64(
+ /* cc */ 0,
+ /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+ /* IM */ 1, /* enable immediate */
+ /* SRC0 */ R64_T(MADD_temp / 2),
+ /* SRC1 */ 0, /* This won't be used
+ in a shift
+ operation. */
+ /* SRC2 (Shift) */ shift_2s_comp,
+ /* DST */ R64TP_T(MADD_temp / 2)));
+ shift += 31;
+ }
+
+ /* >> (shift + 31) */
+ shift_2s_comp = *((uint32_t *)&shift);
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_stflp64(
+ /* cc */ 0,
+ /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+ /* IM */ 1, /* enable immediate */
+ /* SRC0 */ R64_T(MADD_temp / 2),
+ /* SRC1 */ 0, /* This won't be used
+ * in a shift
+ * operation. */
+ /* SRC2 (Shift) */ shift_2s_comp,
+ /* DST */ R64TP_T(MADD_temp / 2)));
+
+ if (input_program->flags &
+ PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+ /* Add base instance. */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 0,
+ /* src0 */ R32_T(MADD_temp),
+ /* src1 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
+ /* dst */ R32TP_T(MADD_temp)));
+ } else if (input_program->flags &
+ PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
+ /* Add base instance. */
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 0,
+ /* src0 */ R32_T(MADD_temp),
+ /* src1 */ R32_C(base_instance),
+ /* dst */ R32TP_T(MADD_temp)));
+ }
+
+ pvr_debug_pds_note(
+ "DMA Vertex Index will be sourced from 'MADD_temp'");
+ index = MADD_temp;
+ } else if (vertex_dma->divisor == 0) {
+ if (base_instance_ID_temp == PVR_INVALID_TEMP) {
+ base_instance_ID_temp = pvr_get_temps(&temp_usage,
+ RESERVE_32BIT,
+ "uBaseInstanceIDTemp");
+ }
+
+ /* Load 0 into instance_ID_temp. */
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_limm(
+ /* cc */ 0,
+ /* src1 */ base_instance_ID_temp,
+ /* src0 */ 0,
+ /* gr */ 0));
+
+ if (input_program->flags &
+ PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+ /* Add base instance. */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 0,
+ /* src0 */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
+ /* src1 */ R32_T(base_instance_ID_temp),
+ /* dst */ R32TP_T(base_instance_ID_temp)));
+
+ } else if (input_program->flags &
+ PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
+ /* Add base instance. */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_add32(
+ /* cc */ 0,
+ /* alum */ 0,
+ /* sna */ 0,
+ /* src0 */ R32_C(base_instance),
+ /* src1 */ R32_T(base_instance_ID_temp),
+ /* dst */ R32TP_T(base_instance_ID_temp)));
+ }
+
+ pvr_debug_pds_note(
+ "DMA Vertex Index will be sourced from 'uBaseInstanceIdTemp'");
+ index = base_instance_ID_temp;
+ } else {
+ pvr_debug_pds_note(
+ "DMA Vertex Index will be sourced from 'uInstanceIdTemp'");
+ index = instance_ID_temp;
+ }
+ } else {
+ pvr_debug_pds_note(
+ "DMA Vertex Index will be sourced from 'uIndexIdTemp'");
+ index = index_id_temp;
+ }
+
+ /* DDMAD Const Usage [__XX_---] */
+ pvr_write_pds_const_map_entry_vertex_attribute_address(
+ &entry_write_state,
+ vertex_dma,
+ const_base + 4,
+ use_robust_vertex_fetch);
+
+ /* DDMAD Const Usage [__XXX---] */
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = const_base + 3;
+ literal_entry->literal_value = vertex_dma->stride;
+
+ control_word = vertex_dma->size_in_dwords
+ << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
+ control_word |= vertex_dma->destination
+ << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
+ control_word |= (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
+
+ /* DDMADT instructions will do a dummy doutd when OOB if
+ * PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN is set but as the driver
+ * would need to do another doutd after an OOB DDMADT to provide the 'in
+ * bounds' data the DDMADT can't be set as LAST.
+ *
+ * This requires us to include a final dummy DDMAD.LAST instruction.
+ *
+ * Pseudocode taken from SeriesXE2017.PDS Instruction Controller
+ * Specification.doc
+ *
+ * DDMAD src0,src1,src2,src3
+ *
+ * calculated_source_address := src0*src1+src2
+ * base_address := src2
+ * dma_parameters := src3[31:0]
+ * buffer_size := src3[63:33]
+ * test := src3[32]
+ *
+ * if (test == 1) {
+ * // DDMAD(T)
+ * if (calculated_source_address[39:0] + (burst_size<<2) <=
+ * base_address[39:0] + buffer_size) {
+ * OOB := 0
+ * DOUTD calculated_source_address,dma_parameters
+ * } else {
+ * OOB := 1
+ * if (last_instance == 1) {
+ * dma_parameters[BURST_SIZE] := 0
+ * DOUTD calculated_source_address,dma_parameters
+ * }
+ * }
+ * } else {
+ * // DDMAD
+ * DOUTD calculated_source_address,dma_parameters
+ * }
+ */
+
+ if (last_DMA && (!PVR_HAS_FEATURE(dev_info, pds_ddmadt) ||
+ !use_robust_vertex_fetch)) {
+ pvr_debug_pds_note("LAST DDMAD");
+ control_word |= PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
+ }
+
+ /* DDMAD Const Usage [_XXXX---] */
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = (const_base + 6);
+ literal_entry->literal_value = control_word;
+
+ if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+ /* DDMAD Const Usage [XXXXX---]
+ * With DDMADT an extra 32bits of SRC3 contains the information for
+ * performing out-of-bounds tests on the DMA.
+ */
+
+ if (use_robust_vertex_fetch) {
+ struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size
+ *obb_buffer_size;
+ obb_buffer_size =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*obb_buffer_size));
+
+ obb_buffer_size->type =
+ PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE;
+ obb_buffer_size->const_offset = const_base + 7;
+ obb_buffer_size->binding_index = vertex_dma->binding_index;
+ } else {
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = const_base + 7;
+ literal_entry->literal_value = 0;
+ }
+
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_ddmad(0, /* cc */
+ 0, /* END */
+ R32_C(const_base + 3), /* SRC0 (REGS32) */
+ index, /* SRC1 (REGS32T) */
+ R64_C((const_base + 4) >> 1), /* SRC2
+ * (REGS64)
+ */
+ R64_C((const_base + 6) >> 1) /* SRC3
+ * (REGS64C)
+ */
+ ));
+
+ if (use_robust_vertex_fetch) {
+ /* If not out of bounds, skip next DDMAD instructions. */
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_ddmad(
+ 1, /* cc */
+ 0, /* END */
+ R32_C(const_base + 3), /* SRC0 (REGS32) */
+ R32_T(zero_temp), /* SRC1 (REGS32T) */
+ R64_C((const_base + 4) >> 1), /* SRC2
+ * (REGS64)
+ */
+ R64_C((const_base + 6) >> 1) /* SRC3
+ * (REGS64C)
+ */
+ ));
+
+ /* Now the driver must have a dummy DDMAD marked as last. */
+ if (last_DMA) {
+ uint32_t dummy_dma_const = pvr_find_constant(const_usage,
+ RESERVE_64BIT,
+ "uDummyDMAConst");
+ uint32_t zero_const =
+ pvr_find_constant(const_usage, RESERVE_64BIT, "uZeroConst");
+
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = zero_const;
+ literal_entry->literal_value = 0;
+
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = zero_const + 1;
+ literal_entry->literal_value = 0;
+
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = dummy_dma_const;
+ literal_entry->literal_value = 0;
+
+ literal_entry->literal_value |=
+ 0 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
+ literal_entry->literal_value |=
+ (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
+ literal_entry->literal_value |=
+ PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
+
+ literal_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*literal_entry));
+ literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+ literal_entry->const_offset = dummy_dma_const + 1;
+ literal_entry->literal_value = 0;
+
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_ddmad(
+ 0, /* cc */
+ 0, /* END */
+ R32_C(zero_const), /* SRC0 (REGS32)
+ */
+ R32_T(zero_temp), /* SRC1 (REGS32T)
+ */
+ R64_C((dummy_dma_const) >> 1), /* SRC2
+ (REGS64)
+ */
+ R64_C((dummy_dma_const) >> 1) /* SRC3
+ (REGS64C)
+ */
+ ));
+ }
+ }
+ } else {
+ if (use_robust_vertex_fetch) {
+ struct pvr_const_map_entry_vertex_attribute_max_index
+ *max_index_entry;
+
+ pvr_debug("RobustVertexFetch DDMAD");
+
+ const uint32_t max_index_const =
+ pvr_find_constant(const_usage, RESERVE_32BIT, "max_index_const");
+
+ max_index_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*max_index_entry));
+ max_index_entry->const_offset = max_index_const;
+ max_index_entry->type =
+ PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX;
+ max_index_entry->binding_index = vertex_dma->binding_index;
+ max_index_entry->offset = vertex_dma->offset;
+ max_index_entry->stride = vertex_dma->stride;
+ max_index_entry->size_in_dwords = vertex_dma->size_in_dwords;
+ max_index_entry->component_size_in_bytes =
+ vertex_dma->component_size_in_bytes;
+
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_add32(0, /* cc */
+ 0, /* ALUM */
+ PVR_ROGUE_PDSINST_LOP_NONE, /* SNA */
+ R32_C(max_index_const), /* SRC0
+ * (REGS32)
+ */
+ R32_T(zero_temp), /* SRC1 (REGS32) */
+ R32TP_T(max_index_temp) /* DST
+ * (REG32TP)
+ */
+ ));
+
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_stflp32(
+ 1, /* IM */
+ 0, /* cc */
+ PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
+ index, /* SRC0 (REGS32T) */
+ 0, /* SRC1 (REGS32) */
+ 0, /* SRC2 (REG32TP) */
+ R32TP_T(current_index_temp) /* DST
+ * (REG32TP)
+ */
+ ));
+
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_cmp(
+ 0, /* cc enable */
+ PVR_ROGUE_PDSINST_COP_GT, /* Operation */
+ R64TP_T(current_index_temp >> 1), /* SRC
+ * (REGS64TP)
+ */
+ R64_T(max_index_temp >> 1) /* SRC1 (REGS64) */
+ ));
+
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_stflp32(
+ 1, /* IM */
+ 1, /* cc */
+ PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
+ zero_temp, /* SRC0 (REGS32T) */
+ 0, /* SRC1 (REGS32) */
+ 0, /* SRC2 (REG32TP) */
+ R32TP_T(current_index_temp) /* DST
+ * (REG32TP)
+ */
+ ));
+
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_ddmad(
+ 0, /* cc */
+ 0, /* END */
+ R32_C(const_base + 3), /* SRC0 (REGS32) */
+ current_index_temp, /* SRC1 (REGS32T) */
+ R64_C((const_base + 4) >> 1), /* SRC2
+ * (REGS64)
+ */
+ (const_base + 6) >> 1 /* SRC3 (REGS64C) */
+ ));
+ } else {
+ PVR_PDS_MODE_TOGGLE(code,
+ instruction,
+ pvr_pds_inst_encode_ddmad(
+ /* cc */ 0,
+ /* end */ 0,
+ /* src0 */ R32_C(const_base + 3),
+ /* src2 */ (index),
+ /* src1 */ R64_C((const_base + 4) >> 1),
+ /* src3 */ (const_base + 6) >> 1));
+ }
+ }
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
+ bool last_DMA = (++running_dma_count == total_dma_count);
+
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_encode_direct_write(
+ &entry_write_state,
+ last_DMA,
+ false,
+ R64_C(write_vertex_control),
+ R64_T(0),
+ 0x1,
+ input_program->vertex_id_register,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ dev_info));
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
+ bool last_DMA = (++running_dma_count == total_dma_count);
+
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_encode_direct_write(
+ &entry_write_state,
+ last_DMA,
+ false,
+ R64_C(write_instance_control),
+ R64_T(0),
+ 0x2,
+ input_program->instance_id_register,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ dev_info));
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
+ bool last_DMA = (++running_dma_count == total_dma_count);
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+ /* Base instance comes from ptemp 1. */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_encode_direct_write(
+ &entry_write_state,
+ last_DMA,
+ false,
+ R64_C(write_base_instance_control),
+ R64_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP >> 1),
+ 0x2,
+ input_program->base_instance_register,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ dev_info));
+ } else {
+ uint32_t data_mask = (base_instance & 1) ? 0x2 : 0x1;
+
+ /* Base instance comes from driver constant. */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_encode_direct_write(
+ &entry_write_state,
+ last_DMA,
+ false,
+ R64_C(write_base_instance_control),
+ R64_C(base_instance >> 1),
+ data_mask,
+ input_program->base_instance_register,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ dev_info));
+ }
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
+ bool last_DMA = (++running_dma_count == total_dma_count);
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+ /* Base vertex comes from ptemp 0 (initialized by PDS hardware). */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_encode_direct_write(
+ &entry_write_state,
+ last_DMA,
+ false,
+ R64_C(write_base_vertex_control),
+ R64_P(0),
+ 0x1,
+ input_program->base_vertex_register,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ dev_info));
+ } else {
+ uint32_t data_mask = (base_vertex & 1) ? 0x2 : 0x1;
+
+ /* Base vertex comes from driver constant (literal 0). */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_encode_direct_write(
+ &entry_write_state,
+ last_DMA,
+ false,
+ R64_C(write_base_vertex_control),
+ R64_C(base_vertex >> 1),
+ data_mask,
+ input_program->base_vertex_register,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ dev_info));
+ }
+ }
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
+ bool last_DMA = (++running_dma_count == total_dma_count);
+
+ if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+ /* Draw index comes from ptemp 3. */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_encode_direct_write(
+ &entry_write_state,
+ last_DMA,
+ false,
+ R64_C(pvr_write_draw_index_control),
+ R64_P(1),
+ 0x2,
+ input_program->draw_index_register,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ dev_info));
+ } else {
+ uint32_t data_mask = (draw_index & 1) ? 0x2 : 0x1;
+
+ /* Draw index comes from constant (literal 0). */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_encode_direct_write(
+ &entry_write_state,
+ last_DMA,
+ false,
+ R64_C(pvr_write_draw_index_control),
+ R64_C(draw_index >> 1),
+ data_mask,
+ input_program->draw_index_register,
+ PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+ dev_info));
+ }
+ }
+
+ doutu_address_entry =
+ pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+ sizeof(*doutu_address_entry));
+ doutu_address_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
+ doutu_address_entry->const_offset = 0;
+ doutu_address_entry->doutu_control = input_program->usc_task_control.src0;
+
+ if (use_robust_vertex_fetch) {
+ /* Restore IF0 */
+ PVR_PDS_MODE_TOGGLE(
+ code,
+ instruction,
+ pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC */
+ 0, /* Neg */
+ PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC */
+ 1 /* Addr */
+ ));
+ }
+
+ PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_encode_doutu(1, 1, 0));
+ PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_inst_encode_halt(0));
+
+ assert(running_dma_count == total_dma_count);
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(const_usage); i++) {
+ if (const_usage[i] == 0)
+ break;
+
+ info->data_size_in_dwords =
+ 8 * i + (32 - __builtin_clz((uint32_t)const_usage[i]));
+ }
+
+ info->temps_required = temp_usage.temps_needed;
+ info->entry_count = entry_write_state.entry_count;
+ info->entries_written_size_in_bytes =
+ entry_write_state.entries_size_in_bytes;
+ info->code_size_in_dwords = instruction;
+
+ pvr_debug("=================================================\n");
+}
+
+/* Generate the PDS program that DMAs descriptor sets and special/constant
+ * buffers into USC registers, optionally ending with a DOUTU that launches a
+ * secondary USC update program.
+ *
+ * Constant layout in the data segment: all 64-bit constants first, then all
+ * 32-bit constants. Each DMA consumes one 64-bit constant (source address)
+ * and one 32-bit constant (burst control word).
+ *
+ * Outputs written to info: const-map entry count/size, data segment size in
+ * dwords and the number of instructions emitted.
+ */
+void pvr_pds_generate_descriptor_upload_program(
+   struct pvr_descriptor_program_input *input_program,
+   uint32_t *code_section,
+   struct pvr_pds_info *info)
+{
+   unsigned int num_consts64;
+   unsigned int num_consts32;
+   unsigned int next_const64;
+   unsigned int next_const32;
+   unsigned int instruction = 0;
+   uint32_t compile_time_buffer_index = 0;
+
+   unsigned int total_dma_count = 0;
+   unsigned int running_dma_count = 0;
+
+   struct pvr_pds_const_map_entry_write_state entry_write_state;
+
+   /* Calculate the total register usage so we can stick 32-bit consts
+    * after 64. Each DOUTD/DDMAD requires 1 32-bit constant and 1 64-bit
+    * constant.
+    */
+   num_consts32 = input_program->descriptor_set_count;
+   num_consts64 = input_program->descriptor_set_count;
+   total_dma_count = input_program->descriptor_set_count;
+
+   pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
+
+   for (unsigned int index = 0; index < input_program->buffer_count; index++) {
+      struct pvr_pds_buffer *buffer = &input_program->buffers[index];
+
+      /* This switch statement looks pointless but we want to optimize DMAs
+       * that can be done as a DOUTW.
+       */
+      switch (buffer->type) {
+      default: {
+         /* 1 DOUTD per compile time buffer: */
+         num_consts32++;
+         num_consts64++;
+         total_dma_count++;
+         break;
+      }
+      }
+   }
+
+   /* DOUTU for the secondary update program requires a 64-bit constant. */
+   if (input_program->secondary_program_present)
+      num_consts64++;
+
+   info->data_size_in_dwords = (num_consts64 * 2) + (num_consts32);
+
+   /* Start counting constants. */
+   next_const64 = 0;
+   next_const32 = num_consts64 * 2;
+
+   /* For each descriptor set perform a DOUTD. */
+   for (unsigned int descriptor_index = 0;
+        descriptor_index < input_program->descriptor_set_count;
+        descriptor_index++) {
+      struct pvr_const_map_entry_descriptor_set *descriptor_set_entry;
+      struct pvr_pds_descriptor_set *descriptor_set =
+         &input_program->descriptor_sets[descriptor_index];
+
+      /* Only the last DMA carries the "last" flag; halt as well when no
+       * secondary program follows.
+       */
+      bool last_DMA = (++running_dma_count == total_dma_count);
+      bool halt = last_DMA && !input_program->secondary_program_present;
+
+      descriptor_set_entry =
+         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                              sizeof(*descriptor_set_entry));
+      descriptor_set_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET;
+      descriptor_set_entry->const_offset = next_const64 * 2;
+      descriptor_set_entry->descriptor_set = descriptor_set->descriptor_set;
+      descriptor_set_entry->primary = descriptor_set->primary;
+      descriptor_set_entry->offset_in_dwords = descriptor_set->offset_in_dwords;
+
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_encode_burst_cs(&entry_write_state,
+                                              last_DMA,
+                                              halt,
+                                              next_const32,
+                                              next_const64,
+                                              descriptor_set->size_in_dwords,
+                                              descriptor_set->destination));
+
+      next_const64++;
+      next_const32++;
+   }
+
+   /* One DMA per buffer; the entry type recorded in the const map tells the
+    * driver how to patch the source address at dispatch time.
+    */
+   for (unsigned int index = 0; index < input_program->buffer_count; index++) {
+      struct pvr_pds_buffer *buffer = &input_program->buffers[index];
+
+      bool last_DMA = (++running_dma_count == total_dma_count);
+      bool halt = last_DMA && !input_program->secondary_program_present;
+
+      /* NOTE(review): no default case — a buffer type not listed below
+       * would leave entry_write_state.entry pointing at a stale entry for
+       * the const_offset patch further down; confirm every member of the
+       * buffer-type enum is covered here.
+       */
+      switch (buffer->type) {
+      case PVR_BUFFER_TYPE_PUSH_CONSTS: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_PUSH_CONSTS;
+         special_buffer_entry->buffer_index = buffer->source_offset;
+         break;
+      }
+      case PVR_BUFFER_TYPE_DYNAMIC: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_DYNAMIC;
+         special_buffer_entry->buffer_index = buffer->source_offset;
+         break;
+      }
+      case PVR_BUFFER_TYPES_COMPILE_TIME: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_COMPILE_TIME;
+         special_buffer_entry->buffer_index = compile_time_buffer_index++;
+         break;
+      }
+      case PVR_BUFFER_TYPES_BUFFER_LENGTHS: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_BUFFER_LENGTHS;
+         break;
+      }
+      case PVR_BUFFER_TYPE_BLEND_CONSTS: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BLEND_CONSTS;
+         special_buffer_entry->buffer_index =
+            input_program->blend_constants_used_mask;
+         break;
+      }
+      case PVR_BUFFER_TYPE_UBO: {
+         struct pvr_const_map_entry_constant_buffer *constant_buffer_entry;
+
+         constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
+            &entry_write_state,
+            sizeof(*constant_buffer_entry));
+         constant_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER;
+         constant_buffer_entry->buffer_id = buffer->buffer_id;
+         constant_buffer_entry->desc_set = buffer->desc_set;
+         constant_buffer_entry->binding = buffer->binding;
+         constant_buffer_entry->offset = buffer->source_offset;
+         constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
+         break;
+      }
+      case PVR_BUFFER_TYPES_UBO_ZEROING: {
+         struct pvr_const_map_entry_constant_buffer_zeroing
+            *constant_buffer_entry;
+
+         constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
+            &entry_write_state,
+            sizeof(*constant_buffer_entry));
+         constant_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING;
+         constant_buffer_entry->buffer_id = buffer->buffer_id;
+         constant_buffer_entry->offset = buffer->source_offset;
+         constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
+         break;
+      }
+      }
+
+      /* Patch the entry just prepared above with the offset of this DMA's
+       * 64-bit source-address constant.
+       */
+      entry_write_state.entry->const_offset = next_const64 * 2;
+
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_encode_burst_cs(&entry_write_state,
+                                              last_DMA,
+                                              halt,
+                                              next_const32,
+                                              next_const64,
+                                              buffer->size_in_dwords,
+                                              buffer->destination));
+
+      next_const64++;
+      next_const32++;
+   }
+
+   /* Sanity check: every DMA counted during sizing must have been emitted. */
+   if (total_dma_count != running_dma_count)
+      fprintf(stderr, "Mismatch in DMA count\n");
+
+   /* Launch the secondary USC update program with the final 64-bit const. */
+   if (input_program->secondary_program_present) {
+      struct pvr_const_map_entry_doutu_address *doutu_address;
+
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_pds_encode_doutu(false, true, next_const64));
+
+      doutu_address =
+         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                              sizeof(*doutu_address));
+      doutu_address->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
+      doutu_address->const_offset = next_const64 * 2;
+      doutu_address->doutu_control = input_program->secondary_task_control.src0;
+
+      next_const64++;
+   }
+
+   /* A PDS program must contain at least one instruction when the caller
+    * requires it to be non-empty, so emit a lone HALT.
+    */
+   if (instruction == 0 && input_program->must_not_be_empty) {
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_pds_inst_encode_halt(
+                             /* cc */ false));
+   }
+
+   info->entry_count = entry_write_state.entry_count;
+   info->entries_written_size_in_bytes =
+      entry_write_state.entries_size_in_bytes;
+   info->code_size_in_dwords = instruction;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_csb.h"
+#include "pvr_private.h"
+#include "util/list.h"
+#include "vk_alloc.h"
+#include "vk_command_buffer.h"
+#include "vk_command_pool.h"
+#include "vk_log.h"
+
+/* TODO: Investigate where this limit comes from. */
+#define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
+                          const VkBlitImageInfo2KHR *pBlitImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdCopyImageToBuffer2KHR(
+   VkCommandBuffer commandBuffer,
+   const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
+                          const VkCopyImageInfo2KHR *pCopyImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
+                         VkBuffer dstBuffer,
+                         VkDeviceSize dstOffset,
+                         VkDeviceSize dataSize,
+                         const void *pData)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
+                       VkBuffer dstBuffer,
+                       VkDeviceSize dstOffset,
+                       VkDeviceSize fillSize,
+                       uint32_t data)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdCopyBufferToImage2KHR(
+   VkCommandBuffer commandBuffer,
+   const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
+                            VkImage _image,
+                            VkImageLayout imageLayout,
+                            const VkClearColorValue *pColor,
+                            uint32_t rangeCount,
+                            const VkImageSubresourceRange *pRanges)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
+                                   VkImage image_h,
+                                   VkImageLayout imageLayout,
+                                   const VkClearDepthStencilValue *pDepthStencil,
+                                   uint32_t rangeCount,
+                                   const VkImageSubresourceRange *pRanges)
+{
+   assert(!"Unimplemented");
+}
+
+/* Record a buffer-to-buffer copy as a deferred transfer command; the actual
+ * copy is performed later when the command buffer's transfer sub-command is
+ * submitted.
+ */
+void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
+                           const VkCopyBufferInfo2KHR *pCopyBufferInfo)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
+   const size_t regions_size =
+      pCopyBufferInfo->regionCount * sizeof(*pCopyBufferInfo->pRegions);
+   struct pvr_transfer_cmd *transfer_cmd;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   /* Single allocation: the region array is copied into trailing storage
+    * after the transfer command (allocation size is
+    * sizeof(*transfer_cmd) + regions_size).
+    */
+   transfer_cmd = vk_alloc(&cmd_buffer->vk.pool->alloc,
+                           sizeof(*transfer_cmd) + regions_size,
+                           8U,
+                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!transfer_cmd) {
+      /* vkCmd* entry points cannot return errors; record the failure on the
+       * command buffer state instead.
+       */
+      cmd_buffer->state.status =
+         vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      return;
+   }
+
+   transfer_cmd->src = src;
+   transfer_cmd->dst = dst;
+   transfer_cmd->region_count = pCopyBufferInfo->regionCount;
+   memcpy(transfer_cmd->regions, pCopyBufferInfo->pRegions, regions_size);
+
+   pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
+                             uint32_t attachmentCount,
+                             const VkClearAttachment *pAttachments,
+                             uint32_t rectCount,
+                             const VkClearRect *pRects)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: implement. Stub: traps via assert in debug builds; no-op with
+ * NDEBUG.
+ */
+void pvr_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+                             const VkResolveImageInfo2KHR *pResolveImageInfo)
+{
+   assert(!"Unimplemented");
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_bo.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+/* Translate PVR_BO_ALLOC_FLAG_* allocation flags into their winsys
+ * PVR_WINSYS_BO_FLAG_* equivalents. Flags without a winsys counterpart
+ * (e.g. CPU_MAPPED) are handled by the caller and ignored here.
+ */
+static uint32_t pvr_bo_alloc_to_winsys_flags(uint64_t flags)
+{
+   uint32_t ws_flags = 0;
+
+   if (flags & PVR_BO_ALLOC_FLAG_CPU_ACCESS)
+      ws_flags |= PVR_WINSYS_BO_FLAG_CPU_ACCESS;
+
+   if (flags & PVR_BO_ALLOC_FLAG_GPU_UNCACHED)
+      ws_flags |= PVR_WINSYS_BO_FLAG_GPU_UNCACHED;
+
+   if (flags & PVR_BO_ALLOC_FLAG_PM_FW_PROTECT)
+      ws_flags |= PVR_WINSYS_BO_FLAG_PM_FW_PROTECT;
+
+   if (flags & PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC)
+      ws_flags |= PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC;
+
+   return ws_flags;
+}
+
+/**
+ * \brief Helper interface to allocate a GPU buffer and map it to both host and
+ * device virtual memory. Host mapping is conditional and is controlled by
+ * flags.
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] heap Heap to allocate device virtual address from.
+ * \param[in] size Size of buffer to allocate.
+ * \param[in] alignment Required alignment of the allocation. Must be a power
+ * of two.
+ * \param[in] flags Controls allocation, CPU and GPU mapping behavior
+ * using PVR_BO_ALLOC_FLAG_*.
+ * \param[out] pvr_bo_out On success output buffer is returned in this pointer.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ *
+ * \sa #pvr_bo_free()
+ */
+VkResult pvr_bo_alloc(struct pvr_device *device,
+                      struct pvr_winsys_heap *heap,
+                      uint64_t size,
+                      uint64_t alignment,
+                      uint64_t flags,
+                      struct pvr_bo **const pvr_bo_out)
+{
+   const uint32_t ws_flags = pvr_bo_alloc_to_winsys_flags(flags);
+   struct pvr_bo *pvr_bo;
+   pvr_dev_addr_t addr;
+   VkResult result;
+
+   pvr_bo = vk_alloc(&device->vk.alloc,
+                     sizeof(*pvr_bo),
+                     8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!pvr_bo)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = device->ws->ops->buffer_create(device->ws,
+                                           size,
+                                           alignment,
+                                           PVR_WINSYS_BO_TYPE_GPU,
+                                           ws_flags,
+                                           &pvr_bo->bo);
+   if (result != VK_SUCCESS)
+      goto err_vk_free;
+
+   /* Optionally map on the CPU up front; the mapping itself is retained by
+    * the winsys bo (presumably via bo->map — the local return value is only
+    * used for the error check).
+    */
+   if (flags & PVR_BO_ALLOC_FLAG_CPU_MAPPED) {
+      void *map = device->ws->ops->buffer_map(pvr_bo->bo);
+      if (!map) {
+         result = VK_ERROR_MEMORY_MAP_FAILED;
+         goto err_buffer_destroy;
+      }
+   }
+
+   /* Reserve a device-virtual range from the heap and map the buffer into
+    * it.
+    */
+   pvr_bo->vma = device->ws->ops->heap_alloc(heap, size, alignment);
+   if (!pvr_bo->vma) {
+      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+      goto err_buffer_unmap;
+   }
+
+   addr = device->ws->ops->vma_map(pvr_bo->vma, pvr_bo->bo, 0, size);
+   if (!addr.addr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto err_heap_free;
+   }
+
+   *pvr_bo_out = pvr_bo;
+
+   return VK_SUCCESS;
+
+   /* Unwind in strict reverse order of acquisition. */
+err_heap_free:
+   device->ws->ops->heap_free(pvr_bo->vma);
+
+err_buffer_unmap:
+   if (flags & PVR_BO_ALLOC_FLAG_CPU_MAPPED)
+      device->ws->ops->buffer_unmap(pvr_bo->bo);
+
+err_buffer_destroy:
+   device->ws->ops->buffer_destroy(pvr_bo->bo);
+
+err_vk_free:
+   vk_free(&device->vk.alloc, pvr_bo);
+
+   return result;
+}
+
+/**
+ * \brief Interface to map the buffer into host virtual address space.
+ *
+ * Buffer should have been created with the #PVR_BO_ALLOC_FLAG_CPU_ACCESS
+ * flag. It should also not already be mapped or it should have been unmapped
+ * using #pvr_bo_cpu_unmap() before mapping again.
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] pvr_bo Buffer to map.
+ * \return Valid host virtual address on success, or NULL otherwise.
+ *
+ * \sa #pvr_bo_alloc(), #PVR_BO_ALLOC_FLAG_CPU_MAPPED
+ */
+void *pvr_bo_cpu_map(struct pvr_device *device, struct pvr_bo *pvr_bo)
+{
+   /* Double-mapping is a caller error; unmap first with pvr_bo_cpu_unmap().
+    */
+   assert(!pvr_bo->bo->map);
+
+   return device->ws->ops->buffer_map(pvr_bo->bo);
+}
+
+/**
+ * \brief Interface to unmap the buffer from host virtual address space.
+ *
+ * Buffer should have a valid mapping, created either using #pvr_bo_cpu_map() or
+ * by passing #PVR_BO_ALLOC_FLAG_CPU_MAPPED flag to #pvr_bo_alloc() at
+ * allocation time.
+ *
+ * Buffer can be remapped using #pvr_bo_cpu_map().
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] pvr_bo Buffer to unmap.
+ */
+void pvr_bo_cpu_unmap(struct pvr_device *device, struct pvr_bo *pvr_bo)
+{
+   /* Unmapping an unmapped buffer is a caller error. */
+   assert(pvr_bo->bo->map);
+   device->ws->ops->buffer_unmap(pvr_bo->bo);
+}
+
+/**
+ * \brief Interface to free the buffer object.
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] pvr_bo Buffer to free.
+ *
+ * \sa #pvr_bo_alloc()
+ */
+void pvr_bo_free(struct pvr_device *device, struct pvr_bo *pvr_bo)
+{
+   /* NULL is accepted so callers can free unconditionally on error paths. */
+   if (!pvr_bo)
+      return;
+
+   /* Tear down in reverse order of pvr_bo_alloc(): device mapping, heap
+    * range, CPU mapping (if still live), then the winsys buffer itself.
+    */
+   device->ws->ops->vma_unmap(pvr_bo->vma);
+   device->ws->ops->heap_free(pvr_bo->vma);
+
+   if (pvr_bo->bo->map)
+      device->ws->ops->buffer_unmap(pvr_bo->bo);
+
+   device->ws->ops->buffer_destroy(pvr_bo->bo);
+
+   vk_free(&device->vk.alloc, pvr_bo);
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_BO_H
+#define PVR_BO_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "util/list.h"
+#include "util/macros.h"
+
+struct pvr_device;
+struct pvr_winsys_bo;
+struct pvr_winsys_vma;
+struct pvr_winsys_heap;
+
+struct pvr_bo {
+   /* Since multiple components (csb, caching logic, etc) can make use of
+    * linking buffers in a list, we add 'link' in pvr_bo to avoid an extra
+    * level of structure inheritance. It's the responsibility of the buffer
+    * user to manage the list and remove the buffer from the list before
+    * freeing it.
+    */
+   struct list_head link;
+
+   /* Underlying winsys buffer object. */
+   struct pvr_winsys_bo *bo;
+   /* Device-virtual mapping of the buffer, allocated from a winsys heap. */
+   struct pvr_winsys_vma *vma;
+};
+
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * CPU accessible. This is required in order to map a buffer with
+ * #pvr_bo_cpu_map().
+ */
+#define PVR_BO_ALLOC_FLAG_CPU_ACCESS BITFIELD_BIT(0U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should
+ * be mapped to the CPU. Implies #PVR_BO_ALLOC_FLAG_CPU_ACCESS.
+ */
+#define PVR_BO_ALLOC_FLAG_CPU_MAPPED \
+ (BITFIELD_BIT(1U) | PVR_BO_ALLOC_FLAG_CPU_ACCESS)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * mapped to the GPU as uncached.
+ */
+#define PVR_BO_ALLOC_FLAG_GPU_UNCACHED BITFIELD_BIT(2U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer GPU mapping
+ * should be restricted to only allow access to the Parameter Manager unit and
+ * firmware processor.
+ */
+#define PVR_BO_ALLOC_FLAG_PM_FW_PROTECT BITFIELD_BIT(3U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * zeroed at allocation time.
+ */
+#define PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC BITFIELD_BIT(4U)
+
+VkResult pvr_bo_alloc(struct pvr_device *device,
+ struct pvr_winsys_heap *heap,
+ uint64_t size,
+ uint64_t alignment,
+ uint64_t flags,
+ struct pvr_bo **const bo_out);
+void *pvr_bo_cpu_map(struct pvr_device *device, struct pvr_bo *bo);
+void pvr_bo_cpu_unmap(struct pvr_device *device, struct pvr_bo *bo);
+void pvr_bo_free(struct pvr_device *device, struct pvr_bo *bo);
+
+#endif /* PVR_BO_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "c11_compat.h"
+#include "hwdef/rogue_hw_defs.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_end_of_tile.h"
+#include "pvr_formats.h"
+#include "pvr_hw_pass.h"
+#include "pvr_job_common.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "util/compiler.h"
+#include "util/list.h"
+#include "util/macros.h"
+#include "util/u_dynarray.h"
+#include "util/u_pack_color.h"
+#include "vk_alloc.h"
+#include "vk_command_buffer.h"
+#include "vk_command_pool.h"
+#include "vk_format.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
+/* Structure used to pass data into pvr_compute_generate_control_stream()
+ * function.
+ */
+struct pvr_compute_kernel_info {
+   pvr_dev_addr_t indirect_buffer_addr;
+   bool global_offsets_present;
+   /* USC register allocation sizes. — TODO confirm units (dwords?) against
+    * pvr_compute_generate_control_stream().
+    */
+   uint32_t usc_common_size;
+   uint32_t usc_unified_size;
+   uint32_t pds_temp_size;
+   uint32_t pds_data_size;
+   bool usc_target_any;
+   /* NOTE(review): presumably marks a fence kernel that does no real work —
+    * verify in the consumer.
+    */
+   bool is_fence;
+   uint32_t pds_data_offset;
+   uint32_t pds_code_offset;
+   enum PVRX(CDMCTRL_SD_TYPE) sd_type;
+   bool usc_common_shared;
+   /* X/Y/Z dimensions; assumed local workgroup size and global dispatch
+    * size respectively — TODO confirm.
+    */
+   uint32_t local_size[3];
+   uint32_t global_size[3];
+   uint32_t max_instances;
+};
+
+/* Release the resources owned by a single sub-command, unlink it from the
+ * command buffer's list and free it.
+ */
+static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                        struct pvr_sub_cmd *sub_cmd)
+{
+   switch (sub_cmd->type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+      pvr_csb_finish(&sub_cmd->gfx.control_stream);
+      pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
+      pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
+      break;
+
+   case PVR_SUB_CMD_TYPE_COMPUTE:
+      pvr_csb_finish(&sub_cmd->compute.control_stream);
+      break;
+
+   case PVR_SUB_CMD_TYPE_TRANSFER:
+      /* Transfer sub-commands own a list of deferred transfer commands. */
+      list_for_each_entry_safe (struct pvr_transfer_cmd,
+                                transfer_cmd,
+                                &sub_cmd->transfer.transfer_cmds,
+                                link) {
+         list_del(&transfer_cmd->link);
+         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
+      }
+      break;
+
+   default:
+      pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
+      break;
+   }
+
+   list_del(&sub_cmd->link);
+   vk_free(&cmd_buffer->vk.pool->alloc, sub_cmd);
+}
+
+/* Free every sub-command recorded into the command buffer. */
+static void pvr_cmd_buffer_free_sub_cmds(struct pvr_cmd_buffer *cmd_buffer)
+{
+   list_for_each_entry_safe (struct pvr_sub_cmd,
+                             sub_cmd,
+                             &cmd_buffer->sub_cmds,
+                             link) {
+      pvr_cmd_buffer_free_sub_cmd(cmd_buffer, sub_cmd);
+   }
+}
+
+/* vk_command_buffer destroy callback (installed in pvr_cmd_buffer_create):
+ * frees everything owned by the command buffer and then the buffer itself.
+ */
+static void pvr_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
+{
+   struct pvr_cmd_buffer *cmd_buffer =
+      container_of(vk_cmd_buffer, struct pvr_cmd_buffer, vk);
+
+   /* Per-render-pass arrays (vk_free tolerates NULL). */
+   vk_free(&cmd_buffer->vk.pool->alloc,
+           cmd_buffer->state.render_pass_info.attachments);
+   vk_free(&cmd_buffer->vk.pool->alloc,
+           cmd_buffer->state.render_pass_info.clear_values);
+
+   pvr_cmd_buffer_free_sub_cmds(cmd_buffer);
+
+   /* Release all buffer objects still tracked by the command buffer. */
+   list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
+      list_del(&bo->link);
+      pvr_bo_free(cmd_buffer->device, bo);
+   }
+
+   util_dynarray_fini(&cmd_buffer->scissor_array);
+   util_dynarray_fini(&cmd_buffer->depth_bias_array);
+
+   vk_command_buffer_finish(&cmd_buffer->vk);
+   vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
+}
+
/* Allocate and initialize a single command buffer from the given pool.
 *
 * On success stores the new handle in pCommandBuffer; on failure frees any
 * partial allocation and returns the error.
 */
static VkResult pvr_cmd_buffer_create(struct pvr_device *device,
                                      struct vk_command_pool *pool,
                                      VkCommandBufferLevel level,
                                      VkCommandBuffer *pCommandBuffer)
{
   struct pvr_cmd_buffer *cmd_buffer;
   VkResult result;

   cmd_buffer = vk_zalloc(&pool->alloc,
                          sizeof(*cmd_buffer),
                          8U,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmd_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   result = vk_command_buffer_init(&cmd_buffer->vk, pool, level);
   if (result != VK_SUCCESS) {
      vk_free(&pool->alloc, cmd_buffer);
      return result;
   }

   /* Hook up the destroy callback used by the common command buffer code. */
   cmd_buffer->vk.destroy = pvr_cmd_buffer_destroy;
   cmd_buffer->device = device;

   util_dynarray_init(&cmd_buffer->depth_bias_array, NULL);
   util_dynarray_init(&cmd_buffer->scissor_array, NULL);

   cmd_buffer->state.status = VK_SUCCESS;
   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;

   list_inithead(&cmd_buffer->sub_cmds);
   list_inithead(&cmd_buffer->bo_list);

   *pCommandBuffer = pvr_cmd_buffer_to_handle(cmd_buffer);

   return VK_SUCCESS;
}
+
+VkResult
+pvr_AllocateCommandBuffers(VkDevice _device,
+ const VkCommandBufferAllocateInfo *pAllocateInfo,
+ VkCommandBuffer *pCommandBuffers)
+{
+ VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool);
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ VkResult result = VK_SUCCESS;
+ uint32_t i;
+
+ for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+ result = pvr_cmd_buffer_create(device,
+ pool,
+ pAllocateInfo->level,
+ &pCommandBuffers[i]);
+ if (result != VK_SUCCESS)
+ break;
+ }
+
+ if (result != VK_SUCCESS) {
+ while (i--) {
+ VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]);
+ pvr_cmd_buffer_destroy(cmd_buffer);
+ }
+
+ for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
+ pCommandBuffers[i] = VK_NULL_HANDLE;
+ }
+
+ return result;
+}
+
+static void pvr_cmd_buffer_update_barriers(struct pvr_cmd_buffer *cmd_buffer,
+ enum pvr_sub_cmd_type type)
+{
+ struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+ uint32_t barriers;
+
+ switch (type) {
+ case PVR_SUB_CMD_TYPE_GRAPHICS:
+ barriers = PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT;
+ break;
+
+ case PVR_SUB_CMD_TYPE_COMPUTE:
+ barriers = PVR_PIPELINE_STAGE_COMPUTE_BIT;
+ break;
+
+ case PVR_SUB_CMD_TYPE_TRANSFER:
+ barriers = PVR_PIPELINE_STAGE_TRANSFER_BIT;
+ break;
+
+ default:
+ barriers = 0;
+ pvr_finishme("Unsupported sub-command type %d", type);
+ break;
+ }
+
+ for (uint32_t i = 0; i < ARRAY_SIZE(state->barriers_needed); i++)
+ state->barriers_needed[i] |= barriers;
+}
+
/* Upload the accumulated depth bias and scissor tables for the current
 * graphics sub-command into device BOs.
 *
 * On success both staging arrays are cleared for reuse. On failure any BO
 * uploaded by this call is freed again so the sub-command is left unchanged.
 */
static VkResult pvr_cmd_buffer_upload_tables(struct pvr_device *device,
                                             struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   VkResult result;

   /* Tables must not already have been uploaded for this sub-command. */
   assert(!sub_cmd->gfx.depth_bias_bo && !sub_cmd->gfx.scissor_bo);

   if (cmd_buffer->depth_bias_array.size > 0) {
      result =
         pvr_gpu_upload(device,
                        device->heaps.general_heap,
                        util_dynarray_begin(&cmd_buffer->depth_bias_array),
                        cmd_buffer->depth_bias_array.size,
                        cache_line_size,
                        &sub_cmd->gfx.depth_bias_bo);
      if (result != VK_SUCCESS)
         return result;
   }

   if (cmd_buffer->scissor_array.size > 0) {
      result = pvr_gpu_upload(device,
                              device->heaps.general_heap,
                              util_dynarray_begin(&cmd_buffer->scissor_array),
                              cmd_buffer->scissor_array.size,
                              cache_line_size,
                              &sub_cmd->gfx.scissor_bo);
      if (result != VK_SUCCESS)
         goto err_free_depth_bias_bo;
   }

   /* The staging arrays have been consumed; reset them for the next
    * sub-command.
    */
   util_dynarray_clear(&cmd_buffer->depth_bias_array);
   util_dynarray_clear(&cmd_buffer->scissor_array);

   return VK_SUCCESS;

err_free_depth_bias_bo:
   pvr_bo_free(device, sub_cmd->gfx.depth_bias_bo);
   sub_cmd->gfx.depth_bias_bo = NULL;

   return result;
}
+
/* Emit VDM control stream words pointing at the framebuffer's pre-packed PPP
 * state. The state address is split across the STATE0 (msb) and STATE1 (lsb)
 * words. Always returns VK_SUCCESS.
 */
static VkResult pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
   struct pvr_framebuffer *framebuffer =
      cmd_buffer->state.render_pass_info.framebuffer;

   pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE0, state0) {
      state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr;
      state0.word_count = framebuffer->ppp_state_size;
   }

   pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE1, state1) {
      state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr;
   }

   return VK_SUCCESS;
}
+
+static VkResult
+pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
+ const void *const data,
+ const size_t size,
+ struct pvr_bo **const pvr_bo_out)
+{
+ struct pvr_device *const device = cmd_buffer->device;
+ const uint32_t cache_line_size =
+ rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+ struct pvr_bo *pvr_bo;
+ VkResult result;
+
+ result = pvr_gpu_upload(device,
+ device->heaps.general_heap,
+ data,
+ size,
+ cache_line_size,
+ &pvr_bo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ list_add(&pvr_bo->link, &cmd_buffer->bo_list);
+
+ *pvr_bo_out = pvr_bo;
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+pvr_cmd_buffer_upload_usc(struct pvr_cmd_buffer *const cmd_buffer,
+ const void *const code,
+ const size_t code_size,
+ uint64_t code_alignment,
+ struct pvr_bo **const pvr_bo_out)
+{
+ struct pvr_device *const device = cmd_buffer->device;
+ const uint32_t cache_line_size =
+ rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+ struct pvr_bo *pvr_bo;
+ VkResult result;
+
+ code_alignment = MAX2(code_alignment, cache_line_size);
+
+ result =
+ pvr_gpu_upload_usc(device, code, code_size, code_alignment, &pvr_bo);
+ if (result != VK_SUCCESS)
+ return result;
+
+ list_add(&pvr_bo->link, &cmd_buffer->bo_list);
+
+ *pvr_bo_out = pvr_bo;
+
+ return VK_SUCCESS;
+}
+
+static VkResult
+pvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer,
+ const uint32_t *data,
+ uint32_t data_size_dwords,
+ uint32_t data_alignment,
+ const uint32_t *code,
+ uint32_t code_size_dwords,
+ uint32_t code_alignment,
+ uint64_t min_alignment,
+ struct pvr_pds_upload *const pds_upload_out)
+{
+ struct pvr_device *const device = cmd_buffer->device;
+ VkResult result;
+
+ result = pvr_gpu_upload_pds(device,
+ data,
+ data_size_dwords,
+ data_alignment,
+ code,
+ code_size_dwords,
+ code_alignment,
+ min_alignment,
+ pds_upload_out);
+ if (result != VK_SUCCESS)
+ return result;
+
+ list_add(&pds_upload_out->pvr_bo->link, &cmd_buffer->bo_list);
+
+ return VK_SUCCESS;
+}
+
+static inline VkResult
+pvr_cmd_buffer_upload_pds_data(struct pvr_cmd_buffer *const cmd_buffer,
+ const uint32_t *data,
+ uint32_t data_size_dwords,
+ uint32_t data_alignment,
+ struct pvr_pds_upload *const pds_upload_out)
+{
+ return pvr_cmd_buffer_upload_pds(cmd_buffer,
+ data,
+ data_size_dwords,
+ data_alignment,
+ NULL,
+ 0,
+ 0,
+ data_alignment,
+ pds_upload_out);
+}
+
/* Create and upload the per-job end-of-tile (EOT) USC program and the PDS
 * pixel event data segment that launches it.
 *
 * The EOT USC binary is patched in place with the PBE state word so it emits
 * to the correct framebuffer address. On success the PDS data upload is
 * returned via pds_upload_out; the USC BO is tracked on the command buffer.
 */
static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
   struct pvr_cmd_buffer *const cmd_buffer,
   const uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_event_program pixel_event_program = {
      /* No data to DMA, just a DOUTU needed. */
      .num_emit_word_pairs = 0,
   };
   const uint32_t staging_buffer_size =
      cmd_buffer->device->pixel_event_data_size_in_dwords * sizeof(uint32_t);
   const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc;
   struct pvr_device *const device = cmd_buffer->device;
   /* FIXME: This should come from the compiler for the USC pixel program. */
   const uint32_t usc_temp_count = 0;
   struct pvr_bo *usc_eot_program;
   uint8_t *usc_eot_program_ptr;
   uint32_t *staging_buffer;
   VkResult result;

   result = pvr_cmd_buffer_upload_usc(cmd_buffer,
                                      pvr_end_of_tile_program,
                                      sizeof(pvr_end_of_tile_program),
                                      4,
                                      &usc_eot_program);
   if (result != VK_SUCCESS)
      return result;

   /* Sanity-check the expected encoding of the second PBE state word before
    * patching bytes into the program below.
    */
   assert((pbe_cs_words[1] & 0x3F) == 0x20);

   /* FIXME: Stop patching the framebuffer address (this will require the
    * end-of-tile program to be generated at run-time).
    */
   /* NOTE(review): the return value of pvr_bo_cpu_map() is ignored here --
    * confirm whether a mapping failure can occur and needs handling.
    */
   pvr_bo_cpu_map(device, usc_eot_program);
   usc_eot_program_ptr = usc_eot_program->bo->map;
   /* Patch the first PBE state word, little-endian, at byte offset 6. */
   usc_eot_program_ptr[6] = (pbe_cs_words[0] >> 0) & 0xFF;
   usc_eot_program_ptr[7] = (pbe_cs_words[0] >> 8) & 0xFF;
   usc_eot_program_ptr[8] = (pbe_cs_words[0] >> 16) & 0xFF;
   usc_eot_program_ptr[9] = (pbe_cs_words[0] >> 24) & 0xFF;
   pvr_bo_cpu_unmap(device, usc_eot_program);

   pvr_pds_setup_doutu(&pixel_event_program.task_control,
                       usc_eot_program->vma->dev_addr.addr,
                       usc_temp_count,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   /* TODO: We could skip allocating this and generate directly into the device
    * buffer thus removing one allocation and memcpy() per job. Would this
    * speed up things in a noticeable way?
    */
   staging_buffer = vk_alloc(allocator,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_usc_pixel_program;
   }

   /* Generate the data segment. The code segment was uploaded earlier when
    * setting up the PDS static heap data.
    */
   pvr_pds_generate_pixel_event_data_segment(&pixel_event_program,
                                             staging_buffer,
                                             &device->pdevice->dev_info);

   result = pvr_cmd_buffer_upload_pds_data(
      cmd_buffer,
      staging_buffer,
      cmd_buffer->device->pixel_event_data_size_in_dwords,
      4,
      pds_upload_out);
   if (result != VK_SUCCESS)
      goto err_free_pixel_event_staging_buffer;

   vk_free(allocator, staging_buffer);

   return VK_SUCCESS;

err_free_pixel_event_staging_buffer:
   vk_free(allocator, staging_buffer);

err_free_usc_pixel_program:
   /* The USC BO was added to the command buffer's tracking list by
    * pvr_cmd_buffer_upload_usc(); unlink it before freeing.
    */
   list_del(&usc_eot_program->link);
   pvr_bo_free(device, usc_eot_program);

   return result;
}
+
+static uint32_t pvr_get_hw_clear_color(VkFormat vk_format,
+ const VkClearValue *clear_value)
+{
+ union util_color uc = { .ui = 0 };
+
+ switch (vk_format) {
+ case VK_FORMAT_B8G8R8A8_UNORM:
+ util_pack_color(clear_value->color.float32,
+ PIPE_FORMAT_R8G8B8A8_UNORM,
+ &uc);
+ break;
+
+ default:
+ assert(!"Unsupported format");
+ uc.ui[0] = 0;
+ break;
+ }
+
+ return uc.ui[0];
+}
+
/* Pack the clear color for hardware render `idx` and upload it as the load
 * op's constant buffer. The device address of the uploaded constants is
 * returned via addr_out; the BO is tracked on the command buffer.
 */
static VkResult
pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
                                        uint32_t idx,
                                        pvr_dev_addr_t *const addr_out)
{
   const struct pvr_render_pass_info *render_pass_info =
      &cmd_buffer->state.render_pass_info;
   const struct pvr_render_pass *pass = render_pass_info->pass;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &pass->hw_setup->renders[idx];
   ASSERTED const struct pvr_load_op *load_op = hw_render->client_data;
   /* Only a single color init is supported (asserted below). */
   const struct pvr_renderpass_colorinit *color_init =
      &hw_render->color_init[0];
   const struct pvr_render_pass_attachment *attachment =
      &pass->attachments[color_init->driver_id];
   const VkClearValue *clear_value =
      &render_pass_info->clear_values[color_init->driver_id];
   uint32_t hw_clear_value;
   struct pvr_bo *clear_bo;
   VkResult result;

   pvr_finishme("Add missing load op data support");

   assert(load_op->is_hw_object);
   assert(hw_render->color_init_count == 1);

   /* FIXME: add support for RENDERPASS_SURFACE_INITOP_LOAD. */
   assert(color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR);

   /* FIXME: do this at the point we store the clear values? */
   hw_clear_value = pvr_get_hw_clear_color(attachment->vk_format, clear_value);

   result = pvr_cmd_buffer_upload_general(cmd_buffer,
                                          &hw_clear_value,
                                          sizeof(hw_clear_value),
                                          &clear_bo);
   if (result != VK_SUCCESS)
      return result;

   *addr_out = clear_bo->vma->dev_addr;

   return VK_SUCCESS;
}
+
+static VkResult pvr_load_op_pds_data_create_and_upload(
+ struct pvr_cmd_buffer *cmd_buffer,
+ uint32_t idx,
+ pvr_dev_addr_t constants_addr,
+ struct pvr_pds_upload *const pds_upload_out)
+{
+ const struct pvr_render_pass_info *render_pass_info =
+ &cmd_buffer->state.render_pass_info;
+ const struct pvr_load_op *load_op =
+ render_pass_info->pass->hw_setup->renders[idx].client_data;
+ struct pvr_device *device = cmd_buffer->device;
+ const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+ struct pvr_pds_pixel_shader_sa_program program = { 0 };
+ uint32_t staging_buffer_size;
+ uint32_t *staging_buffer;
+ VkResult result;
+
+ program.num_texture_dma_kicks = 1;
+
+ pvr_csb_pack (&program.texture_dma_address[0],
+ PDSINST_DOUT_FIELDS_DOUTD_SRC0,
+ value) {
+ value.sbase = constants_addr;
+ }
+
+ pvr_csb_pack (&program.texture_dma_control[0],
+ PDSINST_DOUT_FIELDS_DOUTD_SRC1,
+ value) {
+ value.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
+ value.a0 = load_op->shareds_dest_offset;
+ value.bsize = load_op->shareds_count;
+ }
+
+ pvr_pds_set_sizes_pixel_shader_sa_texture_data(&program, dev_info);
+
+ staging_buffer_size = program.data_size * sizeof(*staging_buffer);
+
+ staging_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc,
+ staging_buffer_size,
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+ if (!staging_buffer)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ pvr_pds_generate_pixel_shader_sa_texture_state_data(&program,
+ staging_buffer,
+ dev_info);
+
+ result = pvr_cmd_buffer_upload_pds_data(cmd_buffer,
+ staging_buffer,
+ program.data_size,
+ 1,
+ pds_upload_out);
+ if (result != VK_SUCCESS) {
+ vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer);
+ return result;
+ }
+
+ vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer);
+
+ return VK_SUCCESS;
+}
+
+/* FIXME: Should this function be specific to the HW background object, in
+ * which case its name should be changed, or should it have the load op
+ * structure passed in?
+ */
+static VkResult
+pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
+ uint32_t idx,
+ struct pvr_pds_upload *const pds_upload_out)
+{
+ pvr_dev_addr_t constants_addr;
+ VkResult result;
+
+ result =
+ pvr_load_op_constants_create_and_upload(cmd_buffer, idx, &constants_addr);
+ if (result != VK_SUCCESS)
+ return result;
+
+ return pvr_load_op_pds_data_create_and_upload(cmd_buffer,
+ idx,
+ constants_addr,
+ pds_upload_out);
+}
+
/* Pack the CR_PDS_BGRND* register values describing the background (load op)
 * PDS programs: shader/texture-state program addresses in words 0-1 and the
 * shared/texture-state/temp size info in word 2.
 */
static void pvr_pds_bgnd_pack_state(
   const struct pvr_load_op *load_op,
   const struct pvr_pds_upload *load_op_program,
   uint64_t pds_reg_values[static const ROGUE_NUM_CR_PDS_BGRND_WORDS])
{
   pvr_csb_pack (&pds_reg_values[0], CR_PDS_BGRND0_BASE, value) {
      value.shader_addr.addr = load_op->pds_frag_prog.data_offset;
      value.texunicode_addr.addr = load_op->pds_tex_state_prog.code_offset;
   }

   pvr_csb_pack (&pds_reg_values[1], CR_PDS_BGRND1_BASE, value) {
      value.texturedata_addr.addr = load_op_program->data_offset;
   }

   /* Note: word index 2 holds the BGRND3 size info register. */
   pvr_csb_pack (&pds_reg_values[2], CR_PDS_BGRND3_SIZEINFO, value) {
      value.usc_sharedsize =
         DIV_ROUND_UP(load_op->const_shareds_count,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
      value.pds_texturestatesize = DIV_ROUND_UP(
         load_op_program->data_size,
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
      value.pds_tempsize =
         DIV_ROUND_UP(load_op->temps_count,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
   }
}
+
+/**
+ * \brief Calculates the stride in pixels based on the pitch in bytes and pixel
+ * format.
+ *
+ * \param[in] pitch Width pitch in bytes.
+ * \param[in] vk_format Vulkan image format.
+ * \return Stride in pixels.
+ */
+static inline uint32_t pvr_stride_from_pitch(uint32_t pitch, VkFormat vk_format)
+{
+ const unsigned int cpp = vk_format_get_blocksize(vk_format);
+
+ assert(pitch % cpp == 0);
+
+ return pitch / cpp;
+}
+
/* Build the PBE (pixel back end) surface and render parameters for one MRT
 * surface and pack them into the control stream words (pbe_cs_words) and
 * register words (pbe_reg_words).
 */
static void pvr_setup_pbe_state(
   struct pvr_device *const device,
   struct pvr_framebuffer *framebuffer,
   uint32_t mrt_index,
   const struct usc_mrt_resource *mrt_resource,
   const struct pvr_image_view *const iview,
   const VkRect2D *render_area,
   const bool down_scale,
   const uint32_t samples,
   uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
   uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const struct pvr_image *image = iview->image;
   uint32_t level_pitch = image->mip_levels[iview->vk.base_mip_level].pitch;

   struct pvr_pbe_surf_params surface_params;
   struct pvr_pbe_render_params render_params;
   bool with_packed_usc_channel;
   const uint8_t *swizzle;
   uint32_t position;

   /* down_scale should be true when performing a resolve, in which case there
    * should be more than one sample.
    */
   assert((down_scale && samples > 1U) || (!down_scale && samples == 1U));

   /* Setup surface parameters. */

   /* Packed-channel output is only available on cores with the
    * usc_f16sop_u8 feature, and then only for formats known to support it.
    */
   if (PVR_HAS_FEATURE(dev_info, usc_f16sop_u8)) {
      switch (iview->vk.format) {
      case VK_FORMAT_B8G8R8A8_UNORM:
         with_packed_usc_channel = true;
         break;
      case VK_FORMAT_D32_SFLOAT:
         with_packed_usc_channel = false;
         break;
      default:
         unreachable("Unsupported Vulkan image format");
      }
   } else {
      with_packed_usc_channel = false;
   }

   swizzle = pvr_get_format_swizzle(iview->vk.format);
   memcpy(surface_params.swizzle, swizzle, sizeof(surface_params.swizzle));

   pvr_pbe_get_src_format_and_gamma(iview->vk.format,
                                    PVR_PBE_GAMMA_NONE,
                                    with_packed_usc_channel,
                                    &surface_params.source_format,
                                    &surface_params.gamma);

   surface_params.is_normalized = vk_format_is_normalized(iview->vk.format);
   surface_params.pbe_packmode = pvr_get_pbe_packmode(iview->vk.format);
   surface_params.nr_components = vk_format_get_nr_components(iview->vk.format);

   /* FIXME: Should we have an inline function to return the address of a mip
    * level?
    */
   surface_params.addr.addr =
      image->vma->dev_addr.addr +
      image->mip_levels[iview->vk.base_mip_level].offset;

   surface_params.mem_layout = image->memlayout;
   surface_params.stride = pvr_stride_from_pitch(level_pitch, iview->vk.format);
   surface_params.depth = iview->vk.extent.depth;
   surface_params.width = iview->vk.extent.width;
   surface_params.height = iview->vk.extent.height;
   surface_params.z_only_render = false;
   surface_params.down_scale = down_scale;
   surface_params.msaa_mode = samples;

   /* Setup render parameters. */

   /* The source position is either an offset in tile memory or an output
    * register index, depending on where the MRT resource lives.
    */
   if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_MEMORY) {
      position = mrt_resource->u.mem.offset_in_dwords;
   } else {
      assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER);
      assert(mrt_resource->u.reg.offset == 0);

      position = mrt_resource->u.reg.out_reg;
   }

   /* Registers 4-7 only exist on cores with eight output registers. */
   assert(position <= 3 || PVR_HAS_FEATURE(dev_info, eight_output_registers));

   /* Map the dword position onto the 32-bit start offset within the
    * (possibly two) 128-bit source banks.
    */
   switch (position) {
   case 0:
   case 4:
      render_params.source_start = PVR_PBE_STARTPOS_BIT0;
      break;
   case 1:
   case 5:
      render_params.source_start = PVR_PBE_STARTPOS_BIT32;
      break;
   case 2:
   case 6:
      render_params.source_start = PVR_PBE_STARTPOS_BIT64;
      break;
   case 3:
   case 7:
      render_params.source_start = PVR_PBE_STARTPOS_BIT96;
      break;
   default:
      assert(!"Invalid output register");
      break;
   }

   /* Clip the render area to the framebuffer; max_*_clip are inclusive. */
   render_params.min_x_clip = MAX2(0, render_area->offset.x);
   render_params.min_y_clip = MAX2(0, render_area->offset.y);
   render_params.max_x_clip =
      MIN2(framebuffer->width,
           render_area->offset.x + render_area->extent.width) -
      1;
   render_params.max_y_clip =
      MIN2(framebuffer->height,
           render_area->offset.y + render_area->extent.height) -
      1;

   render_params.slice = 0;
   render_params.mrt_index = mrt_index;

   pvr_pbe_pack_state(device,
                      &surface_params,
                      &render_params,
                      pbe_cs_words,
                      pbe_reg_words);
}
+
+static struct pvr_render_target *
+pvr_get_render_target(const struct pvr_render_pass *pass,
+ const struct pvr_framebuffer *framebuffer,
+ uint32_t idx)
+{
+ const struct pvr_renderpass_hwsetup_render *hw_render =
+ &pass->hw_setup->renders[idx];
+ uint32_t rt_idx = 0;
+
+ switch (hw_render->sample_count) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ rt_idx = util_logbase2(hw_render->sample_count);
+ break;
+
+ default:
+ unreachable("Unsupported sample count");
+ break;
+ }
+
+ return &framebuffer->render_targets[rt_idx];
+}
+
+static uint32_t
+pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass,
+ uint32_t idx,
+ const struct pvr_device_info *dev_info)
+{
+ const struct pvr_renderpass_hwsetup_render *hw_render =
+ &pass->hw_setup->renders[idx];
+ /* Default value based on the maximum value found in all existing cores. The
+ * maximum is used as this is being treated as a lower bound, making it a
+ * "safer" choice than the minimum value found in all existing cores.
+ */
+ const uint32_t min_output_regs =
+ PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 2U);
+ const uint32_t width = MAX2(hw_render->output_regs_count, min_output_regs);
+
+ return util_next_power_of_two(width);
+}
+
/* Populate the render job for a graphics sub-command: PBE state for each
 * end-of-tile surface, the pixel event and (optionally) background load op
 * programs, render target/control stream addresses, depth/stencil surface
 * state and the remaining job flags.
 */
static VkResult pvr_sub_cmd_gfx_job_init(struct pvr_device *device,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd *sub_cmd)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_render_pass_info *render_pass_info =
      &cmd_buffer->state.render_pass_info;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &render_pass_info->pass->hw_setup->renders[sub_cmd->gfx.hw_render_idx];
   struct pvr_render_job *job = &sub_cmd->gfx.job;
   struct pvr_pds_upload pds_pixel_event_program;

   uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
                        [ROGUE_NUM_PBESTATE_STATE_WORDS];
   struct pvr_render_target *render_target;
   VkResult result;

   assert(hw_render->eot_surface_count < ARRAY_SIZE(pbe_cs_words));

   /* Pack PBE state for each end-of-tile surface. */
   for (uint32_t i = 0; i < hw_render->eot_surface_count; i++) {
      const struct pvr_renderpass_hwsetup_eot_surface *surface =
         &hw_render->eot_surfaces[i];
      const struct pvr_image_view *iview =
         render_pass_info->attachments[surface->attachment_index];
      const struct usc_mrt_resource *mrt_resource =
         &hw_render->eot_setup.mrt_resources[surface->mrt_index];
      uint32_t samples = 1;

      if (surface->need_resolve)
         pvr_finishme("Set up job resolve information.");

      pvr_setup_pbe_state(device,
                          render_pass_info->framebuffer,
                          surface->mrt_index,
                          mrt_resource,
                          iview,
                          &render_pass_info->render_area,
                          surface->need_resolve,
                          samples,
                          pbe_cs_words[i],
                          job->pbe_reg_words[i]);
   }

   /* FIXME: The fragment program only supports a single surface at present. */
   assert(hw_render->eot_surface_count == 1);
   result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
      cmd_buffer,
      pbe_cs_words[0],
      &pds_pixel_event_program);
   if (result != VK_SUCCESS)
      return result;

   job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;

   /* FIXME: Don't do this if there is a barrier load. */
   if (render_pass_info->enable_bg_tag) {
      const struct pvr_load_op *load_op = hw_render->client_data;
      struct pvr_pds_upload load_op_program;

      /* FIXME: Should we free the PDS pixel event data or let it be freed
       * when the pool gets emptied?
       */
      result = pvr_load_op_data_create_and_upload(cmd_buffer,
                                                  sub_cmd->gfx.hw_render_idx,
                                                  &load_op_program);
      if (result != VK_SUCCESS)
         return result;

      pvr_pds_bgnd_pack_state(load_op,
                              &load_op_program,
                              job->pds_bgnd_reg_values);
   }

   job->enable_bg_tag = render_pass_info->enable_bg_tag;
   job->process_empty_tiles = render_pass_info->process_empty_tiles;

   render_target = pvr_get_render_target(render_pass_info->pass,
                                         render_pass_info->framebuffer,
                                         sub_cmd->gfx.hw_render_idx);
   job->rt_dataset = render_target->rt_dataset;

   job->ctrl_stream_addr =
      pvr_csb_get_start_address(&sub_cmd->gfx.control_stream);

   /* FIXME: Need to set up the border color table at device creation
    * time. Set to invalid for the time being.
    */
   job->border_colour_table_addr = PVR_DEV_ADDR_INVALID;

   /* Depth bias / scissor tables are optional; the BOs only exist when the
    * corresponding staging arrays were non-empty at upload time.
    */
   if (sub_cmd->gfx.depth_bias_bo)
      job->depth_bias_table_addr = sub_cmd->gfx.depth_bias_bo->vma->dev_addr;
   else
      job->depth_bias_table_addr = PVR_DEV_ADDR_INVALID;

   if (sub_cmd->gfx.scissor_bo)
      job->scissor_table_addr = sub_cmd->gfx.scissor_bo->vma->dev_addr;
   else
      job->scissor_table_addr = PVR_DEV_ADDR_INVALID;

   job->pixel_output_width =
      pvr_pass_get_pixel_output_width(render_pass_info->pass,
                                      sub_cmd->gfx.hw_render_idx,
                                      dev_info);

   /* Fill in the depth/stencil surface state, or defaults when the render
    * has no depth/stencil attachment (-1).
    */
   if (hw_render->ds_surface_id != -1) {
      struct pvr_image_view *iview =
         render_pass_info->attachments[hw_render->ds_surface_id];
      const struct pvr_image *image = iview->image;

      if (vk_format_has_depth(image->vk.format)) {
         uint32_t level_pitch =
            image->mip_levels[iview->vk.base_mip_level].pitch;

         /* FIXME: Is this sufficient for depth buffers? */
         job->depth_addr = image->dev_addr;

         job->depth_stride =
            pvr_stride_from_pitch(level_pitch, iview->vk.format);
         job->depth_height = iview->vk.extent.height;
         job->depth_physical_width =
            u_minify(image->physical_extent.width, iview->vk.base_mip_level);
         job->depth_physical_height =
            u_minify(image->physical_extent.height, iview->vk.base_mip_level);
         job->depth_layer_size = image->layer_size;

         if (hw_render->ds_surface_id < render_pass_info->clear_value_count) {
            VkClearValue *clear_values =
               &render_pass_info->clear_values[hw_render->ds_surface_id];

            job->depth_clear_value = clear_values->depthStencil.depth;
         } else {
            /* No clear value provided; use the far-plane default. */
            job->depth_clear_value = 1.0f;
         }

         job->depth_vk_format = iview->vk.format;

         job->depth_memlayout = image->memlayout;
      } else {
         job->depth_addr = PVR_DEV_ADDR_INVALID;
         job->depth_stride = 0;
         job->depth_height = 0;
         job->depth_physical_width = 0;
         job->depth_physical_height = 0;
         job->depth_layer_size = 0;
         job->depth_clear_value = 1.0f;
         job->depth_vk_format = VK_FORMAT_UNDEFINED;
         job->depth_memlayout = PVR_MEMLAYOUT_LINEAR;
      }

      if (vk_format_has_stencil(image->vk.format)) {
         /* FIXME: Is this sufficient for stencil buffers? */
         job->stencil_addr = image->dev_addr;
      } else {
         job->stencil_addr = PVR_DEV_ADDR_INVALID;
      }

      job->samples = image->vk.samples;
   } else {
      pvr_finishme("Set up correct number of samples for render job");

      job->depth_addr = PVR_DEV_ADDR_INVALID;
      job->depth_stride = 0;
      job->depth_height = 0;
      job->depth_physical_width = 0;
      job->depth_physical_height = 0;
      job->depth_layer_size = 0;
      job->depth_clear_value = 1.0f;
      job->depth_vk_format = VK_FORMAT_UNDEFINED;
      job->depth_memlayout = PVR_MEMLAYOUT_LINEAR;

      job->stencil_addr = PVR_DEV_ADDR_INVALID;

      job->samples = 1;
   }

   if (sub_cmd->gfx.max_tiles_in_flight ==
       PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U)) {
      /* Use the default limit based on the partition store. */
      job->max_tiles_in_flight = 0U;
   } else {
      job->max_tiles_in_flight = sub_cmd->gfx.max_tiles_in_flight;
   }

   job->frag_uses_atomic_ops = sub_cmd->gfx.frag_uses_atomic_ops;
   job->disable_compute_overlap = false;
   job->max_shared_registers = cmd_buffer->state.max_shared_regs;
   job->run_frag = true;
   job->geometry_terminate = true;

   return VK_SUCCESS;
}
+
+/* Number of shareds used in the Issue Data Fence(IDF)/Wait Data Fence(WDF)
+ * kernel.
+ */
+#define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U
+
/* Populate the compute submit info for a compute sub-command: control stream
 * base, border color table, shared register count, and the core-masking /
 * overlap flags derived from device features and atomic usage.
 */
static void pvr_sub_cmd_compute_job_init(struct pvr_device *device,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd *sub_cmd)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   if (sub_cmd->compute.uses_barrier) {
      sub_cmd->compute.submit_info.flags |=
         PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
   }

   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.cdm_ctrl_stream_base,
                 CR_CDM_CTRL_STREAM_BASE,
                 value) {
      value.addr = pvr_csb_get_start_address(&sub_cmd->compute.control_stream);
   }

   /* FIXME: Need to set up the border color table at device creation
    * time. Set to invalid for the time being.
    */
   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu_border_colour_table,
                 CR_TPU_BORDER_COLOUR_TABLE_CDM,
                 value) {
      value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
   }

   /* Reserve at least enough shared registers for the IDF/WDF kernel. */
   sub_cmd->compute.num_shared_regs = MAX2(PVR_IDF_WDF_IN_REGISTER_CONST_COUNT,
                                           cmd_buffer->state.max_shared_regs);

   /* The running maximum has been consumed by this sub-command. */
   cmd_buffer->state.max_shared_regs = 0U;

   if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
      sub_cmd->compute.submit_info.regs.cdm_item = 0;

   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu, CR_TPU, value) {
      value.tag_cem_4k_face_packing = true;
   }

   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
       rogue_get_num_phantoms(dev_info) > 1 &&
       sub_cmd->compute.uses_atomic_ops) {
      /* Each phantom has its own MCU, so atomicity can only be guaranteed
       * when all work items are processed on the same phantom. This means we
       * need to disable all USCs other than those of the first phantom, which
       * has 4 clusters.
       */
      pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster,
                    CR_COMPUTE_CLUSTER,
                    value) {
         value.mask = 0xFU;
      }
   } else {
      pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster,
                    CR_COMPUTE_CLUSTER,
                    value) {
         value.mask = 0U;
      }
   }

   /* Similarly, atomics require all work to stay on a single core on
    * multicore devices.
    */
   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
       sub_cmd->compute.uses_atomic_ops) {
      sub_cmd->compute.submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE;
   }
}
+
+#define PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS \
+ (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE))
+
/* Compute the number of USC instances per compute slot for a kernel with the
 * given coefficient (local memory) usage, barrier usage and local size.
 *
 * The result is bounded by ROGUE_MAX_INSTANCES_PER_TASK and, where required
 * (quirk 49032 or barriers), clamped to a whole number of work-groups.
 */
static uint32_t pvr_compute_slot_size(const struct pvr_device_info *dev_info,
                                      uint32_t coeff_regs_count,
                                      bool use_barrier,
                                      const uint32_t local_size[static 3U])
{
   uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK;
   uint32_t max_avail_coeff_regs =
      rogue_get_cdm_max_local_mem_size_regs(dev_info);
   /* Registers are 4 bytes; convert to common store allocation blocks. */
   uint32_t localstore_chunks_count =
      DIV_ROUND_UP(coeff_regs_count << 2,
                   PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
   uint32_t total_workitems = local_size[0U] * local_size[1U] * local_size[2U];

   /* Ensure that we cannot have more workgroups in a slot than the available
    * number of coefficients allow us to have.
    */
   if (coeff_regs_count > 0U) {
      /* If TA or 3D can overlap with CDM, or if the TA is running a geometry
       * shader then we need to consider this in calculating max allowed
       * work-groups.
       */
      if (PVR_HAS_QUIRK(dev_info, 52354) &&
          (PVR_HAS_FEATURE(dev_info, compute_overlap) ||
           PVR_HAS_FEATURE(dev_info, gs_rta_support))) {
         /* Solve for n (number of work-groups per task). All values are in
          * size of common store alloc blocks:
          *
          * n + (2n + 7) * (local_memory_size_max - 1) =
          * 	(coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
          * ==>
          * n + 2n * (local_memory_size_max - 1) =
          * 	(coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
          * 	- (7 * (local_memory_size_max - 1))
          * ==>
          * n * (1 + 2 * (local_memory_size_max - 1)) =
          * 	(coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
          * 	- (7 * (local_memory_size_max - 1))
          * ==>
          * n = ((coefficient_memory_pool_size) -
          * 	(7 * pixel_allocation_size_max) -
          * 	(7 * (local_memory_size_max - 1)) / (1 +
          * 2 * (local_memory_size_max - 1)))
          */
         uint32_t max_common_store_blocks =
            DIV_ROUND_UP(max_avail_coeff_regs * 4U,
                         PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));

         /* (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
          */
         max_common_store_blocks -= ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
                                    PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS;

         /* - (7 * (local_memory_size_max - 1)) */
         max_common_store_blocks -= (ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
                                     (localstore_chunks_count - 1U));

         /* Divide by (1 + 2 * (local_memory_size_max - 1)) */
         max_workgroups_per_task = max_common_store_blocks /
                                   (1U + 2U * (localstore_chunks_count - 1U));

         max_workgroups_per_task =
            MIN2(max_workgroups_per_task,
                 ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK);

      } else {
         /* Simple case: split the coefficient pool evenly. */
         max_workgroups_per_task =
            MIN2((max_avail_coeff_regs / coeff_regs_count),
                 max_workgroups_per_task);
      }
   }

   /* max_workgroups_per_task should at least be one. */
   assert(max_workgroups_per_task >= 1U);

   if (total_workitems >= ROGUE_MAX_INSTANCES_PER_TASK) {
      /* In this case, the work group size will have been padded up to the
       * next ROGUE_MAX_INSTANCES_PER_TASK so we just set max instances to be
       * ROGUE_MAX_INSTANCES_PER_TASK.
       */
      return ROGUE_MAX_INSTANCES_PER_TASK;
   }

   /* In this case, the number of instances in the slot must be clamped to
    * accommodate whole work-groups only.
    */
   if (PVR_HAS_QUIRK(dev_info, 49032) || use_barrier) {
      max_workgroups_per_task =
         MIN2(max_workgroups_per_task,
              ROGUE_MAX_INSTANCES_PER_TASK / total_workitems);
      return total_workitems * max_workgroups_per_task;
   }

   return MIN2(total_workitems * max_workgroups_per_task,
               ROGUE_MAX_INSTANCES_PER_TASK);
}
+
+/* Emit the CDM control stream words (compute kernel words 0..8) into @csb to
+ * launch the compute kernel described by @info. Indirect dispatches encode
+ * the indirect buffer address in words 6/7; direct dispatches encode the
+ * workgroup counts in words 3..5.
+ */
+static void
+pvr_compute_generate_control_stream(struct pvr_csb *csb,
+                                    const struct pvr_compute_kernel_info *info)
+{
+   /* Compute kernel 0. */
+   pvr_csb_emit (csb, CDMCTRL_KERNEL0, kernel0) {
+      /* A non-zero indirect buffer address selects the indirect path below. */
+      kernel0.indirect_present = !!info->indirect_buffer_addr.addr;
+      kernel0.global_offsets_present = info->global_offsets_present;
+      kernel0.usc_common_size = info->usc_common_size;
+      kernel0.usc_unified_size = info->usc_unified_size;
+      kernel0.pds_temp_size = info->pds_temp_size;
+      kernel0.pds_data_size = info->pds_data_size;
+
+      if (info->usc_target_any)
+         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ANY);
+      else
+         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ALL);
+
+      kernel0.fence = info->is_fence;
+   }
+
+   /* Compute kernel 1. */
+   pvr_csb_emit (csb, CDMCTRL_KERNEL1, kernel1) {
+      kernel1.data_addr.addr = info->pds_data_offset;
+      kernel1.sd_type = info->sd_type;
+
+      /* Fence kernels never deallocate shareds through this field. */
+      if (!info->is_fence)
+         kernel1.usc_common_shared = info->usc_common_shared;
+   }
+
+   /* Compute kernel 2. */
+   pvr_csb_emit (csb, CDMCTRL_KERNEL2, kernel2) {
+      kernel2.code_addr.addr = info->pds_code_offset;
+   }
+
+   if (info->indirect_buffer_addr.addr) {
+      /* Compute kernel 6. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL6, kernel6) {
+         kernel6.indirect_addrmsb = info->indirect_buffer_addr;
+      }
+
+      /* Compute kernel 7. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL7, kernel7) {
+         kernel7.indirect_addrlsb = info->indirect_buffer_addr;
+      }
+   } else {
+      /* Direct dispatch: workgroup counts are biased by -1 in the encoding. */
+      /* Compute kernel 3. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL3, kernel3) {
+         assert(info->global_size[0U] > 0U);
+         kernel3.workgroup_x = info->global_size[0U] - 1U;
+      }
+
+      /* Compute kernel 4. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL4, kernel4) {
+         assert(info->global_size[1U] > 0U);
+         kernel4.workgroup_y = info->global_size[1U] - 1U;
+      }
+
+      /* Compute kernel 5. */
+      pvr_csb_emit (csb, CDMCTRL_KERNEL5, kernel5) {
+         assert(info->global_size[2U] > 0U);
+         kernel5.workgroup_z = info->global_size[2U] - 1U;
+      }
+   }
+
+   /* Compute kernel 8. */
+   pvr_csb_emit (csb, CDMCTRL_KERNEL8, kernel8) {
+      /* A field value of 0 appears to encode the maximum instance count:
+       * ROGUE_MAX_INSTANCES_PER_TASK is mapped to 0 here.
+       */
+      if (info->max_instances == ROGUE_MAX_INSTANCES_PER_TASK)
+         kernel8.max_instances = 0U;
+      else
+         kernel8.max_instances = info->max_instances;
+
+      /* Workgroup sizes are biased by -1 in the encoding. */
+      assert(info->local_size[0U] > 0U);
+      kernel8.workgroup_size_x = info->local_size[0U] - 1U;
+      assert(info->local_size[1U] > 0U);
+      kernel8.workgroup_size_y = info->local_size[1U] - 1U;
+      assert(info->local_size[2U] > 0U);
+      kernel8.workgroup_size_z = info->local_size[2U] - 1U;
+   }
+}
+
+/* Emit a 1x1x1 fence compute kernel into the current compute sub-command's
+ * control stream, using the device's pre-uploaded PDS compute fence program.
+ * When @deallocate_shareds is set, the kernel also releases the USC common
+ * shared allocation.
+ */
+static void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
+                                       bool deallocate_shareds)
+{
+   const struct pvr_pds_upload *program =
+      &cmd_buffer->device->pds_compute_fence_program;
+   const struct pvr_device_info *dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_csb *csb = &state->current_sub_cmd->compute.control_stream;
+
+   struct pvr_compute_kernel_info info = {
+      .indirect_buffer_addr.addr = 0ULL,
+      .global_offsets_present = false,
+      .usc_common_size = 0U,
+      .usc_unified_size = 0U,
+      .pds_temp_size = 0U,
+      /* data_size is in dwords (<< 2 converts to bytes) rounded up to the
+       * hardware's PDS data size allocation unit.
+       */
+      .pds_data_size =
+         DIV_ROUND_UP(program->data_size << 2,
+                      PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)),
+      .usc_target_any = true,
+      .is_fence = true,
+      .pds_data_offset = program->data_offset,
+      .sd_type = PVRX(CDMCTRL_SD_TYPE_PDS),
+      .usc_common_shared = deallocate_shareds,
+      .pds_code_offset = program->code_offset,
+      .global_size = { 1U, 1U, 1U },
+      .local_size = { 1U, 1U, 1U },
+   };
+
+   /* We don't need to pad work-group size for this case. */
+   /* Here we calculate the slot size. This can depend on the use of barriers,
+    * local memory, BRN's or other factors.
+    */
+   info.max_instances =
+      pvr_compute_slot_size(dev_info, 0U, false, info.local_size);
+
+   pvr_compute_generate_control_stream(csb, &info);
+}
+
+/* Finalize the command buffer's current sub-command (if any) and clear
+ * state->current_sub_cmd. For graphics this uploads tables, emits PPP state,
+ * terminates the control stream and initializes the job; secondary command
+ * buffers instead just emit a return. For compute this emits a trailing
+ * fence kernel and terminates the stream. Any failure is recorded in
+ * state->status and returned.
+ */
+static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;
+   struct pvr_device *device = cmd_buffer->device;
+   VkResult result;
+
+   /* FIXME: Is this NULL check required because this function is called from
+    * pvr_resolve_unemitted_resolve_attachments()? See comment about this
+    * function being called twice in a row in pvr_CmdEndRenderPass().
+    */
+   if (!sub_cmd)
+      return VK_SUCCESS;
+
+   switch (sub_cmd->type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+      if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+         result = pvr_csb_emit_return(&sub_cmd->gfx.control_stream);
+         if (result != VK_SUCCESS) {
+            state->status = result;
+            return result;
+         }
+
+         break;
+      }
+
+      /* TODO: Check if the sub_cmd can be skipped based on
+       * sub_cmd->gfx.empty_cmd flag.
+       */
+
+      result = pvr_cmd_buffer_upload_tables(device, cmd_buffer);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      result = pvr_csb_emit_terminate(&sub_cmd->gfx.control_stream);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      result = pvr_sub_cmd_gfx_job_init(device, cmd_buffer, sub_cmd);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      break;
+
+   case PVR_SUB_CMD_TYPE_COMPUTE:
+      /* Ensure all prior compute work in this sub-command has completed
+       * before the stream terminates.
+       */
+      pvr_compute_generate_fence(cmd_buffer, true);
+
+      result = pvr_csb_emit_terminate(&sub_cmd->compute.control_stream);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      /* NOTE(review): unlike the graphics path above, the result of the
+       * compute job init is not checked — confirm
+       * pvr_sub_cmd_compute_job_init() cannot fail (or returns void).
+       */
+      pvr_sub_cmd_compute_job_init(device, cmd_buffer, sub_cmd);
+      break;
+
+   case PVR_SUB_CMD_TYPE_TRANSFER:
+      break;
+
+   default:
+      pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
+      break;
+   }
+
+   state->current_sub_cmd = NULL;
+
+   return VK_SUCCESS;
+}
+
+/* Mark the PPP emit-state bits that must be (re-)emitted at the start of a
+ * graphics sub-command, and reset the cached ppp_state. @start_geom selects
+ * the full set of state words required at the start of a geometry phase (see
+ * the comment below); otherwise only a smaller subset is flagged.
+ */
+static void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer_state *state,
+                                           bool start_geom)
+{
+   if (start_geom) {
+      /*
+       * Initial geometry phase State.
+       * It's the driver's responsibility to ensure that the state of the
+       * hardware is correctly initialized at the start of every geometry
+       * phase. This is required to prevent stale state from a previous
+       * geometry phase erroneously affecting the next geometry phase. The
+       * following fields in PPP State Header, and their corresponding state
+       * words, must be supplied in the first PPP State Update of a geometry
+       * phase that contains any geometry (draw calls). Any field not listed
+       * below is safe to ignore.
+       *
+       *	TA_PRES_STREAM_OUT_SIZE
+       *	TA_PRES_PPPCTRL
+       *	TA_PRES_VARYING_WORD2
+       *	TA_PRES_VARYING_WORD1
+       *	TA_PRES_VARYING_WORD0
+       *	TA_PRES_OUTSELECTS
+       *	TA_PRES_WCLAMP
+       *	TA_VIEWPORT_COUNT
+       *	TA_PRES_VIEWPORT
+       *	TA_PRES_REGION_CLIP
+       *	TA_PRES_PDSSTATEPTR0
+       *	TA_PRES_ISPCTLFB
+       *	TA_PRES_ISPCTLFA
+       *	TA_PRES_ISPCTL
+       *
+       * If a geometry phase does not contain any geometry, this restriction
+       * can be ignored. If the first draw call in a geometry phase will only
+       * update the depth or stencil buffers i.e. ISP_TAGWRITEDISABLE is set
+       * in the ISP State Control Word, the PDS State Pointers
+       * (TA_PRES_PDSSTATEPTR*) in the first PPP State Update do not need to
+       * be supplied, since they will never reach the PDS in the fragment
+       * phase.
+       */
+
+      /* Clear everything first, then set exactly the bits listed above. */
+      state->emit_state_bits = 0;
+
+      state->emit_state.stream_out = true;
+      state->emit_state.ppp_control = true;
+      state->emit_state.varying_word2 = true;
+      state->emit_state.varying_word1 = true;
+      state->emit_state.varying_word0 = true;
+      state->emit_state.output_selects = true;
+      state->emit_state.wclamp = true;
+      state->emit_state.viewport = true;
+      state->emit_state.region_clip = true;
+      state->emit_state.pds_fragment_stateptr0 = true;
+      state->emit_state.isp_fb = true;
+      state->emit_state.isp = true;
+   } else {
+      /* Not starting a geometry phase: only OR in the subset below, leaving
+       * any already-pending emit bits intact.
+       */
+      state->emit_state.ppp_control = true;
+      state->emit_state.varying_word1 = true;
+      state->emit_state.varying_word0 = true;
+      state->emit_state.output_selects = true;
+      state->emit_state.viewport = true;
+      state->emit_state.region_clip = true;
+      state->emit_state.pds_fragment_stateptr0 = true;
+      state->emit_state.isp_fb = true;
+      state->emit_state.isp = true;
+   }
+
+   memset(&state->ppp_state, 0U, sizeof(state->ppp_state));
+
+   state->dirty.vertex_bindings = true;
+   state->dirty.gfx_pipeline_binding = true;
+   state->dirty.viewport = true;
+}
+
+/* Ensure the command buffer has a current sub-command of @type, ending any
+ * current sub-command of a different type and allocating/initializing a new
+ * one. Returns early with the recorded error if the command buffer is
+ * already in an error state. The new sub-command is appended to
+ * cmd_buffer->sub_cmds and becomes state->current_sub_cmd.
+ */
+static VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                             enum pvr_sub_cmd_type type)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_device *device = cmd_buffer->device;
+   struct pvr_sub_cmd *sub_cmd;
+   VkResult result;
+
+   /* Check the current status of the buffer. */
+   if (state->status != VK_SUCCESS)
+      return state->status;
+
+   pvr_cmd_buffer_update_barriers(cmd_buffer, type);
+
+   if (state->current_sub_cmd) {
+      if (state->current_sub_cmd->type == type) {
+         /* Continue adding to the current sub command. */
+         return VK_SUCCESS;
+      }
+
+      /* End the current sub command. */
+      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   sub_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                       sizeof(*sub_cmd),
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!sub_cmd) {
+      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return state->status;
+   }
+
+   sub_cmd->type = type;
+
+   switch (type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+
+      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
+      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
+      sub_cmd->gfx.modifies_depth = false;
+      sub_cmd->gfx.modifies_stencil = false;
+      sub_cmd->gfx.max_tiles_in_flight =
+         PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info,
+                               isp_max_tiles_in_flight,
+                               1);
+      sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass;
+      sub_cmd->gfx.framebuffer = state->render_pass_info.framebuffer;
+      sub_cmd->gfx.empty_cmd = true;
+
+      /* A new graphics sub-command starts a new geometry phase, so request
+       * the full initial PPP state emit.
+       */
+      pvr_reset_graphics_dirty_state(state, true);
+      pvr_csb_init(device,
+                   PVR_CMD_STREAM_TYPE_GRAPHICS,
+                   &sub_cmd->gfx.control_stream);
+      break;
+
+   case PVR_SUB_CMD_TYPE_COMPUTE:
+      pvr_csb_init(device,
+                   PVR_CMD_STREAM_TYPE_COMPUTE,
+                   &sub_cmd->compute.control_stream);
+      break;
+
+   case PVR_SUB_CMD_TYPE_TRANSFER:
+      list_inithead(&sub_cmd->transfer.transfer_cmds);
+      break;
+
+   default:
+      pvr_finishme("Unsupported sub-command type %d", type);
+      break;
+   }
+
+   list_addtail(&sub_cmd->link, &cmd_buffer->sub_cmds);
+   state->current_sub_cmd = sub_cmd;
+
+   return VK_SUCCESS;
+}
+
+/* Allocate a buffer object of @size bytes from @heap, aligned to the SLC
+ * cache line size, and track it on the command buffer's bo_list so it is
+ * freed with the command buffer. On failure the error is recorded in
+ * cmd_buffer->state.status and returned; *pvr_bo_out is only written on
+ * success.
+ */
+VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
+                                  struct pvr_winsys_heap *heap,
+                                  uint64_t size,
+                                  uint32_t flags,
+                                  struct pvr_bo **const pvr_bo_out)
+{
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&cmd_buffer->device->pdevice->dev_info);
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   result = pvr_bo_alloc(cmd_buffer->device,
+                         heap,
+                         size,
+                         cache_line_size,
+                         flags,
+                         &pvr_bo);
+   if (result != VK_SUCCESS) {
+      cmd_buffer->state.status = result;
+      return result;
+   }
+
+   list_add(&pvr_bo->link, &cmd_buffer->bo_list);
+
+   *pvr_bo_out = pvr_bo;
+
+   return VK_SUCCESS;
+}
+
+/* vkResetCommandBuffer: not yet implemented; asserts in debug builds and
+ * returns VK_SUCCESS in release builds.
+ */
+VkResult pvr_ResetCommandBuffer(VkCommandBuffer commandBuffer,
+                                VkCommandBufferResetFlags flags)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* Record the bound compute pipeline and flag the binding dirty so the next
+ * dispatch re-emits compute pipeline state.
+ */
+static void pvr_cmd_bind_compute_pipeline(
+   const struct pvr_compute_pipeline *const compute_pipeline,
+   struct pvr_cmd_buffer *const cmd_buffer)
+{
+   cmd_buffer->state.compute_pipeline = compute_pipeline;
+   cmd_buffer->state.dirty.compute_pipeline_binding = true;
+}
+
+/* Record the bound graphics pipeline and copy its static (non-dynamic) state
+ * into the command buffer's dynamic state. A set bit in src_state->mask
+ * means that piece of state is dynamic (supplied via vkCmdSet*) and must NOT
+ * be overwritten here; each copied piece is flagged dirty for re-emission.
+ */
+static void pvr_cmd_bind_graphics_pipeline(
+   const struct pvr_graphics_pipeline *const gfx_pipeline,
+   struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_dynamic_state *const dest_state =
+      &cmd_buffer->state.dynamic.common;
+   const struct pvr_dynamic_state *const src_state =
+      &gfx_pipeline->dynamic_state;
+   struct pvr_cmd_buffer_state *const cmd_buffer_state = &cmd_buffer->state;
+   const uint32_t state_mask = src_state->mask;
+
+   cmd_buffer_state->gfx_pipeline = gfx_pipeline;
+   cmd_buffer_state->dirty.gfx_pipeline_binding = true;
+
+   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_VIEWPORT. */
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_VIEWPORT)) {
+      assert(!"Unimplemented");
+   }
+
+   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_SCISSOR. */
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_SCISSOR)) {
+      assert(!"Unimplemented");
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH)) {
+      dest_state->line_width = src_state->line_width;
+
+      cmd_buffer_state->dirty.line_width = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) {
+      memcpy(&dest_state->depth_bias,
+             &src_state->depth_bias,
+             sizeof(src_state->depth_bias));
+
+      cmd_buffer_state->dirty.depth_bias = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) {
+      STATIC_ASSERT(
+         __same_type(dest_state->blend_constants, src_state->blend_constants));
+
+      typed_memcpy(dest_state->blend_constants,
+                   src_state->blend_constants,
+                   ARRAY_SIZE(dest_state->blend_constants));
+
+      cmd_buffer_state->dirty.blend_constants = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) {
+      dest_state->compare_mask.front = src_state->compare_mask.front;
+      dest_state->compare_mask.back = src_state->compare_mask.back;
+
+      cmd_buffer_state->dirty.compare_mask = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) {
+      dest_state->write_mask.front = src_state->write_mask.front;
+      dest_state->write_mask.back = src_state->write_mask.back;
+
+      cmd_buffer_state->dirty.write_mask = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) {
+      dest_state->reference.front = src_state->reference.front;
+      dest_state->reference.back = src_state->reference.back;
+
+      cmd_buffer_state->dirty.reference = true;
+   }
+}
+
+/* vkCmdBindPipeline: dispatch to the compute or graphics bind helper based
+ * on the pipeline bind point.
+ */
+void pvr_CmdBindPipeline(VkCommandBuffer commandBuffer,
+                         VkPipelineBindPoint pipelineBindPoint,
+                         VkPipeline _pipeline)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   PVR_FROM_HANDLE(pvr_pipeline, pipeline, _pipeline);
+
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_COMPUTE:
+      pvr_cmd_bind_compute_pipeline(to_pvr_compute_pipeline(pipeline),
+                                    cmd_buffer);
+      break;
+
+   case VK_PIPELINE_BIND_POINT_GRAPHICS:
+      pvr_cmd_bind_graphics_pipeline(to_pvr_graphics_pipeline(pipeline),
+                                     cmd_buffer);
+      break;
+
+   default:
+      unreachable("Invalid bind point.");
+      break;
+   }
+}
+
+#if defined(DEBUG)
+/* Debug-only check for BRN70165: warn when the viewport plus its guard band
+ * extends outside the screen-space range representable by the hardware's
+ * fixed-point format, in which case geometry outside the viewport may be
+ * corrupted. Does not alter any state; only logs a warning.
+ */
+static void check_viewport_quirk_70165(const struct pvr_device *device,
+                                       const VkViewport *pViewport)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   float min_vertex_x, max_vertex_x, min_vertex_y, max_vertex_y;
+   float min_screen_space_value, max_screen_space_value;
+   float sign_to_unsigned_offset, fixed_point_max;
+   float guardband_width, guardband_height;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      /* Max representable value in 13.4 fixed point format.
+       * Round-down to avoid precision issues.
+       * Calculated as (2 ** 13) - 2*(2 ** -4)
+       */
+      fixed_point_max = 8192.0f - 2.0f / 16.0f;
+
+      if (PVR_HAS_FEATURE(dev_info, screen_size8K)) {
+         if (pViewport->width <= 4096 && pViewport->height <= 4096) {
+            guardband_width = pViewport->width / 4.0f;
+            guardband_height = pViewport->height / 4.0f;
+
+            /* 2k of the range is negative */
+            sign_to_unsigned_offset = 2048.0f;
+         } else {
+            guardband_width = 0.0f;
+            guardband_height = 0.0f;
+
+            /* For > 4k renders, the entire range is positive */
+            sign_to_unsigned_offset = 0.0f;
+         }
+      } else {
+         guardband_width = pViewport->width / 4.0f;
+         guardband_height = pViewport->height / 4.0f;
+
+         /* 2k of the range is negative */
+         sign_to_unsigned_offset = 2048.0f;
+      }
+   } else {
+      /* Max representable value in 16.8 fixed point format
+       * Calculated as (2 ** 16) - (2 ** -8)
+       */
+      fixed_point_max = 65535.99609375f;
+      guardband_width = pViewport->width / 4.0f;
+      guardband_height = pViewport->height / 4.0f;
+
+      /* 4k/20k of the range is negative */
+      sign_to_unsigned_offset = (float)PVR_MAX_NEG_OFFSCREEN_OFFSET;
+   }
+
+   /* Representable screen-space window after removing the negative offset. */
+   min_screen_space_value = -sign_to_unsigned_offset;
+   max_screen_space_value = fixed_point_max - sign_to_unsigned_offset;
+
+   /* Extreme vertex coordinates the guard band would allow through. */
+   min_vertex_x = pViewport->x - guardband_width;
+   max_vertex_x = pViewport->x + pViewport->width + guardband_width;
+   min_vertex_y = pViewport->y - guardband_height;
+   max_vertex_y = pViewport->y + pViewport->height + guardband_height;
+   if (min_vertex_x < min_screen_space_value ||
+       max_vertex_x > max_screen_space_value ||
+       min_vertex_y < min_screen_space_value ||
+       max_vertex_y > max_screen_space_value) {
+      mesa_logw("Viewport is affected by BRN70165, geometry outside "
+                "the viewport could be corrupted");
+   }
+}
+#endif
+
+/* vkCmdSetViewport: copy the supplied viewports into the dynamic state,
+ * growing the tracked viewport count if needed, and flag viewport state
+ * dirty. In debug builds, warn for viewports affected by BRN70165.
+ */
+void pvr_CmdSetViewport(VkCommandBuffer commandBuffer,
+                        uint32_t firstViewport,
+                        uint32_t viewportCount,
+                        const VkViewport *pViewports)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   const uint32_t total_count = firstViewport + viewportCount;
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   assert(firstViewport < PVR_MAX_VIEWPORTS && viewportCount > 0);
+   assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+#if defined(DEBUG)
+   if (PVR_HAS_QUIRK(&cmd_buffer->device->pdevice->dev_info, 70165)) {
+      for (uint32_t viewport = 0; viewport < viewportCount; viewport++) {
+         check_viewport_quirk_70165(cmd_buffer->device, &pViewports[viewport]);
+      }
+   }
+#endif
+
+   /* The count only ever grows; earlier viewports stay valid. */
+   if (state->dynamic.common.viewport.count < total_count)
+      state->dynamic.common.viewport.count = total_count;
+
+   memcpy(&state->dynamic.common.viewport.viewports[firstViewport],
+          pViewports,
+          viewportCount * sizeof(*pViewports));
+
+   state->dirty.viewport = true;
+}
+
+/* vkCmdSetScissor: copy the supplied scissor rects into the dynamic state,
+ * growing the tracked scissor count if needed, and flag scissor state dirty.
+ */
+void pvr_CmdSetScissor(VkCommandBuffer commandBuffer,
+                       uint32_t firstScissor,
+                       uint32_t scissorCount,
+                       const VkRect2D *pScissors)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   const uint32_t total_count = firstScissor + scissorCount;
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   assert(firstScissor < PVR_MAX_VIEWPORTS && scissorCount > 0);
+   assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   /* The count only ever grows; earlier scissors stay valid. */
+   if (state->dynamic.common.scissor.count < total_count)
+      state->dynamic.common.scissor.count = total_count;
+
+   memcpy(&state->dynamic.common.scissor.scissors[firstScissor],
+          pScissors,
+          scissorCount * sizeof(*pScissors));
+
+   state->dirty.scissor = true;
+}
+
+/* vkCmdSetLineWidth: record the dynamic line width and flag it dirty. */
+void pvr_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   state->dynamic.common.line_width = lineWidth;
+   state->dirty.line_width = true;
+}
+
+/* vkCmdSetDepthBias: record the dynamic depth bias factors and flag them
+ * dirty.
+ */
+void pvr_CmdSetDepthBias(VkCommandBuffer commandBuffer,
+                         float depthBiasConstantFactor,
+                         float depthBiasClamp,
+                         float depthBiasSlopeFactor)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   state->dynamic.common.depth_bias.constant_factor = depthBiasConstantFactor;
+   state->dynamic.common.depth_bias.clamp = depthBiasClamp;
+   state->dynamic.common.depth_bias.slope_factor = depthBiasSlopeFactor;
+   state->dirty.depth_bias = true;
+}
+
+/* vkCmdSetBlendConstants: record the four dynamic blend constants and flag
+ * them dirty.
+ */
+void pvr_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
+                              const float blendConstants[4])
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   STATIC_ASSERT(ARRAY_SIZE(state->dynamic.common.blend_constants) == 4);
+   memcpy(state->dynamic.common.blend_constants,
+          blendConstants,
+          sizeof(state->dynamic.common.blend_constants));
+
+   state->dirty.blend_constants = true;
+}
+
+/* vkCmdSetDepthBounds: intentional no-op — depth bounds testing is not
+ * supported; just log for debugging.
+ */
+void pvr_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
+                           float minDepthBounds,
+                           float maxDepthBounds)
+{
+   mesa_logd("No support for depth bounds testing.");
+}
+
+/* vkCmdSetStencilCompareMask: record the dynamic stencil compare mask for
+ * the faces selected by @faceMask and flag it dirty.
+ */
+void pvr_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
+                                  VkStencilFaceFlags faceMask,
+                                  uint32_t compareMask)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      state->dynamic.common.compare_mask.front = compareMask;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      state->dynamic.common.compare_mask.back = compareMask;
+
+   state->dirty.compare_mask = true;
+}
+
+/* vkCmdSetStencilWriteMask: record the dynamic stencil write mask for the
+ * faces selected by @faceMask and flag it dirty.
+ */
+void pvr_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
+                                VkStencilFaceFlags faceMask,
+                                uint32_t writeMask)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      state->dynamic.common.write_mask.front = writeMask;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      state->dynamic.common.write_mask.back = writeMask;
+
+   state->dirty.write_mask = true;
+}
+
+/* vkCmdSetStencilReference: record the dynamic stencil reference value for
+ * the faces selected by @faceMask and flag it dirty.
+ */
+void pvr_CmdSetStencilReference(VkCommandBuffer commandBuffer,
+                                VkStencilFaceFlags faceMask,
+                                uint32_t reference)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
+      state->dynamic.common.reference.front = reference;
+
+   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
+      state->dynamic.common.reference.back = reference;
+
+   state->dirty.reference = true;
+}
+
+/* vkCmdBindDescriptorSets: record the bound descriptor sets in the graphics
+ * or compute descriptor state (selected by @pipelineBindPoint) and flag the
+ * corresponding descriptor state dirty.
+ *
+ * NOTE(review): dynamicOffsetCount/pDynamicOffsets are currently ignored —
+ * confirm dynamic descriptor offsets are not yet supported.
+ */
+void pvr_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
+                               VkPipelineBindPoint pipelineBindPoint,
+                               VkPipelineLayout _layout,
+                               uint32_t firstSet,
+                               uint32_t descriptorSetCount,
+                               const VkDescriptorSet *pDescriptorSets,
+                               uint32_t dynamicOffsetCount,
+                               const uint32_t *pDynamicOffsets)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_descriptor_state *descriptor_state;
+
+   assert(firstSet + descriptorSetCount <= PVR_MAX_DESCRIPTOR_SETS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   /* Reject unsupported bind points before touching any state. */
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_GRAPHICS:
+   case VK_PIPELINE_BIND_POINT_COMPUTE:
+      break;
+
+   default:
+      unreachable("Unsupported bind point.");
+      break;
+   }
+
+   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      descriptor_state = &cmd_buffer->state.gfx_desc_state;
+      cmd_buffer->state.dirty.gfx_desc_dirty = true;
+   } else {
+      descriptor_state = &cmd_buffer->state.compute_desc_state;
+      cmd_buffer->state.dirty.compute_desc_dirty = true;
+   }
+
+   for (uint32_t i = 0; i < descriptorSetCount; i++) {
+      PVR_FROM_HANDLE(pvr_descriptor_set, set, pDescriptorSets[i]);
+      uint32_t index = firstSet + i;
+
+      /* Only mark the slot valid when the bound set actually changes. */
+      if (descriptor_state->descriptor_sets[index] != set) {
+         descriptor_state->descriptor_sets[index] = set;
+         descriptor_state->valid_mask |= (1u << index);
+      }
+   }
+}
+
+/* vkCmdBindVertexBuffers: record the buffer/offset pairs in the vertex
+ * binding state and flag vertex bindings dirty. The actual hardware setup is
+ * deferred (see comment below).
+ */
+void pvr_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
+                              uint32_t firstBinding,
+                              uint32_t bindingCount,
+                              const VkBuffer *pBuffers,
+                              const VkDeviceSize *pOffsets)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_vertex_binding *const vb = cmd_buffer->state.vertex_bindings;
+
+   /* We have to defer setting up vertex buffer since we need the buffer
+    * stride from the pipeline.
+    */
+
+   assert(firstBinding < PVR_MAX_VERTEX_INPUT_BINDINGS &&
+          bindingCount <= PVR_MAX_VERTEX_INPUT_BINDINGS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   for (uint32_t i = 0; i < bindingCount; i++) {
+      vb[firstBinding + i].buffer = pvr_buffer_from_handle(pBuffers[i]);
+      vb[firstBinding + i].offset = pOffsets[i];
+   }
+
+   cmd_buffer->state.dirty.vertex_bindings = true;
+}
+
+/* vkCmdBindIndexBuffer: record the index buffer, byte offset, and index type
+ * (only 16- and 32-bit indices are supported) and flag the binding dirty.
+ */
+void pvr_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
+                            VkBuffer buffer,
+                            VkDeviceSize offset,
+                            VkIndexType indexType)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, index_buffer, buffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   assert(offset < index_buffer->size);
+   assert(indexType == VK_INDEX_TYPE_UINT32 ||
+          indexType == VK_INDEX_TYPE_UINT16);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   state->index_buffer_binding.buffer = index_buffer;
+   state->index_buffer_binding.offset = offset;
+   state->index_buffer_binding.type = indexType;
+   state->dirty.index_buffer_binding = true;
+}
+
+/* vkCmdPushConstants: copy @size bytes of push-constant data at @offset into
+ * the command buffer's push-constant storage and mark the affected shader
+ * stages dirty.
+ */
+void pvr_CmdPushConstants(VkCommandBuffer commandBuffer,
+                          VkPipelineLayout layout,
+                          VkShaderStageFlags stageFlags,
+                          uint32_t offset,
+                          uint32_t size,
+                          const void *pValues)
+{
+#if defined(DEBUG)
+   /* 64-bit sum so offset + size cannot wrap before the range check.
+    * NOTE(review): `ending` only exists in DEBUG builds, so the pvr_assert()
+    * below presumably compiles to nothing otherwise — confirm.
+    */
+   const uint64_t ending = (uint64_t)offset + (uint64_t)size;
+#endif
+
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   pvr_assert(ending <= PVR_MAX_PUSH_CONSTANTS_SIZE);
+
+   memcpy(&state->push_constants.data[offset], pValues, size);
+
+   state->push_constants.dirty_stages |= stageFlags;
+}
+
+/* (Re-)allocate the render-pass-info attachment array for @pass, populating
+ * it from @framebuffer's attachments when a framebuffer is provided. Any
+ * previously allocated array is freed first. On allocation failure the error
+ * is recorded in state->status and returned.
+ *
+ * Fix: the original asserted pass->attachment_count ==
+ * framebuffer->attachment_count unconditionally, dereferencing @framebuffer
+ * before the `if (framebuffer)` check below — a NULL framebuffer (which that
+ * check shows is a legal input) would crash debug builds. The assert now
+ * tolerates NULL.
+ */
+static VkResult
+pvr_cmd_buffer_setup_attachments(struct pvr_cmd_buffer *cmd_buffer,
+                                 const struct pvr_render_pass *pass,
+                                 const struct pvr_framebuffer *framebuffer)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_render_pass_info *info = &state->render_pass_info;
+
+   /* Only validate the attachment counts when a framebuffer is present. */
+   assert(!framebuffer ||
+          pass->attachment_count == framebuffer->attachment_count);
+
+   /* Free any previously allocated attachments. */
+   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.attachments);
+
+   if (pass->attachment_count == 0) {
+      info->attachments = NULL;
+      return VK_SUCCESS;
+   }
+
+   info->attachments =
+      vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                pass->attachment_count * sizeof(*info->attachments),
+                8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!info->attachments) {
+      /* Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
+      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return state->status;
+   }
+
+   if (framebuffer) {
+      for (uint32_t i = 0; i < pass->attachment_count; i++)
+         info->attachments[i] = framebuffer->attachments[i];
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Lazily create the render-target dataset for each hardware render of @pass,
+ * sized from @framebuffer. Each target's mutex guards its `valid` flag so
+ * concurrent initialization is safe; already-valid targets are skipped. On
+ * failure the mutex is released and the error returned.
+ */
+static VkResult pvr_init_render_targets(struct pvr_device *device,
+                                        struct pvr_render_pass *pass,
+                                        struct pvr_framebuffer *framebuffer)
+{
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_render_target *render_target =
+         pvr_get_render_target(pass, framebuffer, i);
+
+      pthread_mutex_lock(&render_target->mutex);
+
+      if (!render_target->valid) {
+         const struct pvr_renderpass_hwsetup_render *hw_render =
+            &pass->hw_setup->renders[i];
+         VkResult result;
+
+         result = pvr_render_target_dataset_create(device,
+                                                   framebuffer->width,
+                                                   framebuffer->height,
+                                                   hw_render->sample_count,
+                                                   framebuffer->layers,
+                                                   &render_target->rt_dataset);
+         if (result != VK_SUCCESS) {
+            pthread_mutex_unlock(&render_target->mutex);
+            return result;
+         }
+
+         render_target->valid = true;
+      }
+
+      pthread_mutex_unlock(&render_target->mutex);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Translate a Vulkan subpass index into the corresponding hardware subpass
+ * via the pass's subpass_map (render index + subpass index within it).
+ */
+static const struct pvr_renderpass_hwsetup_subpass *
+pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
+{
+   const struct pvr_renderpass_hw_map *map =
+      &pass->hw_setup->subpass_map[subpass];
+
+   return &pass->hw_setup->renders[map->render].subpasses[map->subpass];
+}
+
+/* Handle the start-of-render clear for one attachment of the current
+ * hardware render. @index is the color-init index (must be 0 with
+ * @is_depth_stencil set, which instead selects the render's ds surface).
+ * Returns early when no clear is requested, or when the clear is already
+ * covered by the single-layer fullscreen path in pvr_sub_cmd_gfx_job_init().
+ * The remaining (index-list) path is not implemented yet;
+ * @index_list_clear_mask is currently never modified.
+ */
+static void pvr_perform_start_of_render_attachment_clear(
+   struct pvr_cmd_buffer *cmd_buffer,
+   const struct pvr_framebuffer *framebuffer,
+   uint32_t index,
+   bool is_depth_stencil,
+   uint32_t *index_list_clear_mask)
+{
+   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
+   const struct pvr_render_pass *pass = info->pass;
+   const struct pvr_renderpass_hwsetup_render *hw_render;
+   const struct pvr_renderpass_hwsetup *hw_setup;
+   struct pvr_image_view *iview;
+   uint32_t view_idx;
+   uint32_t height;
+   uint32_t width;
+
+   hw_setup = pass->hw_setup;
+   hw_render =
+      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
+
+   if (is_depth_stencil) {
+      bool stencil_clear;
+      bool depth_clear;
+      bool is_stencil;
+      bool is_depth;
+
+      assert(hw_render->ds_surface_id != -1);
+      assert(index == 0);
+
+      view_idx = hw_render->ds_surface_id;
+
+      is_depth = vk_format_has_depth(pass->attachments[view_idx].vk_format);
+      is_stencil = vk_format_has_stencil(pass->attachments[view_idx].vk_format);
+      depth_clear = hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR;
+      stencil_clear = hw_render->stencil_init ==
+                      RENDERPASS_SURFACE_INITOP_CLEAR;
+
+      /* Attempt to clear the ds attachment. Do not erroneously discard an
+       * attachment that has no depth clear but has a stencil attachment.
+       */
+      /* if not (a ∧ c) ∨ (b ∧ d) */
+      if (!((is_depth && depth_clear) || (is_stencil && stencil_clear)))
+         return;
+   } else if (hw_render->color_init[index].op !=
+              RENDERPASS_SURFACE_INITOP_CLEAR) {
+      return;
+   } else {
+      view_idx = hw_render->color_init[index].driver_id;
+   }
+
+   iview = info->attachments[view_idx];
+   width = iview->vk.extent.width;
+   height = iview->vk.extent.height;
+
+   /* FIXME: It would be nice if this function and pvr_sub_cmd_gfx_job_init()
+    * were doing the same check (even if it's just an assert) to determine if a
+    * clear is needed.
+    */
+   /* If this is single-layer fullscreen, we already do the clears in
+    * pvr_sub_cmd_gfx_job_init().
+    */
+   if (info->render_area.offset.x == 0 && info->render_area.offset.y == 0 &&
+       info->render_area.extent.width == width &&
+       info->render_area.extent.height == height && framebuffer->layers == 1) {
+      return;
+   }
+
+   pvr_finishme("Unimplemented path!");
+}
+
+/* Perform start-of-render clears for the current hardware render's color and
+ * depth/stencil attachments, and derive enable_bg_tag /
+ * process_empty_tiles from which clears use the background object versus
+ * index lists.
+ */
+static void
+pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
+   const struct pvr_framebuffer *framebuffer = info->framebuffer;
+   const struct pvr_render_pass *pass = info->pass;
+   const struct pvr_renderpass_hwsetup *hw_setup = pass->hw_setup;
+   const struct pvr_renderpass_hwsetup_render *hw_render;
+
+   /* Mask of attachment clears using index lists instead of background object
+    * to clear.
+    */
+   uint32_t index_list_clear_mask = 0;
+
+   hw_render =
+      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
+   /* NOTE(review): hw_render is the address of an array element, so this
+    * check looks unreachable unless hw_setup->renders can be NULL — confirm
+    * the intent.
+    */
+   if (!hw_render) {
+      info->process_empty_tiles = false;
+      info->enable_bg_tag = false;
+      return;
+   }
+
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
+                                                   framebuffer,
+                                                   i,
+                                                   false,
+                                                   &index_list_clear_mask);
+   }
+
+   info->enable_bg_tag = !!hw_render->color_init_count;
+
+   /* If we're not using index list for all clears/loads then we need to run
+    * the background object on empty tiles.
+    */
+   if (hw_render->color_init_count &&
+       index_list_clear_mask != ((1u << hw_render->color_init_count) - 1u)) {
+      info->process_empty_tiles = true;
+   } else {
+      info->process_empty_tiles = false;
+   }
+
+   if (hw_render->ds_surface_id != -1) {
+      uint32_t ds_index_list = 0;
+
+      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
+                                                   framebuffer,
+                                                   0,
+                                                   true,
+                                                   &ds_index_list);
+   }
+
+   if (index_list_clear_mask)
+      pvr_finishme("Add support for generating loadops shaders!");
+}
+
+/* Records the format of the current HW render's depth/stencil attachment in
+ * the command buffer state. Leaves state->depth_format untouched when the
+ * render has no depth/stencil surface.
+ */
+static void pvr_stash_depth_format(struct pvr_cmd_buffer_state *state)
+{
+   const struct pvr_render_pass *pass = state->render_pass_info.pass;
+   const uint32_t hw_render_idx = state->current_sub_cmd->gfx.hw_render_idx;
+   const struct pvr_renderpass_hwsetup_render *hw_render =
+      &pass->hw_setup->renders[hw_render_idx];
+
+   /* Nothing to stash if no depth/stencil surface is attached. */
+   if (hw_render->ds_surface_id == -1)
+      return;
+
+   state->depth_format =
+      state->render_pass_info.attachments[hw_render->ds_surface_id]->vk.format;
+}
+
+/* Returns true if any load-op of any HW render in the given setup performs a
+ * clear, i.e. if any color init entry, depth init or stencil init is
+ * RENDERPASS_SURFACE_INITOP_CLEAR.
+ */
+static bool pvr_loadops_contain_clear(struct pvr_renderpass_hwsetup *hw_setup)
+{
+   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
+      const struct pvr_renderpass_hwsetup_render *hw_render =
+         &hw_setup->renders[i];
+      /* color_init holds render_targets_count entries per init. */
+      const uint32_t color_init_entries =
+         hw_render->color_init_count *
+         hw_render->init_setup.render_targets_count;
+
+      if (hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR ||
+          hw_render->stencil_init == RENDERPASS_SURFACE_INITOP_CLEAR) {
+         return true;
+      }
+
+      for (uint32_t entry = 0; entry < color_init_entries; entry++) {
+         if (hw_render->color_init[entry].op ==
+             RENDERPASS_SURFACE_INITOP_CLEAR) {
+            return true;
+         }
+      }
+   }
+
+   return false;
+}
+
+/* Copies the clear values from a VkRenderPassBeginInfo into the command
+ * buffer's render pass state, replacing any values from a previous begin.
+ *
+ * On allocation failure the command buffer status is set and the error is
+ * returned; the stored clear values are left NULL in that case.
+ */
+static VkResult
+pvr_cmd_buffer_set_clear_values(struct pvr_cmd_buffer *cmd_buffer,
+                                const VkRenderPassBeginInfo *pRenderPassBegin)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   const uint32_t clear_value_count = pRenderPassBegin->clearValueCount;
+
+   /* Drop any clear values left over from a previous render pass begin. */
+   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.clear_values);
+   state->render_pass_info.clear_values = NULL;
+
+   if (clear_value_count > 0) {
+      const size_t size =
+         clear_value_count * sizeof(*state->render_pass_info.clear_values);
+
+      state->render_pass_info.clear_values =
+         vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                   size,
+                   8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+      if (!state->render_pass_info.clear_values) {
+         state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+         return state->status;
+      }
+
+      memcpy(state->render_pass_info.clear_values,
+             pRenderPassBegin->pClearValues,
+             size);
+   }
+
+   state->render_pass_info.clear_value_count = clear_value_count;
+
+   return VK_SUCCESS;
+}
+
+/* Implements vkCmdBeginRenderPass2().
+ *
+ * Records render pass/framebuffer state on the command buffer, sets up
+ * attachments, render targets and clear values, starts the first graphics
+ * sub-command, performs the start-of-render clears for subpass 0 and primes
+ * the scissor accumulation state used by load-op clears.
+ */
+void pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
+                             const VkRenderPassBeginInfo *pRenderPassBeginInfo,
+                             const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
+{
+   PVR_FROM_HANDLE(pvr_framebuffer,
+                   framebuffer,
+                   pRenderPassBeginInfo->framebuffer);
+   PVR_FROM_HANDLE(pvr_render_pass, pass, pRenderPassBeginInfo->renderPass);
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   const struct pvr_renderpass_hwsetup_subpass *hw_subpass;
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   VkResult result;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   /* Render passes must not nest, and may only begin on primary command
+    * buffers.
+    */
+   assert(!state->render_pass_info.pass);
+   assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
+
+   /* FIXME: Create a separate function for everything using pass->subpasses,
+    * look at cmd_buffer_begin_subpass() for example. */
+   state->render_pass_info.pass = pass;
+   state->render_pass_info.framebuffer = framebuffer;
+   state->render_pass_info.subpass_idx = 0;
+   state->render_pass_info.render_area = pRenderPassBeginInfo->renderArea;
+   state->render_pass_info.current_hw_subpass = 0;
+   state->render_pass_info.pipeline_bind_point =
+      pass->subpasses[0].pipeline_bind_point;
+   state->render_pass_info.userpass_spawn = pass->subpasses[0].userpass_spawn;
+   state->dirty.userpass_spawn = true;
+
+   /* NOTE(review): failures below return without updating state->status here;
+    * presumably the callees record the error on the command buffer - confirm.
+    */
+   result = pvr_cmd_buffer_setup_attachments(cmd_buffer, pass, framebuffer);
+   if (result != VK_SUCCESS)
+      return;
+
+   state->status =
+      pvr_init_render_targets(cmd_buffer->device, pass, framebuffer);
+   if (state->status != VK_SUCCESS)
+      return;
+
+   result = pvr_cmd_buffer_set_clear_values(cmd_buffer, pRenderPassBeginInfo);
+   if (result != VK_SUCCESS)
+      return;
+
+   assert(pass->subpasses[0].pipeline_bind_point ==
+          VK_PIPELINE_BIND_POINT_GRAPHICS);
+
+   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
+   if (result != VK_SUCCESS)
+      return;
+
+   /* Run subpass 0 "soft" background object after the actual background
+    * object.
+    */
+   hw_subpass = pvr_get_hw_subpass(pass, 0);
+   if (hw_subpass->client_data)
+      pvr_finishme("Unimplemented path!");
+
+   pvr_perform_start_of_render_clears(cmd_buffer);
+   pvr_stash_depth_format(&cmd_buffer->state);
+
+   /* When no load-op performs a clear we start accumulating scissor bounds
+    * (reset to empty here) to check for a clear later; otherwise scissor
+    * accumulation is disabled.
+    */
+   if (!pvr_loadops_contain_clear(pass->hw_setup)) {
+      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR;
+      state->dynamic.scissor_accum_bounds.offset.x = 0;
+      state->dynamic.scissor_accum_bounds.offset.y = 0;
+      state->dynamic.scissor_accum_bounds.extent.width = 0;
+      state->dynamic.scissor_accum_bounds.extent.height = 0;
+   } else {
+      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_DISABLED;
+   }
+}
+
+/* Returns a command buffer to the initial state, releasing all sub-commands,
+ * BOs and scratch arrays it accumulated while recording. A buffer already in
+ * the initial state is left untouched.
+ */
+static void pvr_cmd_buffer_reset(struct pvr_cmd_buffer *cmd_buffer)
+{
+   if (cmd_buffer->status == PVR_CMD_BUFFER_STATUS_INITIAL)
+      return;
+
+   /* FIXME: For now we always free all resources as if
+    * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set.
+    */
+   pvr_cmd_buffer_free_sub_cmds(cmd_buffer);
+
+   list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
+      list_del(&bo->link);
+      pvr_bo_free(cmd_buffer->device, bo);
+   }
+
+   util_dynarray_clear(&cmd_buffer->scissor_array);
+   util_dynarray_clear(&cmd_buffer->depth_bias_array);
+
+   cmd_buffer->state.status = VK_SUCCESS;
+   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;
+}
+
+/* Implements vkBeginCommandBuffer().
+ *
+ * Performs the implicit reset, stores the (sanitized) usage flags and, for
+ * secondary buffers recorded entirely inside a render pass, seeds the render
+ * pass state from the inheritance info and starts a graphics sub-command.
+ */
+VkResult pvr_BeginCommandBuffer(VkCommandBuffer commandBuffer,
+                                const VkCommandBufferBeginInfo *pBeginInfo)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *state;
+   VkResult result;
+
+   /* Implicit reset: beginning a buffer in any state returns it to the
+    * initial state first.
+    */
+   pvr_cmd_buffer_reset(cmd_buffer);
+
+   cmd_buffer->usage_flags = pBeginInfo->flags;
+   state = &cmd_buffer->state;
+
+   /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for
+    * primary level command buffers.
+    *
+    * From the Vulkan 1.0 spec:
+    *
+    *    VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a
+    *    secondary command buffer is considered to be entirely inside a render
+    *    pass. If this is a primary command buffer, then this bit is ignored.
+    */
+   if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
+      cmd_buffer->usage_flags &=
+         ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
+   }
+
+   /* Only reachable for secondary buffers (the bit is cleared above for
+    * primaries): inherit the render pass state the buffer will record into.
+    */
+   if (cmd_buffer->usage_flags &
+       VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
+      const VkCommandBufferInheritanceInfo *inheritance_info =
+         pBeginInfo->pInheritanceInfo;
+      struct pvr_render_pass *pass;
+
+      pass = pvr_render_pass_from_handle(inheritance_info->renderPass);
+      state->render_pass_info.pass = pass;
+      state->render_pass_info.framebuffer =
+         pvr_framebuffer_from_handle(inheritance_info->framebuffer);
+      state->render_pass_info.subpass_idx = inheritance_info->subpass;
+      state->render_pass_info.userpass_spawn =
+         pass->subpasses[inheritance_info->subpass].userpass_spawn;
+
+      result =
+         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   /* Assume every kind of barrier is needed until proven otherwise. */
+   memset(state->barriers_needed,
+          0xFF,
+          sizeof(*state->barriers_needed) * ARRAY_SIZE(state->barriers_needed));
+
+   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_RECORDING;
+
+   return VK_SUCCESS;
+}
+
+/* Appends a transfer command to the current transfer sub-command, starting a
+ * new transfer sub-command first if needed. The command buffer takes over
+ * tracking of transfer_cmd via its list link.
+ */
+VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                         struct pvr_transfer_cmd *transfer_cmd)
+{
+   const VkResult result =
+      pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
+
+   if (result != VK_SUCCESS)
+      return result;
+
+   list_addtail(&transfer_cmd->link,
+                &cmd_buffer->state.current_sub_cmd->transfer.transfer_cmds);
+
+   return VK_SUCCESS;
+}
+
+/* Implements vkCmdDispatch(). Compute dispatch recording is not implemented
+ * yet; debug builds abort here (the assert compiles out under NDEBUG).
+ */
+void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
+                     uint32_t groupCountX,
+                     uint32_t groupCountY,
+                     uint32_t groupCountZ)
+{
+   assert(!"Unimplemented");
+}
+
+/* Implements vkCmdDispatchIndirect(). Not implemented yet; debug builds
+ * abort here (the assert compiles out under NDEBUG).
+ */
+void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
+                             VkBuffer _buffer,
+                             VkDeviceSize offset)
+{
+   assert(!"Unimplemented");
+}
+
+/* Implements vkCmdDraw(). Draw recording is not implemented yet; debug
+ * builds abort here (the assert compiles out under NDEBUG).
+ */
+void pvr_CmdDraw(VkCommandBuffer commandBuffer,
+                 uint32_t vertexCount,
+                 uint32_t instanceCount,
+                 uint32_t firstVertex,
+                 uint32_t firstInstance)
+{
+   assert(!"Unimplemented");
+}
+
+/* Stores the new draw call state and marks the PDS vertex attribute program
+ * dirty where required.
+ *
+ * There is no dedicated state telling us whether base_instance is in use, so
+ * it doubles as a boolean: 0 selects the PDS program that skips the base
+ * instance addition, and a transition from 0 to non-zero requires switching
+ * to the BASE_INSTANCE attrib program (a new variant). If only the non-zero
+ * base_instance value changes, just the data section needs re-uploading.
+ *
+ * The start vertex is deliberately ignored; it is handled by the VDM state
+ * in all cases.
+ */
+static void
+pvr_update_draw_state(struct pvr_cmd_buffer_state *const state,
+                      const struct pvr_cmd_buffer_draw_state *const draw_state)
+{
+   const struct pvr_cmd_buffer_draw_state *const prev = &state->draw_state;
+   const bool needs_new_variant =
+      prev->draw_indexed != draw_state->draw_indexed ||
+      prev->draw_indirect != draw_state->draw_indirect ||
+      (prev->base_instance == 0 && draw_state->base_instance != 0);
+
+   if (needs_new_variant)
+      state->dirty.draw_variant = true;
+   else if (prev->base_instance != draw_state->base_instance)
+      state->dirty.draw_base_instance = true;
+
+   state->draw_state = *draw_state;
+}
+
+/* Returns the number of const shared registers needed by the pipeline: the
+ * maximum of the vertex stage's requirement (count + offset) and, when a
+ * fragment shader is present, the fragment stage's.
+ */
+static uint32_t pvr_calc_shared_regs_count(
+   const struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   const struct pvr_pipeline_stage_state *const vs_stage =
+      &gfx_pipeline->vertex_shader_state.stage_state;
+   uint32_t count =
+      vs_stage->const_shared_reg_count + vs_stage->const_shared_reg_offset;
+
+   /* Only account for the fragment stage when a fragment shader exists. */
+   if (gfx_pipeline->fragment_shader_state.bo) {
+      const struct pvr_pipeline_stage_state *const fs_stage =
+         &gfx_pipeline->fragment_shader_state.stage_state;
+
+      count = MAX2(count,
+                   fs_stage->const_shared_reg_count +
+                      fs_stage->const_shared_reg_offset);
+   }
+
+   return count;
+}
+
+/* Writes _value into the PDS data section staging buffer _buffer.
+ *
+ * _offset and _max are in dwords; the value's own type (32 or 64 bit)
+ * determines the element width. The static_assert enforces that _buffer's
+ * element type matches _value's type, the first runtime assert bounds-checks
+ * against _max, and the modulo assert ensures 64-bit writes land on a
+ * naturally aligned (even dword) offset.
+ */
+#define PVR_WRITE(_buffer, _value, _offset, _max)                 \
+   do {                                                           \
+      __typeof__(_value) __value = _value;                        \
+      uint64_t __offset = _offset;                                \
+      uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t);  \
+      static_assert(__same_type(*_buffer, __value),               \
+                    "Buffer and value type mismatch");            \
+      assert((__offset + __nr_dwords) <= (_max));                 \
+      assert((__offset % __nr_dwords) == 0U);                     \
+      _buffer[__offset / __nr_dwords] = __value;                  \
+   } while (0)
+
+/* Allocates and fills the PDS data section for the vertex attribute program.
+ *
+ * Walks the PDS const map entries, patching each entry's location in the
+ * freshly allocated data section with literals, the vertex shader USC
+ * execution address, the current base instance or vertex buffer addresses.
+ * On success the section's offset from the PDS heap base is stored in
+ * state->pds_vertex_attrib_offset.
+ */
+static VkResult
+pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer,
+                         const struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   const struct pvr_vertex_shader_state *const vertex_state =
+      &gfx_pipeline->vertex_shader_state;
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_pds_info *const pds_info = state->pds_shader.info;
+   const uint8_t *entries;
+   uint32_t *dword_buffer;
+   uint64_t *qword_buffer;
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
+                                     cmd_buffer->device->heaps.pds_heap,
+                                     pds_info->data_size_in_dwords,
+                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                                     &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Entries write either 32-bit or 64-bit values; both views alias the same
+    * CPU mapping.
+    */
+   dword_buffer = (uint32_t *)pvr_bo->bo->map;
+   qword_buffer = (uint64_t *)pvr_bo->bo->map;
+
+   entries = (uint8_t *)pds_info->entries;
+
+   /* The entry stream is variable-stride: each iteration advances by the
+    * size of the concrete entry type just consumed.
+    */
+   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
+      const struct pvr_const_map_entry *const entry_header =
+         (struct pvr_const_map_entry *)entries;
+
+      switch (entry_header->type) {
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
+         const struct pvr_const_map_entry_literal32 *const literal =
+            (struct pvr_const_map_entry_literal32 *)entries;
+
+         PVR_WRITE(dword_buffer,
+                   literal->literal_value,
+                   literal->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*literal);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: {
+         const struct pvr_const_map_entry_doutu_address *const doutu_addr =
+            (struct pvr_const_map_entry_doutu_address *)entries;
+         pvr_dev_addr_t exec_addr = vertex_state->bo->vma->dev_addr;
+         uint64_t addr = 0ULL;
+
+         /* USC execution address of the vertex shader, combined with the
+          * entry's DOUTU control bits.
+          */
+         exec_addr.addr += vertex_state->entry_offset;
+         pvr_set_usc_execution_address64(&addr, exec_addr.addr);
+
+         PVR_WRITE(qword_buffer,
+                   addr | doutu_addr->doutu_control,
+                   doutu_addr->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*doutu_addr);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE: {
+         const struct pvr_const_map_entry_base_instance *const base_instance =
+            (struct pvr_const_map_entry_base_instance *)entries;
+
+         PVR_WRITE(dword_buffer,
+                   state->draw_state.base_instance,
+                   base_instance->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*base_instance);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS: {
+         const struct pvr_const_map_entry_vertex_attribute_address
+            *const attribute =
+               (struct pvr_const_map_entry_vertex_attribute_address *)entries;
+         const struct pvr_vertex_binding *const binding =
+            &state->vertex_bindings[attribute->binding_index];
+         /* Device address of the attribute: buffer base + bind offset +
+          * attribute offset.
+          */
+         uint64_t addr = binding->buffer->dev_addr.addr;
+
+         addr += binding->offset;
+         addr += attribute->offset;
+
+         PVR_WRITE(qword_buffer,
+                   addr,
+                   attribute->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*attribute);
+         break;
+      }
+
+      default:
+         unreachable("Unsupported data section map");
+         break;
+      }
+   }
+
+   /* Offset of the data section relative to the PDS heap base. */
+   state->pds_vertex_attrib_offset =
+      pvr_bo->vma->dev_addr.addr -
+      cmd_buffer->device->heaps.pds_heap->base_addr.addr;
+
+   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);
+
+   return VK_SUCCESS;
+}
+
+/* Allocates and fills the PDS data section for the given stage's uniform
+ * (descriptor) program.
+ *
+ * Walks the PDS const map entries, patching literals, constant buffer
+ * addresses and descriptor set addresses into the freshly allocated data
+ * section. On success the section's offset from the PDS heap base is written
+ * to uniform_data_offset_out. If the program has no data section this is a
+ * no-op and uniform_data_offset_out is left untouched.
+ */
+static VkResult pvr_setup_descriptor_mappings(
+   struct pvr_cmd_buffer *const cmd_buffer,
+   enum pvr_stage_allocation stage,
+   const struct pvr_stage_allocation_uniform_state *uniform_state,
+   uint32_t *const uniform_data_offset_out)
+{
+   const struct pvr_pds_info *const pds_info = &uniform_state->pds_info;
+   const struct pvr_descriptor_state *desc_state;
+   const uint8_t *entries;
+   uint32_t *dword_buffer;
+   uint64_t *qword_buffer;
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   if (!pds_info->data_size_in_dwords)
+      return VK_SUCCESS;
+
+   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
+                                     cmd_buffer->device->heaps.pds_heap,
+                                     pds_info->data_size_in_dwords,
+                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                                     &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Entries write either 32-bit or 64-bit values; both views alias the same
+    * CPU mapping.
+    */
+   dword_buffer = (uint32_t *)pvr_bo->bo->map;
+   qword_buffer = (uint64_t *)pvr_bo->bo->map;
+
+   entries = (uint8_t *)pds_info->entries;
+
+   /* Select the descriptor state matching the stage being set up. */
+   switch (stage) {
+   case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
+   case PVR_STAGE_ALLOCATION_FRAGMENT:
+      desc_state = &cmd_buffer->state.gfx_desc_state;
+      break;
+
+   case PVR_STAGE_ALLOCATION_COMPUTE:
+      desc_state = &cmd_buffer->state.compute_desc_state;
+      break;
+
+   default:
+      unreachable("Unsupported stage.");
+      break;
+   }
+
+   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
+      const struct pvr_const_map_entry *const entry_header =
+         (struct pvr_const_map_entry *)entries;
+
+      switch (entry_header->type) {
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
+         const struct pvr_const_map_entry_literal32 *const literal =
+            (struct pvr_const_map_entry_literal32 *)entries;
+
+         PVR_WRITE(dword_buffer,
+                   literal->literal_value,
+                   literal->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*literal);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER: {
+         const struct pvr_const_map_entry_constant_buffer *const_buffer_entry =
+            (struct pvr_const_map_entry_constant_buffer *)entries;
+         const uint32_t desc_set = const_buffer_entry->desc_set;
+         const uint32_t binding = const_buffer_entry->binding;
+         const struct pvr_descriptor_set *descriptor_set;
+         const struct pvr_descriptor *descriptor;
+         pvr_dev_addr_t buffer_addr;
+
+         /* TODO: Handle push descriptors. */
+
+         assert(desc_set < PVR_MAX_DESCRIPTOR_SETS);
+         /* Use the per-stage descriptor state selected above. Previously
+          * this always read the graphics descriptor state, which was wrong
+          * for the compute stage.
+          */
+         descriptor_set = desc_state->descriptor_sets[desc_set];
+
+         /* TODO: Handle dynamic buffers. */
+         descriptor = &descriptor_set->descriptors[binding];
+         assert(descriptor->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
+
+         assert(descriptor->buffer_desc_range ==
+                const_buffer_entry->size_in_dwords * sizeof(uint32_t));
+         assert(descriptor->buffer_create_info_size ==
+                const_buffer_entry->size_in_dwords * sizeof(uint32_t));
+
+         buffer_addr = descriptor->buffer_dev_addr;
+         buffer_addr.addr += const_buffer_entry->offset * sizeof(uint32_t);
+
+         PVR_WRITE(qword_buffer,
+                   buffer_addr.addr,
+                   const_buffer_entry->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*const_buffer_entry);
+         break;
+      }
+
+      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET: {
+         const struct pvr_const_map_entry_descriptor_set *desc_set_entry =
+            (struct pvr_const_map_entry_descriptor_set *)entries;
+         const uint32_t desc_set_num = desc_set_entry->descriptor_set;
+         const struct pvr_descriptor_set *descriptor_set;
+         pvr_dev_addr_t desc_set_addr;
+
+         assert(desc_set_num < PVR_MAX_DESCRIPTOR_SETS);
+
+         /* TODO: Remove this when the compiler provides us with usage info?
+          */
+         /* We skip DMAing unbound descriptor sets. */
+         if (!(desc_state->valid_mask & BITFIELD_BIT(desc_set_num))) {
+            const struct pvr_const_map_entry_literal32 *literal;
+            uint32_t zero_literal_value;
+
+            entries += sizeof(*desc_set_entry);
+            literal = (struct pvr_const_map_entry_literal32 *)entries;
+
+            /* TODO: Is there any guarantee that a literal will follow the
+             * descriptor set entry?
+             */
+            assert(literal->type == PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32);
+
+            /* We zero out the DMA size so the DMA isn't performed. */
+            zero_literal_value =
+               literal->literal_value &
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK;
+
+            /* UINT64_C keeps the value's type uint64_t on all ABIs so that
+             * PVR_WRITE's __same_type check against qword_buffer holds
+             * (plain 0UL is only 32 bits wide on ILP32).
+             */
+            PVR_WRITE(qword_buffer,
+                      UINT64_C(0),
+                      desc_set_entry->const_offset,
+                      pds_info->data_size_in_dwords);
+
+            PVR_WRITE(dword_buffer,
+                      zero_literal_value,
+                      desc_set_entry->const_offset,
+                      pds_info->data_size_in_dwords);
+
+            /* The literal was consumed as well, so advance past it too. */
+            entries += sizeof(*literal);
+            i++;
+            continue;
+         }
+
+         descriptor_set = desc_state->descriptor_sets[desc_set_num];
+
+         pvr_finishme("Handle push descriptor entry.");
+
+         desc_set_addr = descriptor_set->pvr_bo->vma->dev_addr;
+
+         /* Primary vs. secondary copies live at different offsets in the
+          * set's per-stage memory layout; offsets are in dwords (<< 2U
+          * converts to bytes).
+          */
+         if (desc_set_entry->primary) {
+            desc_set_addr.addr +=
+               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
+                  .primary_offset
+               << 2U;
+         } else {
+            desc_set_addr.addr +=
+               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
+                  .secondary_offset
+               << 2U;
+         }
+
+         desc_set_addr.addr += (uint64_t)desc_set_entry->offset_in_dwords << 2U;
+
+         PVR_WRITE(qword_buffer,
+                   desc_set_addr.addr,
+                   desc_set_entry->const_offset,
+                   pds_info->data_size_in_dwords);
+
+         entries += sizeof(*desc_set_entry);
+         break;
+      }
+
+      default:
+         unreachable("Unsupported map entry type.");
+      }
+   }
+
+   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);
+
+   /* Offset of the data section relative to the PDS heap base. */
+   *uniform_data_offset_out =
+      pvr_bo->vma->dev_addr.addr -
+      cmd_buffer->device->heaps.pds_heap->base_addr.addr;
+
+   return VK_SUCCESS;
+}
+
+#undef PVR_WRITE
+
+/* Emits VDMCTRL_PDS_STATE0..2 for the vertex uniform (descriptor) PDS
+ * program into the current graphics sub-command's control stream.
+ *
+ * pds_vertex_uniform_data_offset is the data section offset previously
+ * produced by pvr_setup_descriptor_mappings() for the vertex stage. Nothing
+ * is emitted when the pipeline has no uniform PDS program.
+ */
+static void
+pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
+                         const uint32_t pds_vertex_uniform_data_offset)
+{
+   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_stage_allocation_uniform_state *const vertex_uniform_state =
+      &state->gfx_pipeline->vertex_shader_state.uniform_state;
+   const struct pvr_pipeline_stage_state *const vertex_stage_state =
+      &state->gfx_pipeline->vertex_shader_state.stage_state;
+   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
+
+   if (!vertex_uniform_state->pds_info.code_size_in_dwords)
+      return;
+
+   pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) {
+      state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ALL);
+
+      /* << 2 converts dword counts to bytes before rounding up to the
+       * hardware's unit sizes.
+       */
+      state0.usc_common_size =
+         DIV_ROUND_UP(vertex_stage_state->const_shared_reg_count << 2,
+                      PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
+
+      state0.pds_data_size =
+         DIV_ROUND_UP(vertex_uniform_state->pds_info.data_size_in_dwords << 2,
+                      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE));
+   }
+
+   pvr_csb_emit (csb, VDMCTRL_PDS_STATE1, state1) {
+      state1.pds_data_addr.addr = pds_vertex_uniform_data_offset;
+      state1.sd_type = PVRX(VDMCTRL_SD_TYPE_NONE);
+   }
+
+   pvr_csb_emit (csb, VDMCTRL_PDS_STATE2, state2) {
+      state2.pds_code_addr.addr = vertex_uniform_state->pds_code.code_offset;
+   }
+}
+
+/* Packs the TA output select and varying words from the bound pipeline's
+ * vertex shader state into the PPP state, setting the corresponding emit
+ * flags only for words whose packed value actually changed.
+ */
+static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline =
+      cmd_buffer->state.gfx_pipeline;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   const struct pvr_vertex_shader_state *const vertex_state =
+      &gfx_pipeline->vertex_shader_state;
+   uint32_t output_selects;
+
+   /* TODO: Handle vertex and fragment shader state flags. */
+
+   pvr_csb_pack (&output_selects, TA_OUTPUT_SEL, state) {
+      const VkPrimitiveTopology topology =
+         gfx_pipeline->input_asm_state.topology;
+
+      state.rhw_pres = true;
+      /* Vertex output size is in dwords; vtxsize is in units of 4 dwords. */
+      state.vtxsize = DIV_ROUND_UP(vertex_state->vertex_output_size, 4U);
+      /* Point sprite size is only present for point lists. */
+      state.psprite_size_pres = (topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
+   }
+
+   if (ppp_state->output_selects != output_selects) {
+      ppp_state->output_selects = output_selects;
+      emit_state->output_selects = true;
+   }
+
+   if (ppp_state->varying_word[0] != vertex_state->varying[0]) {
+      ppp_state->varying_word[0] = vertex_state->varying[0];
+      emit_state->varying_word0 = true;
+   }
+
+   if (ppp_state->varying_word[1] != vertex_state->varying[1]) {
+      ppp_state->varying_word[1] = vertex_state->varying[1];
+      emit_state->varying_word1 = true;
+   }
+}
+
+/* clang-format off */
+static enum PVRX(TA_OBJTYPE)
+pvr_ppp_state_get_ispa_objtype_from_vk(const VkPrimitiveTopology topology)
+/* clang-format on */
+{
+   /* Maps a Vulkan primitive topology onto the ISP object type used in the
+    * ISPA state word.
+    */
+   switch (topology) {
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+      return PVRX(TA_OBJTYPE_TRIANGLE);
+
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+      return PVRX(TA_OBJTYPE_LINE);
+
+   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+      return PVRX(TA_OBJTYPE_SPRITE_01UV);
+
+   default:
+      unreachable("Invalid topology.");
+      return 0;
+   }
+}
+
+/* Packs the ISP per-face (ISPA/ISPB) and control (ISPCTL) words from the
+ * bound graphics pipeline, dynamic state and current render pass, stores
+ * them in the PPP state and flags which words need (re-)emitting.
+ *
+ * If ispa_out is non-NULL the unpacked common ISPA struct is returned so the
+ * caller can reuse dcmpmode, dwritedisable, passtype and objtype (needed by
+ * pvr_setup_triangle_merging_flag).
+ */
+static void pvr_setup_isp_faces_and_control(
+   struct pvr_cmd_buffer *const cmd_buffer,
+   struct pvr_cmd_struct(TA_STATE_ISPA) *const ispa_out)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline =
+      cmd_buffer->state.gfx_pipeline;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   const struct pvr_dynamic_state *const dynamic_state =
+      &cmd_buffer->state.dynamic.common;
+   const struct pvr_render_pass_info *const pass_info =
+      &cmd_buffer->state.render_pass_info;
+   const uint32_t subpass_idx = pass_info->subpass_idx;
+   const uint32_t *depth_stencil_attachment_idx =
+      pass_info->pass->subpasses[subpass_idx].depth_stencil_attachment;
+   const struct pvr_image_view *const attachment =
+      (!depth_stencil_attachment_idx)
+         ? NULL
+         : pass_info->attachments[*depth_stencil_attachment_idx];
+
+   const VkCullModeFlags cull_mode = gfx_pipeline->raster_state.cull_mode;
+   const bool raster_discard_enabled =
+      gfx_pipeline->raster_state.discard_enable;
+   /* With rasterizer discard, or without a depth/stencil attachment, all
+    * depth/stencil tests and writes are disabled.
+    */
+   const bool disable_all = raster_discard_enabled || !attachment;
+
+   const VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
+   const enum PVRX(TA_OBJTYPE)
+      obj_type = pvr_ppp_state_get_ispa_objtype_from_vk(topology);
+
+   const bool disable_stencil_write = disable_all;
+   /* Note: when attachment is NULL, disable_all is true and short-circuit
+    * evaluation avoids dereferencing it below.
+    */
+   const bool disable_stencil_test =
+      disable_all || !vk_format_has_stencil(attachment->vk.format);
+
+   const bool disable_depth_write = disable_all;
+   const bool disable_depth_test = disable_all ||
+                                   !vk_format_has_depth(attachment->vk.format);
+
+   uint32_t ispb_stencil_off;
+   bool is_two_sided = false;
+   uint32_t isp_control;
+
+   uint32_t line_width;
+   uint32_t common_a;
+   uint32_t front_a;
+   uint32_t front_b;
+   uint32_t back_a;
+   uint32_t back_b;
+
+   /* Convert to 4.4 fixed point format. */
+   line_width = util_unsigned_fixed(dynamic_state->line_width, 4);
+
+   /* Subtract 1 to shift values from range [0=0,256=16] to [0=1/16,255=16].
+    * If 0 it stays at 0, otherwise we subtract 1.
+    */
+   line_width = (!!line_width) * (line_width - 1);
+
+   line_width = MIN2(line_width, PVRX(TA_STATE_ISPA_POINTLINEWIDTH_SIZE_MAX));
+
+   /* TODO: Part of the logic in this function is duplicated in another part
+    * of the code. E.g. the dcmpmode, and sop1/2/3. Could we do this earlier?
+    */
+
+   pvr_csb_pack (&common_a, TA_STATE_ISPA, ispa) {
+      ispa.pointlinewidth = line_width;
+
+      if (disable_depth_test)
+         ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS);
+      else
+         ispa.dcmpmode = gfx_pipeline->depth_compare_op;
+
+      /* FIXME: Can we just have this and remove the assignment above?
+       * The user provides a depthTestEnable at vkCreateGraphicsPipelines()
+       * should we be using that?
+       */
+      ispa.dcmpmode |= gfx_pipeline->depth_compare_op;
+
+      ispa.dwritedisable = disable_depth_test || disable_depth_write;
+      /* FIXME: Can we just have this and remove the assignment above? */
+      ispa.dwritedisable = ispa.dwritedisable ||
+                           gfx_pipeline->depth_write_disable;
+
+      ispa.passtype = gfx_pipeline->fragment_shader_state.pass_type;
+
+      ispa.objtype = obj_type;
+
+      /* Return unpacked ispa structure. dcmpmode, dwritedisable, passtype and
+       * objtype are needed by pvr_setup_triangle_merging_flag.
+       */
+      if (ispa_out)
+         *ispa_out = ispa;
+   }
+
+   /* FIXME: This logic should be redone and improved. Can we also get rid of
+    * the front and back variants?
+    */
+
+   pvr_csb_pack (&front_a, TA_STATE_ISPA, ispa) {
+      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.front;
+   }
+   front_a |= common_a;
+
+   pvr_csb_pack (&back_a, TA_STATE_ISPA, ispa) {
+      /* Fix: the back face must use the back stencil *reference* value;
+       * this previously packed compare_mask.back into sref (the compare
+       * masks belong in ISPB's scmpmask, packed below).
+       */
+      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.back;
+   }
+   back_a |= common_a;
+
+   /* TODO: Does this actually represent the ispb control word on stencil off?
+    * If not, rename the variable.
+    */
+   pvr_csb_pack (&ispb_stencil_off, TA_STATE_ISPB, ispb) {
+      ispb.sop3 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.sop2 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.sop1 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.scmpmode = PVRX(TA_CMPMODE_ALWAYS);
+   }
+
+   if (disable_stencil_test) {
+      back_b = front_b = ispb_stencil_off;
+   } else {
+      pvr_csb_pack (&front_b, TA_STATE_ISPB, ispb) {
+         ispb.swmask =
+            (!disable_stencil_write) * dynamic_state->write_mask.front;
+         ispb.scmpmask = dynamic_state->compare_mask.front;
+
+         ispb.sop3 = gfx_pipeline->stencil_front.pass_op;
+         ispb.sop2 = gfx_pipeline->stencil_front.depth_fail_op;
+         ispb.sop1 = gfx_pipeline->stencil_front.fail_op;
+
+         ispb.scmpmode = gfx_pipeline->stencil_front.compare_op;
+      }
+
+      pvr_csb_pack (&back_b, TA_STATE_ISPB, ispb) {
+         ispb.swmask =
+            (!disable_stencil_write) * dynamic_state->write_mask.back;
+         ispb.scmpmask = dynamic_state->compare_mask.back;
+
+         ispb.sop3 = gfx_pipeline->stencil_back.pass_op;
+         ispb.sop2 = gfx_pipeline->stencil_back.depth_fail_op;
+         ispb.sop1 = gfx_pipeline->stencil_back.fail_op;
+
+         ispb.scmpmode = gfx_pipeline->stencil_back.compare_op;
+      }
+   }
+
+   if (front_a != back_a || front_b != back_b) {
+      if (cull_mode & VK_CULL_MODE_BACK_BIT) {
+         /* Single face, using front state. */
+      } else if (cull_mode & VK_CULL_MODE_FRONT_BIT) {
+         /* Single face, using back state. */
+
+         front_a = back_a;
+         front_b = back_b;
+      } else {
+         /* Both faces. */
+
+         emit_state->isp_ba = is_two_sided = true;
+
+         /* With counter-clockwise front faces the hardware's notion of
+          * front/back is swapped relative to the packed words.
+          */
+         if (gfx_pipeline->raster_state.front_face ==
+             VK_FRONT_FACE_COUNTER_CLOCKWISE) {
+            uint32_t tmp = front_a;
+
+            front_a = back_a;
+            back_a = tmp;
+
+            tmp = front_b;
+            front_b = back_b;
+            back_b = tmp;
+         }
+
+         /* HW defaults to stencil off. */
+         if (back_b != ispb_stencil_off)
+            emit_state->isp_fb = emit_state->isp_bb = true;
+      }
+   }
+
+   if (!disable_stencil_test && front_b != ispb_stencil_off)
+      emit_state->isp_fb = true;
+
+   pvr_csb_pack (&isp_control, TA_STATE_ISPCTL, ispctl) {
+      ispctl.upass = pass_info->userpass_spawn;
+
+      /* TODO: is bo ever NULL? Figure out what to do. */
+      ispctl.tagwritedisable = raster_discard_enabled ||
+                               !gfx_pipeline->fragment_shader_state.bo;
+
+      ispctl.two_sided = is_two_sided;
+      ispctl.bpres = emit_state->isp_fb || emit_state->isp_bb;
+
+      /* Depth bias only applies to triangles. */
+      ispctl.dbenable = !raster_discard_enabled &&
+                        gfx_pipeline->raster_state.depth_bias_enable &&
+                        obj_type == PVRX(TA_OBJTYPE_TRIANGLE);
+      ispctl.scenable = !raster_discard_enabled;
+
+      ppp_state->isp.control_struct = ispctl;
+   }
+
+   emit_state->isp = true;
+
+   ppp_state->isp.control = isp_control;
+   ppp_state->isp.front_a = front_a;
+   ppp_state->isp.front_b = front_b;
+   ppp_state->isp.back_a = back_a;
+   ppp_state->isp.back_b = back_b;
+}
+
+/* Computes the intersection of a viewport and a scissor rectangle.
+ *
+ * A negative viewport height (y-flip) is normalized first. When the scissor
+ * is empty or the two rectangles do not overlap, rect_out is the zero rect.
+ */
+static void pvr_get_viewport_scissor_overlap(const VkViewport *const viewport,
+                                             const VkRect2D *const scissor,
+                                             VkRect2D *const rect_out)
+{
+   /* Signed bounds: (x0, y0) inclusive min corner, (x1, y1) max corner. */
+   /* TODO: Worry about overflow? */
+   const int32_t scissor_x0 = scissor->offset.x;
+   const int32_t scissor_y0 = scissor->offset.y;
+   const int32_t scissor_x1 = scissor->offset.x + scissor->extent.width;
+   const int32_t scissor_y1 = scissor->offset.y + scissor->extent.height;
+   int32_t x0, y0, x1, y1;
+
+   assert(viewport->width >= 0.0f);
+   assert(scissor_x0 >= 0);
+   assert(scissor_y0 >= 0);
+
+   if (scissor->extent.width == 0 || scissor->extent.height == 0) {
+      *rect_out = (VkRect2D){ 0 };
+      return;
+   }
+
+   x0 = (int32_t)viewport->x;
+   x1 = (int32_t)viewport->x + (int32_t)viewport->width;
+
+   /* A negative viewport height flips y0 and y1. */
+   y0 = (int32_t)viewport->y + MIN2(0, (int32_t)viewport->height);
+   y1 = (int32_t)viewport->y + MAX2(0, (int32_t)viewport->height);
+
+   if (scissor_x1 <= x0 || scissor_y1 <= y0 ||
+       scissor_x0 >= x1 || scissor_y0 >= y1) {
+      *rect_out = (VkRect2D){ 0 };
+      return;
+   }
+
+   /* Clip the viewport bounds to the scissor. */
+   x0 = MAX2(x0, scissor_x0);
+   y0 = MAX2(y0, scissor_y0);
+   x1 = MIN2(x1, scissor_x1);
+   y1 = MIN2(y1, scissor_y1);
+
+   /* TODO: Is this conversion safe? Is this logic right? */
+   rect_out->offset.x = (uint32_t)x0;
+   rect_out->offset.y = (uint32_t)y0;
+   rect_out->extent.height = (uint32_t)(y1 - y0);
+   rect_out->extent.width = (uint32_t)(x1 - x0);
+}
+
+/* Returns the alignment, in pixels, of the geometry region clip: 16 on cores
+ * with 16x16 tiles, 32 otherwise.
+ */
+static inline uint32_t
+pvr_get_geom_region_clip_align_size(struct pvr_device_info *const dev_info)
+{
+   /* TODO: This should come from rogue_ppp.xml. */
+   return PVR_HAS_FEATURE(dev_info, tile_size_16x16) ? 16U : 32U;
+}
+
+/* FIXME: Remove device param when PVR_HAS_FEATURE() accepts const dev_info */
+/* Sets up the ISP depth bias and scissor related PPP state.
+ *
+ * Depth bias (dbenable) is currently unimplemented. For scissoring
+ * (scenable), this packs the IPF scissor words from the intersection of
+ * viewport 0 and scissor 0, appends them to the command buffer's scissor
+ * dynarray (recording the index for the ISPDBSC word), derives the TA
+ * region clip words so everything outside the rectangle is clipped, and
+ * flags both for emission. Only a single viewport/scissor is supported.
+ */
+static void
+pvr_setup_isp_depth_bias_scissor_state(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   const struct pvr_dynamic_state *const dynamic_state =
+      &cmd_buffer->state.dynamic.common;
+   const struct pvr_cmd_struct(TA_STATE_ISPCTL) *const ispctl =
+      &ppp_state->isp.control_struct;
+   struct pvr_device_info *const dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+
+   if (ispctl->dbenable)
+      assert(!"Unimplemented");
+
+   if (ispctl->scenable) {
+      const uint32_t region_clip_align_size =
+         pvr_get_geom_region_clip_align_size(dev_info);
+      const VkViewport *const viewport = &dynamic_state->viewport.viewports[0];
+      const VkRect2D *const scissor = &dynamic_state->scissor.scissors[0];
+      VkRect2D overlap_rect;
+      uint32_t scissor_words[2];
+      uint32_t height;
+      uint32_t width;
+      uint32_t x;
+      uint32_t y;
+
+      /* For region clip. */
+      uint32_t bottom;
+      uint32_t right;
+      uint32_t left;
+      uint32_t top;
+
+      /* We don't support multiple viewport calculations. */
+      assert(dynamic_state->viewport.count == 1);
+      /* We don't support multiple scissor calculations. */
+      assert(dynamic_state->scissor.count == 1);
+
+      pvr_get_viewport_scissor_overlap(viewport, scissor, &overlap_rect);
+
+      x = overlap_rect.offset.x;
+      y = overlap_rect.offset.y;
+      width = overlap_rect.extent.width;
+      height = overlap_rect.extent.height;
+
+      pvr_csb_pack (&scissor_words[0], IPF_SCISSOR_WORD_0, word0) {
+         word0.scw0_xmax = x + width;
+         word0.scw0_xmin = x;
+      }
+
+      pvr_csb_pack (&scissor_words[1], IPF_SCISSOR_WORD_1, word1) {
+         word1.scw1_ymax = y + height;
+         word1.scw1_ymin = y;
+      }
+
+      /* Skip the update when the freshly packed words match the last ones
+       * stored (the array-size check guards against the stored words being
+       * stale before the first append).
+       */
+      if (cmd_buffer->scissor_array.size &&
+          cmd_buffer->scissor_words[0] == scissor_words[0] &&
+          cmd_buffer->scissor_words[1] == scissor_words[1]) {
+         return;
+      }
+
+      cmd_buffer->scissor_words[0] = scissor_words[0];
+      cmd_buffer->scissor_words[1] = scissor_words[1];
+
+      /* Calculate region clip. */
+
+      left = x / region_clip_align_size;
+      top = y / region_clip_align_size;
+
+      /* We prevent right=-1 with the multiplication. */
+      /* TODO: Is there a better way of doing this? */
+      if ((x + width) != 0U)
+         right = DIV_ROUND_UP(x + width, region_clip_align_size) - 1;
+      else
+         right = 0;
+
+      if ((y + height) != 0U)
+         bottom = DIV_ROUND_UP(y + height, region_clip_align_size) - 1;
+      else
+         bottom = 0U;
+
+      /* Setup region clip to clip everything outside what was calculated. */
+
+      /* FIXME: Should we mask to prevent writing over other words? */
+      pvr_csb_pack (&ppp_state->region_clipping.word0, TA_REGION_CLIP0, word0) {
+         word0.right = right;
+         word0.left = left;
+         word0.mode = PVRX(TA_REGION_CLIP_MODE_OUTSIDE);
+      }
+
+      pvr_csb_pack (&ppp_state->region_clipping.word1, TA_REGION_CLIP1, word1) {
+         word1.bottom = bottom;
+         word1.top = top;
+      }
+
+      /* Record the index at which the scissor words will land in the
+       * dynarray so the ISPDBSC word emitted later can reference them.
+       */
+      ppp_state->depthbias_scissor_indices.scissor_index =
+         util_dynarray_num_elements(&cmd_buffer->scissor_array,
+                                    __typeof__(cmd_buffer->scissor_words));
+
+      memcpy(util_dynarray_grow_bytes(&cmd_buffer->scissor_array,
+                                      1,
+                                      sizeof(cmd_buffer->scissor_words)),
+             cmd_buffer->scissor_words,
+             sizeof(cmd_buffer->scissor_words));
+
+      emit_state->isp_dbsc = true;
+      emit_state->region_clip = true;
+   }
+}
+
+/* Computes the PDS triangle-merge-disable bit from the given ISPA state and
+ * folds it into the cached PDS SIZEINFO2 word, flagging PDS fragment state
+ * pointer 0 for re-emission only if the word actually changed.
+ */
+static void
+pvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer,
+                                struct pvr_cmd_struct(TA_STATE_ISPA) * ispa)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   uint32_t merge_word;
+   uint32_t mask;
+
+   pvr_csb_pack (&merge_word, TA_STATE_PDS_SIZEINFO2, size_info) {
+      /* Disable for lines or punch-through or for DWD and depth compare
+       * always.
+       */
+      if (ispa->objtype == PVRX(TA_OBJTYPE_LINE) ||
+          ispa->passtype == PVRX(TA_PASSTYPE_PUNCH_THROUGH) ||
+          (ispa->dwritedisable && ispa->dcmpmode == PVRX(TA_CMPMODE_ALWAYS))) {
+         size_info.pds_tri_merge_disable = true;
+      }
+   }
+
+   /* Build a mask with only the tri-merge-disable field set so the other
+    * SIZEINFO2 fields can be carried over unchanged below.
+    */
+   pvr_csb_pack (&mask, TA_STATE_PDS_SIZEINFO2, size_info) {
+      size_info.pds_tri_merge_disable = true;
+   }
+
+   merge_word |= ppp_state->pds.size_info2 & ~mask;
+
+   if (merge_word != ppp_state->pds.size_info2) {
+      ppp_state->pds.size_info2 = merge_word;
+      emit_state->pds_fragment_stateptr0 = true;
+   }
+}
+
+/* TODO: See if this function can be improved once fully implemented. */
+/* Computes fragment-shader common store sizing or tiles-in-flight limits.
+ *
+ * When fs_common_size == UINT_MAX: returns the maximum per-instance common
+ * (shared) store size, derived from the reserved shared register space minus
+ * coefficient space, divided across the concurrent allocations (doubled to
+ * absorb fragmentation) and rounded down to the SIZEINFO2 unit size.
+ * When fs_common_size == 0: returns the device's max tiles in flight.
+ * Any other fs_common_size hits an unimplemented path and returns 0.
+ */
+static uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
+   const struct pvr_device_info *dev_info,
+   uint32_t fs_common_size,
+   uint32_t min_tiles_in_flight)
+{
+   uint32_t max_tiles_in_flight;
+   uint32_t num_allocs;
+
+   if (PVR_HAS_FEATURE(dev_info, s8xe)) {
+      num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
+   } else {
+      uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
+      /* Initialize unconditionally: the previous code only assigned this in
+       * the single-phantom branch, leaving it uninitialized (undefined
+       * behavior) when num_phantoms > 1 reached the comparisons below. The
+       * multi-phantom paths remain unimplemented.
+       */
+      uint32_t min_cluster_per_phantom =
+         PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
+
+      if (num_phantoms > 1)
+         pvr_finishme("Unimplemented path!!");
+
+      if (num_phantoms > 2)
+         pvr_finishme("Unimplemented path!!");
+
+      if (num_phantoms > 3)
+         pvr_finishme("Unimplemented path!!");
+
+      /* Fewer clusters per phantom means more concurrent allocations need
+       * to be accounted for.
+       */
+      if (min_cluster_per_phantom >= 4)
+         num_allocs = 1;
+      else if (min_cluster_per_phantom == 2)
+         num_allocs = 2;
+      else
+         num_allocs = 4;
+   }
+
+   max_tiles_in_flight =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
+
+   if (fs_common_size == UINT_MAX) {
+      uint32_t max_common_size;
+
+      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);
+
+      if (!PVR_HAS_ERN(dev_info, 38748)) {
+         /* Hardware needs space for one extra shared allocation. */
+         num_allocs += 1;
+      }
+
+      max_common_size = rogue_get_reserved_shared_size(dev_info) -
+                        rogue_get_max_coeffs(dev_info);
+
+      /* Double resource requirements to deal with fragmentation. */
+      max_common_size /= num_allocs * 2;
+      max_common_size =
+         ROUND_DOWN_TO(max_common_size,
+                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
+
+      return max_common_size;
+   } else if (fs_common_size == 0) {
+      return max_tiles_in_flight;
+   }
+
+   pvr_finishme("Unimplemented path!!");
+
+   return 0;
+}
+
+/* Derives the fragment-phase PDS related PPP state words from the bound
+ * graphics pipeline: pixel shader base, texture/uniform code base,
+ * SIZEINFO1/2, varying base and uniform data base. Also clamps the current
+ * sub command's max_tiles_in_flight and flags PDS fragment state pointers 0
+ * and 3 (plus 1 when a coefficient program is present) for emission.
+ */
+static void
+pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_stage_allocation_uniform_state *uniform_shader_state =
+      &state->gfx_pipeline->fragment_shader_state.uniform_state;
+   const struct pvr_pds_upload *pds_coeff_program =
+      &state->gfx_pipeline->fragment_shader_state.pds_coeff_program;
+   const struct pvr_pipeline_stage_state *fragment_state =
+      &state->gfx_pipeline->fragment_shader_state.stage_state;
+   struct pvr_device_info *const dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+   struct pvr_emit_state *const emit_state = &state->emit_state;
+   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
+   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;
+
+   /* All sizes below are rounded up into their respective hardware unit
+    * sizes.
+    */
+   const uint32_t pds_uniform_size =
+      DIV_ROUND_UP(uniform_shader_state->pds_info.data_size_in_dwords,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE));
+
+   const uint32_t pds_varying_state_size =
+      DIV_ROUND_UP(pds_coeff_program->data_size,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE));
+
+   const uint32_t usc_varying_size =
+      DIV_ROUND_UP(fragment_state->coefficient_size,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));
+
+   const uint32_t pds_temp_size =
+      DIV_ROUND_UP(fragment_state->temps_count,
+                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE));
+
+   const uint32_t usc_shared_size =
+      DIV_ROUND_UP(fragment_state->const_shared_reg_count,
+                   PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
+
+   /* Query the tiles-in-flight limit for this shader's shared register
+    * usage (size converted back to registers from units).
+    */
+   const uint32_t max_tiles_in_flight =
+      pvr_calc_fscommon_size_and_tiles_in_flight(
+         dev_info,
+         usc_shared_size *
+            PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE),
+         1);
+   uint32_t size_info_mask;
+   uint32_t size_info2;
+
+   if (max_tiles_in_flight < sub_cmd->gfx.max_tiles_in_flight)
+      sub_cmd->gfx.max_tiles_in_flight = max_tiles_in_flight;
+
+   pvr_csb_pack (&ppp_state->pds.pixel_shader_base,
+                 TA_STATE_PDS_SHADERBASE,
+                 shader_base) {
+      const struct pvr_pds_upload *const pds_upload =
+         &state->gfx_pipeline->fragment_shader_state.pds_fragment_program;
+
+      shader_base.addr.addr = pds_upload->data_offset;
+   }
+
+   /* Texture/uniform code base is only meaningful when a PDS uniform code
+    * upload exists.
+    */
+   if (uniform_shader_state->pds_code.pvr_bo) {
+      pvr_csb_pack (&ppp_state->pds.texture_uniform_code_base,
+                    TA_STATE_PDS_TEXUNICODEBASE,
+                    tex_base) {
+         tex_base.addr.addr = uniform_shader_state->pds_code.code_offset;
+      }
+   } else {
+      ppp_state->pds.texture_uniform_code_base = 0U;
+   }
+
+   pvr_csb_pack (&ppp_state->pds.size_info1, TA_STATE_PDS_SIZEINFO1, info1) {
+      info1.pds_uniformsize = pds_uniform_size;
+      info1.pds_texturestatesize = 0U;
+      info1.pds_varyingsize = pds_varying_state_size;
+      info1.usc_varyingsize = usc_varying_size;
+      info1.pds_tempsize = pds_temp_size;
+   }
+
+   /* Keep only the tri-merge-disable bit of the cached SIZEINFO2 (it is
+    * owned by pvr_setup_triangle_merging_flag()), then OR in the new
+    * shared size.
+    */
+   pvr_csb_pack (&size_info_mask, TA_STATE_PDS_SIZEINFO2, mask) {
+      mask.pds_tri_merge_disable = true;
+   }
+
+   ppp_state->pds.size_info2 &= size_info_mask;
+
+   pvr_csb_pack (&size_info2, TA_STATE_PDS_SIZEINFO2, info2) {
+      info2.usc_sharedsize = usc_shared_size;
+   }
+
+   ppp_state->pds.size_info2 |= size_info2;
+
+   if (pds_coeff_program->pvr_bo) {
+      state->emit_state.pds_fragment_stateptr1 = true;
+
+      pvr_csb_pack (&ppp_state->pds.varying_base,
+                    TA_STATE_PDS_VARYINGBASE,
+                    base) {
+         base.addr.addr = pds_coeff_program->data_offset;
+      }
+   } else {
+      ppp_state->pds.varying_base = 0U;
+   }
+
+   pvr_csb_pack (&ppp_state->pds.uniform_state_data_base,
+                 TA_STATE_PDS_UNIFORMDATABASE,
+                 base) {
+      base.addr.addr = state->pds_fragment_uniform_data_offset;
+   }
+
+   emit_state->pds_fragment_stateptr0 = true;
+   emit_state->pds_fragment_stateptr3 = true;
+}
+
+/* Refreshes the PPP viewport transform state from the dynamic viewport
+ * state. For each viewport the center values go into the a0/a1/a2 fields
+ * and the scale values into m0/m1/m2; the viewport emit flag is raised only
+ * when the count or a value actually changed, and is suppressed entirely
+ * when rasterizer discard is enabled.
+ */
+static void pvr_setup_viewport(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   struct pvr_emit_state *const emit_state = &state->emit_state;
+   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
+
+   if (ppp_state->viewport_count != state->dynamic.common.viewport.count) {
+      ppp_state->viewport_count = state->dynamic.common.viewport.count;
+      emit_state->viewport = true;
+   }
+
+   if (state->gfx_pipeline->raster_state.discard_enable) {
+      /* We don't want to emit any viewport data as it'll just get thrown
+       * away. It's after the previous condition because we still want to
+       * stash the viewport_count as it's our trigger for when
+       * rasterizer discard gets disabled.
+       */
+      emit_state->viewport = false;
+      return;
+   }
+
+   for (uint32_t i = 0; i < ppp_state->viewport_count; i++) {
+      VkViewport *viewport = &state->dynamic.common.viewport.viewports[i];
+      /* Scale/center pairs per axis, stored as raw float bit patterns. */
+      uint32_t x_scale = fui(viewport->width * 0.5f);
+      uint32_t y_scale = fui(viewport->height * 0.5f);
+      uint32_t z_scale = fui(viewport->maxDepth - viewport->minDepth);
+      uint32_t x_center = fui(viewport->x + viewport->width * 0.5f);
+      uint32_t y_center = fui(viewport->y + viewport->height * 0.5f);
+      uint32_t z_center = fui(viewport->minDepth);
+
+      if (ppp_state->viewports[i].a0 != x_center ||
+          ppp_state->viewports[i].m0 != x_scale ||
+          ppp_state->viewports[i].a1 != y_center ||
+          ppp_state->viewports[i].m1 != y_scale ||
+          ppp_state->viewports[i].a2 != z_center ||
+          ppp_state->viewports[i].m2 != z_scale) {
+         ppp_state->viewports[i].a0 = x_center;
+         ppp_state->viewports[i].m0 = x_scale;
+         ppp_state->viewports[i].a1 = y_center;
+         ppp_state->viewports[i].m1 = y_scale;
+         ppp_state->viewports[i].a2 = z_center;
+         ppp_state->viewports[i].m2 = z_scale;
+
+         emit_state->viewport = true;
+      }
+   }
+}
+
+/* Packs the TA PPP control word (flat shade vertex selection, clip mode and
+ * cull mode) from the bound pipeline's raster and input assembly state, and
+ * flags it for emission only if it differs from the stored word.
+ */
+static void pvr_setup_ppp_control(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
+   struct pvr_emit_state *const emit_state = &state->emit_state;
+   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
+   uint32_t ppp_control;
+
+   pvr_csb_pack (&ppp_control, TA_STATE_PPP_CTRL, control) {
+      const struct pvr_raster_state *raster_state = &gfx_pipeline->raster_state;
+      VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
+      control.drawclippededges = true;
+      control.wclampen = true;
+
+      /* Triangle fans use vertex 1 for flat shading; everything else uses
+       * vertex 0.
+       */
+      if (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN)
+         control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_1);
+      else
+         control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_0);
+
+      if (raster_state->depth_clamp_enable)
+         control.clip_mode = PVRX(TA_CLIP_MODE_NO_FRONT_OR_REAR);
+      else
+         control.clip_mode = PVRX(TA_CLIP_MODE_FRONT_REAR);
+
+      /* +--- FrontIsCCW?
+       * | +--- Cull Front?
+       * v v
+       * 0|0 CULLMODE_CULL_CCW,
+       * 0|1 CULLMODE_CULL_CW,
+       * 1|0 CULLMODE_CULL_CW,
+       * 1|1 CULLMODE_CULL_CCW,
+       */
+      switch (raster_state->cull_mode) {
+      case VK_CULL_MODE_BACK_BIT:
+      case VK_CULL_MODE_FRONT_BIT:
+         if ((raster_state->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE) ^
+             (raster_state->cull_mode == VK_CULL_MODE_FRONT_BIT)) {
+            control.cullmode = PVRX(TA_CULLMODE_CULL_CW);
+         } else {
+            control.cullmode = PVRX(TA_CULLMODE_CULL_CCW);
+         }
+
+         break;
+
+      case VK_CULL_MODE_NONE:
+         control.cullmode = PVRX(TA_CULLMODE_NO_CULLING);
+         break;
+
+      default:
+         unreachable("Unsupported cull mode!");
+      }
+   }
+
+   if (ppp_control != ppp_state->ppp_control) {
+      ppp_state->ppp_control = ppp_control;
+      emit_state->ppp_control = true;
+   }
+}
+
+/* Largest valid PPP State update in words = 31
+ * 1 - Header
+ * 3 - Stream Out Config words 0, 1 and 2
+ * 1 - PPP Control word
+ * 3 - Varying Config words 0, 1 and 2
+ * 1 - Output Select
+ * 1 - WClamp
+ * 6 - Viewport Transform words
+ * 2 - Region Clip words
+ * 3 - PDS State for fragment phase (PDSSTATEPTR 1-3)
+ * 4 - PDS State for fragment phase (PDSSTATEPTR0)
+ * 6 - ISP Control Words
+ */
+#define PVR_MAX_PPP_STATE_DWORDS 31
+
+/* Packs every PPP state word flagged in state->emit_state, preceded by a
+ * TA_STATE_HEADER with the matching presence bits, into a freshly
+ * allocated CPU-mapped buffer, then points the VDM control stream at it
+ * via a VDMCTRL_PPP_STATE0/1 pair. Returns early with VK_SUCCESS when no
+ * presence bit was set; clears the emit bits on success. The word order
+ * written below must match the order the hardware expects after the
+ * header.
+ */
+static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   struct pvr_emit_state *const emit_state = &state->emit_state;
+   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
+   struct pvr_csb *const control_stream =
+      &state->current_sub_cmd->gfx.control_stream;
+   uint32_t ppp_state_words[PVR_MAX_PPP_STATE_DWORDS];
+   uint32_t ppp_state_words_count;
+   uint32_t ppp_state_header;
+   bool deferred_secondary;
+   struct pvr_bo *pvr_bo;
+   uint32_t *buffer_ptr;
+   VkResult result;
+
+   buffer_ptr = ppp_state_words;
+
+   pvr_csb_pack (&ppp_state_header, TA_STATE_HEADER, header) {
+      header.view_port_count = (ppp_state->viewport_count == 0)
+                                  ? 0U
+                                  : (ppp_state->viewport_count - 1);
+
+      /* Skip over header. */
+      buffer_ptr++;
+
+      /* Set ISP state. */
+      if (emit_state->isp) {
+         header.pres_ispctl = true;
+         *buffer_ptr++ = ppp_state->isp.control;
+         header.pres_ispctl_fa = true;
+         *buffer_ptr++ = ppp_state->isp.front_a;
+
+         if (emit_state->isp_fb) {
+            header.pres_ispctl_fb = true;
+            *buffer_ptr++ = ppp_state->isp.front_b;
+         }
+
+         if (emit_state->isp_ba) {
+            header.pres_ispctl_ba = true;
+            *buffer_ptr++ = ppp_state->isp.back_a;
+         }
+
+         if (emit_state->isp_bb) {
+            header.pres_ispctl_bb = true;
+            *buffer_ptr++ = ppp_state->isp.back_b;
+         }
+      }
+
+      /* Depth bias / scissor
+       * If deferred_secondary is true then we do a separate state update
+       * which gets patched in ExecuteDeferredCommandBuffer.
+       */
+      /* TODO: Update above comment when we port ExecuteDeferredCommandBuffer.
+       */
+      deferred_secondary =
+         cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
+         cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;
+
+      if (emit_state->isp_dbsc && !deferred_secondary) {
+         header.pres_ispctl_dbsc = true;
+
+         pvr_csb_pack (buffer_ptr++, TA_STATE_ISPDBSC, ispdbsc) {
+            ispdbsc.dbindex =
+               ppp_state->depthbias_scissor_indices.depthbias_index;
+            ispdbsc.scindex =
+               ppp_state->depthbias_scissor_indices.scissor_index;
+         }
+      }
+
+      /* PDS state. */
+      if (emit_state->pds_fragment_stateptr0) {
+         header.pres_pds_state_ptr0 = true;
+
+         *buffer_ptr++ = ppp_state->pds.pixel_shader_base;
+         *buffer_ptr++ = ppp_state->pds.texture_uniform_code_base;
+         *buffer_ptr++ = ppp_state->pds.size_info1;
+         *buffer_ptr++ = ppp_state->pds.size_info2;
+      }
+
+      if (emit_state->pds_fragment_stateptr1) {
+         header.pres_pds_state_ptr1 = true;
+         *buffer_ptr++ = ppp_state->pds.varying_base;
+      }
+
+      /* We don't use the pds_fragment_stateptr2 (texture state programs)
+       * control word, but this doesn't mean we need to set it to 0. This is
+       * because the hardware runs the texture state program only when the
+       * pds_texture state field of PDS_SIZEINFO1 is non-zero.
+       */
+
+      if (emit_state->pds_fragment_stateptr3) {
+         header.pres_pds_state_ptr3 = true;
+         *buffer_ptr++ = ppp_state->pds.uniform_state_data_base;
+      }
+
+      /* Region clip. */
+      if (emit_state->region_clip) {
+         header.pres_region_clip = true;
+         *buffer_ptr++ = ppp_state->region_clipping.word0;
+         *buffer_ptr++ = ppp_state->region_clipping.word1;
+      }
+
+      /* Viewport. */
+      if (emit_state->viewport) {
+         const uint32_t viewports = MAX2(1, ppp_state->viewport_count);
+
+         header.pres_viewport = true;
+         for (uint32_t i = 0; i < viewports; i++) {
+            *buffer_ptr++ = ppp_state->viewports[i].a0;
+            *buffer_ptr++ = ppp_state->viewports[i].m0;
+            *buffer_ptr++ = ppp_state->viewports[i].a1;
+            *buffer_ptr++ = ppp_state->viewports[i].m1;
+            *buffer_ptr++ = ppp_state->viewports[i].a2;
+            *buffer_ptr++ = ppp_state->viewports[i].m2;
+         }
+      }
+
+      /* W clamp. */
+      if (emit_state->wclamp) {
+         const float wclamp = 0.00001f;
+
+         header.pres_wclamp = true;
+         *buffer_ptr++ = fui(wclamp);
+      }
+
+      /* Output selects. */
+      if (emit_state->output_selects) {
+         header.pres_outselects = true;
+         *buffer_ptr++ = ppp_state->output_selects;
+      }
+
+      /* Varying words. */
+      if (emit_state->varying_word0) {
+         header.pres_varying_word0 = true;
+         *buffer_ptr++ = ppp_state->varying_word[0];
+      }
+
+      if (emit_state->varying_word1) {
+         header.pres_varying_word1 = true;
+         *buffer_ptr++ = ppp_state->varying_word[1];
+      }
+
+      if (emit_state->varying_word2) {
+         /* We only emit this on the first draw of a render job to prevent us
+          * from inheriting a non-zero value set elsewhere.
+          */
+         header.pres_varying_word2 = true;
+         *buffer_ptr++ = 0;
+      }
+
+      /* PPP control. */
+      if (emit_state->ppp_control) {
+         header.pres_ppp_ctrl = true;
+         *buffer_ptr++ = ppp_state->ppp_control;
+      }
+
+      if (emit_state->stream_out) {
+         /* We only emit this on the first draw of a render job to prevent us
+          * from inheriting a non-zero value set elsewhere.
+          */
+         header.pres_stream_out_size = true;
+         *buffer_ptr++ = 0;
+      }
+   }
+
+   /* No presence bits were set: nothing changed, nothing to emit. */
+   if (!ppp_state_header)
+      return VK_SUCCESS;
+
+   /* Patch the header into slot 0 now all presence bits are known. */
+   ppp_state_words_count = buffer_ptr - ppp_state_words;
+   ppp_state_words[0] = ppp_state_header;
+
+   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
+                                     cmd_buffer->device->heaps.general_heap,
+                                     ppp_state_words_count * sizeof(uint32_t),
+                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                                     &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   memcpy(pvr_bo->bo->map,
+          ppp_state_words,
+          ppp_state_words_count * sizeof(uint32_t));
+
+   /* Write the VDM state update into the VDM control stream. */
+   pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE0, state0) {
+      state0.word_count = ppp_state_words_count;
+      state0.addrmsb = pvr_bo->vma->dev_addr;
+   }
+
+   pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE1, state1) {
+      state1.addrlsb = pvr_bo->vma->dev_addr;
+   }
+
+   if (emit_state->isp_dbsc &&
+       cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+      pvr_finishme("Unimplemented path!!");
+   }
+
+   state->emit_state_bits = 0;
+
+   return VK_SUCCESS;
+}
+
+/* Re-derives and emits any PPP state affected by the dirty flags, then
+ * hands off to pvr_emit_ppp_state(). Returns VK_SUCCESS when nothing was
+ * dirty, otherwise whatever pvr_emit_ppp_state() returns.
+ */
+static VkResult
+pvr_emit_dirty_ppp_state(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
+   const bool stencil_dirty = state->dirty.compare_mask ||
+                              state->dirty.write_mask || state->dirty.reference;
+   const bool any_dirty =
+      stencil_dirty || state->dirty.depth_bias ||
+      state->dirty.fragment_descriptors || state->dirty.line_width ||
+      state->dirty.gfx_pipeline_binding || state->dirty.scissor ||
+      state->dirty.userpass_spawn || state->dirty.viewport ||
+      state->emit_state_bits;
+
+   /* Fast path: nothing that feeds into the PPP state has changed. */
+   if (!any_dirty)
+      return VK_SUCCESS;
+
+   if (state->dirty.gfx_pipeline_binding) {
+      struct pvr_cmd_struct(TA_STATE_ISPA) ispa;
+
+      pvr_setup_output_select(cmd_buffer);
+      pvr_setup_isp_faces_and_control(cmd_buffer, &ispa);
+      pvr_setup_triangle_merging_flag(cmd_buffer, &ispa);
+   } else if (stencil_dirty || state->dirty.line_width ||
+              state->dirty.userpass_spawn) {
+      pvr_setup_isp_faces_and_control(cmd_buffer, NULL);
+   }
+
+   if (!gfx_pipeline->raster_state.discard_enable &&
+       state->dirty.fragment_descriptors &&
+       gfx_pipeline->fragment_shader_state.bo) {
+      pvr_setup_fragment_state_pointers(cmd_buffer);
+   }
+
+   pvr_setup_isp_depth_bias_scissor_state(cmd_buffer);
+
+   if (state->dirty.viewport)
+      pvr_setup_viewport(cmd_buffer);
+
+   pvr_setup_ppp_control(cmd_buffer);
+
+   if (gfx_pipeline->raster_state.cull_mode == VK_CULL_MODE_FRONT_AND_BACK) {
+      /* FIXME: Port SetNegativeViewport(). */
+   }
+
+   return pvr_emit_ppp_state(cmd_buffer);
+}
+
+/* Validates push descriptor state before a draw.
+ *
+ * Stub pending a port of ValidatePushDescriptors; until then push
+ * descriptors are always reported as clean via *push_descriptors_dirty_out.
+ */
+static void
+pvr_validate_push_descriptors(struct pvr_cmd_buffer *cmd_buffer,
+                              bool *const push_descriptors_dirty_out)
+{
+   pvr_finishme("Add support for push descriptors!");
+
+   *push_descriptors_dirty_out = false;
+}
+
+/* Works out the VDM CAM size and maximum number of VS instances for a
+ * vertex of the given output size (in dwords). The per-vertex UVS vector
+ * size is (output size + 1, plus 4 dwords of scratch when rasterization is
+ * enabled) multiplied by 4 for column ordering; the thresholds below
+ * compare against that size.
+ */
+static void
+pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
+                              const uint32_t vs_output_size,
+                              const bool raster_enable,
+                              uint32_t *const cam_size_out,
+                              uint32_t *const vs_max_instances_out)
+{
+   /* First work out the size of a vertex in the UVS and multiply by 4 for
+    * column ordering.
+    */
+   const uint32_t uvs_vertex_vector_size_in_dwords =
+      (vs_output_size + 1U + raster_enable * 4U) * 4U;
+   const uint32_t vdm_cam_size =
+      PVR_GET_FEATURE_VALUE(dev_info, vdm_cam_size, 32U);
+
+   /* This is a proxy for 8XE. */
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
+       vdm_cam_size < 96U) {
+      /* Comparisons are based on size including scratch per vertex vector. */
+      if (uvs_vertex_vector_size_in_dwords < (14U * 4U)) {
+         *cam_size_out = MIN2(31U, vdm_cam_size - 1U);
+         *vs_max_instances_out = 16U;
+      } else if (uvs_vertex_vector_size_in_dwords < (20U * 4U)) {
+         *cam_size_out = 15U;
+         *vs_max_instances_out = 16U;
+      } else if (uvs_vertex_vector_size_in_dwords < (28U * 4U)) {
+         *cam_size_out = 11U;
+         *vs_max_instances_out = 12U;
+      } else if (uvs_vertex_vector_size_in_dwords < (44U * 4U)) {
+         *cam_size_out = 7U;
+         *vs_max_instances_out = 8U;
+      } else if (PVR_HAS_FEATURE(dev_info,
+                                 simple_internal_parameter_format_v2) ||
+                 uvs_vertex_vector_size_in_dwords < (64U * 4U)) {
+         *cam_size_out = 7U;
+         *vs_max_instances_out = 4U;
+      } else {
+         *cam_size_out = 3U;
+         *vs_max_instances_out = 2U;
+      }
+   } else {
+      /* Comparisons are based on size including scratch per vertex vector. */
+      if (uvs_vertex_vector_size_in_dwords <= (32U * 4U)) {
+         /* output size <= 27 + 5 scratch. */
+         *cam_size_out = MIN2(95U, vdm_cam_size - 1U);
+         *vs_max_instances_out = 0U;
+      } else if (uvs_vertex_vector_size_in_dwords <= 48U * 4U) {
+         /* output size <= 43 + 5 scratch */
+         *cam_size_out = 63U;
+         if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U)
+            *vs_max_instances_out = 16U;
+         else
+            *vs_max_instances_out = 0U;
+      } else if (uvs_vertex_vector_size_in_dwords <= 64U * 4U) {
+         /* output size <= 59 + 5 scratch. */
+         *cam_size_out = 31U;
+         if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U)
+            *vs_max_instances_out = 16U;
+         else
+            *vs_max_instances_out = 0U;
+      } else {
+         *cam_size_out = 15U;
+         *vs_max_instances_out = 16U;
+      }
+   }
+}
+
+/* Emits VDMCTRL_VDM_STATE0..5 into the current sub command's control stream
+ * for the bound graphics pipeline: CAM sizing, cut (primitive restart)
+ * index, flat shade control, and — when the corresponding dirty flags are
+ * set — the PDS vertex attribute data/code addresses and sizes.
+ */
+static void
+pvr_emit_dirty_vdm_state(const struct pvr_cmd_buffer *const cmd_buffer)
+{
+   /* FIXME: Assume all state is dirty for the moment. */
+   struct pvr_device_info *const dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+   ASSERTED const uint32_t max_user_vertex_output_components =
+      pvr_get_max_user_vertex_output_components(dev_info);
+   struct pvr_cmd_struct(VDMCTRL_VDM_STATE0)
+      header = { pvr_cmd_header(VDMCTRL_VDM_STATE0) };
+   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
+   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
+   uint32_t vs_output_size;
+   uint32_t max_instances;
+   uint32_t cam_size;
+
+   assert(gfx_pipeline);
+
+   /* CAM Calculations and HW state take vertex size aligned to DWORDS. */
+   vs_output_size =
+      DIV_ROUND_UP(gfx_pipeline->vertex_shader_state.vertex_output_size,
+                   PVRX(VDMCTRL_VDM_STATE4_VS_OUTPUT_SIZE_UNIT_SIZE));
+
+   assert(vs_output_size <= max_user_vertex_output_components);
+
+   pvr_calculate_vertex_cam_size(dev_info,
+                                 vs_output_size,
+                                 true,
+                                 &cam_size,
+                                 &max_instances);
+
+   pvr_csb_emit (csb, VDMCTRL_VDM_STATE0, state0) {
+      state0.cam_size = cam_size;
+
+      if (gfx_pipeline->input_asm_state.primitive_restart) {
+         state0.cut_index_enable = true;
+         state0.cut_index_present = true;
+      }
+
+      switch (gfx_pipeline->input_asm_state.topology) {
+      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+         state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_1);
+         break;
+
+      default:
+         state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_0);
+         break;
+      }
+
+      /* If we've bound a different vertex buffer, or this draw-call requires
+       * a different PDS attrib data-section from the last draw call (changed
+       * base_instance) then we need to specify a new data section. This is
+       * also the case if we've switched pipeline or attrib program as the
+       * data-section layout will be different.
+       */
+      state0.vs_data_addr_present =
+         state->dirty.gfx_pipeline_binding || state->dirty.vertex_bindings ||
+         state->dirty.draw_base_instance || state->dirty.draw_variant;
+
+      /* Need to specify new PDS Attrib program if we've bound a different
+       * pipeline or we needed a different PDS Attrib variant for this
+       * draw-call.
+       */
+      state0.vs_other_present = state->dirty.gfx_pipeline_binding ||
+                                state->dirty.draw_variant;
+
+      /* UVB_SCRATCH_SELECT_ONE with no rasterization is only valid when
+       * stream output is enabled. We use UVB_SCRATCH_SELECT_FIVE because
+       * Vulkan doesn't support stream output and the vertex position is
+       * always emitted to the UVB.
+       */
+      state0.uvs_scratch_size_select =
+         PVRX(VDMCTRL_UVS_SCRATCH_SIZE_SELECT_FIVE);
+
+      /* Keep a copy so the presence bits decide which extra words follow. */
+      header = state0;
+   }
+
+   if (header.cut_index_present) {
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE1, state1) {
+         switch (state->index_buffer_binding.type) {
+         case VK_INDEX_TYPE_UINT32:
+            /* FIXME: Defines for these? These seem to come from the Vulkan
+             * spec. for VkPipelineInputAssemblyStateCreateInfo
+             * primitiveRestartEnable.
+             */
+            state1.cut_index = 0xFFFFFFFF;
+            break;
+
+         case VK_INDEX_TYPE_UINT16:
+            state1.cut_index = 0xFFFF;
+            break;
+
+         default:
+            unreachable(!"Invalid index type");
+         }
+      }
+   }
+
+   if (header.vs_data_addr_present) {
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE2, state2) {
+         state2.vs_pds_data_base_addr.addr = state->pds_vertex_attrib_offset;
+      }
+   }
+
+   if (header.vs_other_present) {
+      const uint32_t usc_unified_store_size_in_bytes =
+         gfx_pipeline->vertex_shader_state.vertex_input_size << 2;
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE3, state3) {
+         state3.vs_pds_code_base_addr.addr = state->pds_shader.code_offset;
+      }
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE4, state4) {
+         state4.vs_output_size = vs_output_size;
+      }
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE5, state5) {
+         state5.vs_max_instances = max_instances;
+         state5.vs_usc_common_size = 0U;
+         state5.vs_usc_unified_size = DIV_ROUND_UP(
+            usc_unified_store_size_in_bytes,
+            PVRX(VDMCTRL_VDM_STATE5_VS_USC_UNIFIED_SIZE_UNIT_SIZE));
+         state5.vs_pds_temp_size =
+            DIV_ROUND_UP(state->pds_shader.info->temps_required << 2,
+                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_TEMP_SIZE_UNIT_SIZE));
+         state5.vs_pds_data_size =
+            DIV_ROUND_UP(state->pds_shader.info->data_size_in_dwords << 2,
+                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE));
+      }
+   }
+}
+
+/* Validates and flushes all draw-time state before a draw: sub command
+ * bookkeeping, depth/stencil usage tracking, PDS vertex attribute program
+ * selection, descriptor mapping setup, and the PPP/VDM state emission.
+ * Clears the relevant dirty flags on success.
+ *
+ * Returns VK_SUCCESS, or the first failure from descriptor mapping setup or
+ * PPP state emission.
+ */
+static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
+   const struct pvr_pipeline_stage_state *const fragment_state =
+      &gfx_pipeline->fragment_shader_state.stage_state;
+   struct pvr_sub_cmd *sub_cmd;
+   bool fstencil_writemask_zero;
+   bool bstencil_writemask_zero;
+   bool push_descriptors_dirty;
+   bool fstencil_keep;
+   bool bstencil_keep;
+   VkResult result;
+
+   /* NOTE(review): the return value of pvr_cmd_buffer_start_sub_cmd() is
+    * ignored here — confirm whether failures need propagating.
+    */
+   pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
+
+   sub_cmd = state->current_sub_cmd;
+   sub_cmd->gfx.empty_cmd = false;
+
+   /* Determine pipeline depth/stencil usage. If a pipeline uses depth or
+    * stencil testing, those attachments are using their loaded values, and
+    * the loadOps cannot be optimized out.
+    */
+   /* Pipeline uses depth testing. */
+   if (sub_cmd->gfx.depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
+       gfx_pipeline->depth_compare_op != VK_COMPARE_OP_ALWAYS) {
+      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
+   }
+
+   /* Pipeline uses stencil testing. */
+   if (sub_cmd->gfx.stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
+       (gfx_pipeline->stencil_front.compare_op != VK_COMPARE_OP_ALWAYS ||
+        gfx_pipeline->stencil_back.compare_op != VK_COMPARE_OP_ALWAYS)) {
+      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
+   }
+
+   /* Overlapping compute cannot run while a large coefficient (varying)
+    * allocation is in use.
+    */
+   if (PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info,
+                       compute_overlap)) {
+      uint32_t coefficient_size =
+         DIV_ROUND_UP(fragment_state->coefficient_size,
+                      PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));
+
+      if (coefficient_size >
+          PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_MAX_SIZE))
+         sub_cmd->gfx.disable_compute_overlap = true;
+   }
+
+   sub_cmd->gfx.frag_uses_atomic_ops |= fragment_state->uses_atomic_ops;
+   sub_cmd->gfx.frag_has_side_effects |= fragment_state->has_side_effects;
+   sub_cmd->gfx.frag_uses_texture_rw |= fragment_state->uses_texture_rw;
+   sub_cmd->gfx.vertex_uses_texture_rw |=
+      gfx_pipeline->vertex_shader_state.stage_state.uses_texture_rw;
+
+   fstencil_keep =
+      (gfx_pipeline->stencil_front.fail_op == VK_STENCIL_OP_KEEP) &&
+      (gfx_pipeline->stencil_front.pass_op == VK_STENCIL_OP_KEEP);
+   bstencil_keep = (gfx_pipeline->stencil_back.fail_op == VK_STENCIL_OP_KEEP) &&
+                   (gfx_pipeline->stencil_back.pass_op == VK_STENCIL_OP_KEEP);
+   fstencil_writemask_zero = (state->dynamic.common.write_mask.front == 0);
+   bstencil_writemask_zero = (state->dynamic.common.write_mask.back == 0);
+
+   /* Set stencil modified flag if:
+    * - Neither front nor back-facing stencil has a fail_op/pass_op of KEEP.
+    * - Neither front nor back-facing stencil has a write_mask of zero.
+    */
+   if (!(fstencil_keep && bstencil_keep) &&
+       !(fstencil_writemask_zero && bstencil_writemask_zero)) {
+      sub_cmd->gfx.modifies_stencil = true;
+   }
+
+   /* Set depth modified flag if depth write is enabled. */
+   if (!gfx_pipeline->depth_write_disable)
+      sub_cmd->gfx.modifies_depth = true;
+
+   /* If either the data or code changes for pds vertex attribs, regenerate the
+    * data segment.
+    */
+   if (state->dirty.vertex_bindings || state->dirty.gfx_pipeline_binding ||
+       state->dirty.draw_variant || state->dirty.draw_base_instance) {
+      enum pvr_pds_vertex_attrib_program_type prog_type;
+      const struct pvr_pds_attrib_program *program;
+
+      if (state->draw_state.draw_indirect)
+         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT;
+      else if (state->draw_state.base_instance)
+         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE;
+      else
+         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC;
+
+      program =
+         &gfx_pipeline->vertex_shader_state.pds_attrib_programs[prog_type];
+      state->pds_shader.info = &program->info;
+      state->pds_shader.code_offset = program->program.code_offset;
+
+      state->max_shared_regs =
+         MAX2(state->max_shared_regs, pvr_calc_shared_regs_count(gfx_pipeline));
+
+      pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline);
+   }
+
+   /* TODO: Check for dirty push constants */
+
+   pvr_validate_push_descriptors(cmd_buffer, &push_descriptors_dirty);
+
+   state->dirty.vertex_descriptors = push_descriptors_dirty ||
+                                     state->dirty.gfx_pipeline_binding;
+   state->dirty.fragment_descriptors = state->dirty.vertex_descriptors;
+
+   if (state->dirty.fragment_descriptors) {
+      result = pvr_setup_descriptor_mappings(
+         cmd_buffer,
+         PVR_STAGE_ALLOCATION_FRAGMENT,
+         &state->gfx_pipeline->fragment_shader_state.uniform_state,
+         &state->pds_fragment_uniform_data_offset);
+      if (result != VK_SUCCESS) {
+         mesa_loge("Could not setup fragment descriptor mappings.");
+         return result;
+      }
+   }
+
+   if (state->dirty.vertex_descriptors) {
+      uint32_t pds_vertex_uniform_data_offset;
+
+      result = pvr_setup_descriptor_mappings(
+         cmd_buffer,
+         PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
+         &state->gfx_pipeline->vertex_shader_state.uniform_state,
+         &pds_vertex_uniform_data_offset);
+      if (result != VK_SUCCESS) {
+         mesa_loge("Could not setup vertex descriptor mappings.");
+         return result;
+      }
+
+      pvr_emit_dirty_pds_state(cmd_buffer, pds_vertex_uniform_data_offset);
+   }
+
+   /* Propagate PPP emission failures (e.g. buffer allocation) instead of
+    * silently discarding the VkResult as before.
+    */
+   result = pvr_emit_dirty_ppp_state(cmd_buffer);
+   if (result != VK_SUCCESS)
+      return result;
+
+   pvr_emit_dirty_vdm_state(cmd_buffer);
+
+   state->dirty.gfx_desc_dirty = false;
+   state->dirty.blend_constants = false;
+   state->dirty.compare_mask = false;
+   state->dirty.depth_bias = false;
+   state->dirty.draw_base_instance = false;
+   state->dirty.draw_variant = false;
+   state->dirty.fragment_descriptors = false;
+   state->dirty.line_width = false;
+   state->dirty.gfx_pipeline_binding = false;
+   state->dirty.reference = false;
+   state->dirty.scissor = false;
+   state->dirty.userpass_spawn = false;
+   state->dirty.vertex_bindings = false;
+   state->dirty.viewport = false;
+   state->dirty.write_mask = false;
+
+   return VK_SUCCESS;
+}
+
+static uint32_t pvr_get_hw_primitive_topology(VkPrimitiveTopology topology)
+{
+ switch (topology) {
+ case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_POINT_LIST);
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST);
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP);
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST);
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP);
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_FAN);
+ case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ);
+ case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ);
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST_ADJ);
+ case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP_ADJ);
+ case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+ return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_PATCH_LIST);
+ default:
+ unreachable("Undefined primitive topology");
+ }
+}
+
/* Emits the VDM index list block (INDEX_LIST0..4) for a single draw into the
 * current sub command's control stream.
 *
 * Exactly one of vertex_count/index_count is expected to be non-zero for a
 * given draw (the caller passes 0 for the other — see pvr_CmdDrawIndexed),
 * which is why the two are combined with a bitwise OR when writing
 * INDEX_LIST2 below.
 */
static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer,
                                    VkPrimitiveTopology topology,
                                    uint32_t first_vertex,
                                    uint32_t vertex_count,
                                    uint32_t first_index,
                                    uint32_t index_count,
                                    uint32_t instance_count)
{
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
   /* Copy of the INDEX_LIST0 header word, kept after the emit scope so we
    * know which of the optional INDEX_LIST1..4 words were flagged present.
    */
   struct pvr_cmd_struct(VDMCTRL_INDEX_LIST0)
      list_hdr = { pvr_cmd_header(VDMCTRL_INDEX_LIST0) };
   pvr_dev_addr_t index_buffer_addr = { 0 };
   unsigned int index_stride = 0;

   pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) {
      list0.primitive_topology = pvr_get_hw_primitive_topology(topology);

      /* First instance is not handled in the VDM state, it's implemented as
       * an addition in the PDS vertex fetch.
       */
      list0.index_count_present = true;

      if (instance_count > 1)
         list0.index_instance_count_present = true;

      if (first_vertex != 0)
         list0.index_offset_present = true;

      if (state->draw_state.draw_indexed) {
         struct pvr_buffer *buffer = state->index_buffer_binding.buffer;

         switch (state->index_buffer_binding.type) {
         default:
            unreachable("Invalid index type");
            FALLTHROUGH;

         case VK_INDEX_TYPE_UINT32:
            list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B32);
            index_stride = 4;
            break;

         case VK_INDEX_TYPE_UINT16:
            list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B16);
            index_stride = 2;
            break;
         }

         /* Index fetches start at buffer base + binding offset + firstIndex
          * scaled by the index size.
          */
         list0.index_addr_present = true;
         index_buffer_addr.addr = buffer->dev_addr.addr;
         index_buffer_addr.addr += state->index_buffer_binding.offset;
         index_buffer_addr.addr += first_index * index_stride;
         list0.index_base_addrmsb = index_buffer_addr;
      }

      list_hdr = list0;
   }

   if (list_hdr.index_addr_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST1, list1) {
         list1.index_base_addrlsb = index_buffer_addr;
      }
   }

   if (list_hdr.index_count_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST2, list2) {
         /* Only one of the two is non-zero for a given draw, so the OR
          * selects whichever is in use.
          */
         list2.index_count = vertex_count | index_count;
      }
   }

   if (list_hdr.index_instance_count_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST3, list3) {
         /* The hardware field holds extra instances beyond the first. */
         list3.instance_count = instance_count - 1;
      }
   }

   if (list_hdr.index_offset_present) {
      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST4, list4) {
         list4.index_offset = first_vertex;
      }
   }

   /* TODO: See if we need list_words[5-9]. */
}
+
/* Records an indexed draw.
 *
 * firstInstance is not encoded in the VDM state; it is recorded in the draw
 * state here and applied via the PDS vertex fetch (see
 * pvr_emit_vdm_index_list).
 */
void pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer,
                        uint32_t indexCount,
                        uint32_t instanceCount,
                        uint32_t firstIndex,
                        int32_t vertexOffset,
                        uint32_t firstInstance)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_cmd_buffer_draw_state draw_state;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   draw_state.base_vertex = vertexOffset;
   draw_state.base_instance = firstInstance;
   draw_state.draw_indirect = false;
   draw_state.draw_indexed = true;
   pvr_update_draw_state(&cmd_buffer->state, &draw_state);

   /* Flush any dirty pipeline/descriptor/PPP/VDM state before the draw. */
   result = pvr_validate_draw_state(cmd_buffer);
   if (result != VK_SUCCESS)
      return;

   /* Write the VDM control stream for the primitive. vertex_count is passed
    * as 0 for indexed draws; pvr_emit_vdm_index_list ORs it with index_count.
    */
   pvr_emit_vdm_index_list(cmd_buffer,
                           state->gfx_pipeline->input_asm_state.topology,
                           vertexOffset,
                           0,
                           firstIndex,
                           indexCount,
                           instanceCount);
}
+
/* TODO: Implement indexed indirect draws; currently aborts on use. */
void pvr_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
                                VkBuffer _buffer,
                                VkDeviceSize offset,
                                uint32_t drawCount,
                                uint32_t stride)
{
   assert(!"Unimplemented");
}
+
/* TODO: Implement non-indexed indirect draws; currently aborts on use. */
void pvr_CmdDrawIndirect(VkCommandBuffer commandBuffer,
                         VkBuffer _buffer,
                         VkDeviceSize offset,
                         uint32_t drawCount,
                         uint32_t stride)
{
   assert(!"Unimplemented");
}
+
/* Placeholder: attachment resolves are not implemented yet, so this only
 * ends the current sub command.
 */
static VkResult
pvr_resolve_unemitted_resolve_attachments(struct pvr_cmd_buffer *cmd_buffer)
{
   pvr_finishme("Add attachment resolve support!");
   return pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
}
+
+void pvr_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
+ const VkSubpassEndInfoKHR *pSubpassEndInfo)
+{
+ PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+ struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+ struct pvr_image_view **attachments;
+ VkClearValue *clear_values;
+ VkResult result;
+
+ PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+ assert(state->render_pass_info.pass);
+ assert(state->render_pass_info.framebuffer);
+
+ /* TODO: Investigate why pvr_cmd_buffer_end_sub_cmd/EndSubCommand is called
+ * twice in this path, one here and one from
+ * pvr_resolve_unemitted_resolve_attachments.
+ */
+ result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return;
+
+ result = pvr_resolve_unemitted_resolve_attachments(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return;
+
+ /* Save the required fields before clearing render_pass_info struct. */
+ attachments = state->render_pass_info.attachments;
+ clear_values = state->render_pass_info.clear_values;
+
+ memset(&state->render_pass_info, 0, sizeof(state->render_pass_info));
+
+ state->render_pass_info.attachments = attachments;
+ state->render_pass_info.clear_values = clear_values;
+}
+
/* TODO: Implement secondary command buffer execution; aborts on use. */
void pvr_CmdExecuteCommands(VkCommandBuffer commandBuffer,
                            uint32_t commandBufferCount,
                            const VkCommandBuffer *pCommandBuffers)
{
   assert(!"Unimplemented");
}
+
/* TODO: Implement subpass transitions; aborts on use. */
void pvr_CmdNextSubpass2(VkCommandBuffer commandBuffer,
                         const VkSubpassBeginInfo *pSubpassBeginInfo,
                         const VkSubpassEndInfo *pSubpassEndInfo)
{
   assert(!"Unimplemented");
}
+
/* TODO: Implement pipeline barriers; aborts on use. */
void pvr_CmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer,
                                const VkDependencyInfoKHR *pDependencyInfo)
{
   assert(!"Unimplemented");
}
+
/* TODO: Implement event reset; aborts on use. */
void pvr_CmdResetEvent2KHR(VkCommandBuffer commandBuffer,
                           VkEvent _event,
                           VkPipelineStageFlags2KHR stageMask)
{
   assert(!"Unimplemented");
}
+
/* TODO: Implement event signalling; aborts on use. */
void pvr_CmdSetEvent2KHR(VkCommandBuffer commandBuffer,
                         VkEvent _event,
                         const VkDependencyInfoKHR *pDependencyInfo)
{
   assert(!"Unimplemented");
}
+
/* TODO: Implement event waits; aborts on use. */
void pvr_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer,
                           uint32_t eventCount,
                           const VkEvent *pEvents,
                           const VkDependencyInfoKHR *pDependencyInfos)
{
   assert(!"Unimplemented");
}
+
/* This entry point must never be reached: the driver does not advertise
 * timestamp query support, so valid applications cannot call it.
 */
void pvr_CmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer,
                               VkPipelineStageFlags2KHR stage,
                               VkQueryPool queryPool,
                               uint32_t query)
{
   unreachable("Timestamp queries are not supported.");
}
+
+VkResult pvr_EndCommandBuffer(VkCommandBuffer commandBuffer)
+{
+ PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+ struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+ VkResult result;
+
+ /* From the Vulkan 1.0 spec:
+ *
+ * CommandBuffer must be in the recording state.
+ */
+ assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_RECORDING);
+
+ if (state->status != VK_SUCCESS)
+ return state->status;
+
+ result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+ if (result != VK_SUCCESS)
+ return result;
+
+ cmd_buffer->status = PVR_CMD_BUFFER_STATUS_EXECUTABLE;
+
+ return VK_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on v3dv_cl.c which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_private.h"
+#include "vk_log.h"
+
+/**
+ * \file pvr_csb.c
+ *
+ * \brief Contains functions to manage Control Stream Builder (csb) object.
+ *
 * A csb object can be used to create a primary/main control stream, referred
 * to as the control stream hereafter, or a secondary control stream, also
 * referred to as a sub control stream. The main difference between these is
 * that the control stream is the one directly submitted to the GPU and is
 * terminated using STREAM_TERMINATE, whereas the secondary control stream can
 * be thought of as an independent set of commands that can be referenced by
 * a primary control stream to avoid duplication and is instead terminated
 * using STREAM_RETURN, which means the control stream parser should return to
 * the main stream it came from.
+ *
+ * Note: Sub control stream is only supported for PVR_CMD_STREAM_TYPE_GRAPHICS
+ * type control streams.
+ */
+
+/**
+ * \brief Size of the individual csb buffer object.
+ */
+#define PVR_CMD_BUFFER_CSB_BO_SIZE 4096
+
+/**
+ * \brief Initializes the csb object.
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] csb Control Stream Builder object to initialize.
+ *
+ * \sa #pvr_csb_finish()
+ */
+void pvr_csb_init(struct pvr_device *device,
+ enum pvr_cmd_stream_type stream_type,
+ struct pvr_csb *csb)
+{
+ csb->start = NULL;
+ csb->next = NULL;
+ csb->pvr_bo = NULL;
+ csb->end = NULL;
+ csb->device = device;
+ csb->stream_type = stream_type;
+ csb->status = VK_SUCCESS;
+ list_inithead(&csb->pvr_bo_list);
+}
+
+/**
+ * \brief Frees the resources associated with the csb object.
+ *
+ * \param[in] csb Control Stream Builder object to free.
+ *
+ * \sa #pvr_csb_init()
+ */
+void pvr_csb_finish(struct pvr_csb *csb)
+{
+ list_for_each_entry_safe (struct pvr_bo, pvr_bo, &csb->pvr_bo_list, link) {
+ list_del(&pvr_bo->link);
+ pvr_bo_free(csb->device, pvr_bo);
+ }
+
+ /* Leave the csb in a reset state to catch use after destroy instances */
+ pvr_csb_init(NULL, PVR_CMD_STREAM_TYPE_INVALID, csb);
+}
+
/**
 * \brief Helper function to extend csb memory.
 *
 * Allocates a new buffer object and links it with the previous buffer object
 * using STREAM_LINK dwords and updates csb object to use the new buffer.
 *
 * To make sure that we have enough space to emit STREAM_LINK dwords in the
 * current buffer, a few bytes are reserved at the end, every time a buffer is
 * created. Every time we allocate a new buffer we fix the current buffer in
 * use to emit the stream link dwords. This makes sure that when
 * #pvr_csb_alloc_dwords() is called from #pvr_csb_emit() to add STREAM_LINK0
 * and STREAM_LINK1, it succeeds without trying to allocate new pages.
 *
 * On allocation failure the error is recorded in csb->status.
 *
 * \param[in] csb Control Stream Builder object to extend.
 * \return true on success and false otherwise.
 */
static bool pvr_csb_buffer_extend(struct pvr_csb *csb)
{
   const uint8_t stream_link_space = (pvr_cmd_length(VDMCTRL_STREAM_LINK0) +
                                      pvr_cmd_length(VDMCTRL_STREAM_LINK1)) *
                                     4;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&csb->device->pdevice->dev_info);
   struct pvr_bo *pvr_bo;
   VkResult result;

   /* Make sure extra space allocated for stream links is sufficient for both
    * stream types.
    */
   STATIC_ASSERT((pvr_cmd_length(VDMCTRL_STREAM_LINK0) +
                  pvr_cmd_length(VDMCTRL_STREAM_LINK1)) ==
                 (pvr_cmd_length(CDMCTRL_STREAM_LINK0) +
                  pvr_cmd_length(CDMCTRL_STREAM_LINK1)));

   result = pvr_bo_alloc(csb->device,
                         csb->device->heaps.general_heap,
                         PVR_CMD_BUFFER_CSB_BO_SIZE,
                         cache_line_size,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &pvr_bo);
   if (result != VK_SUCCESS) {
      vk_error(csb->device, result);
      csb->status = result;
      return false;
   }

   /* Chain to the old BO if this is not the first BO in csb */
   if (csb->pvr_bo) {
      /* Reclaim the reserved link space so the two emits below cannot
       * themselves trigger another extension.
       */
      csb->end += stream_link_space;
      assert(csb->next + stream_link_space <= csb->end);

      switch (csb->stream_type) {
      case PVR_CMD_STREAM_TYPE_GRAPHICS:
         pvr_csb_emit (csb, VDMCTRL_STREAM_LINK0, link) {
            link.link_addrmsb = pvr_bo->vma->dev_addr;
         }

         pvr_csb_emit (csb, VDMCTRL_STREAM_LINK1, link) {
            link.link_addrlsb = pvr_bo->vma->dev_addr;
         }

         break;

      case PVR_CMD_STREAM_TYPE_COMPUTE:
         pvr_csb_emit (csb, CDMCTRL_STREAM_LINK0, link) {
            link.link_addrmsb = pvr_bo->vma->dev_addr;
         }

         pvr_csb_emit (csb, CDMCTRL_STREAM_LINK1, link) {
            link.link_addrlsb = pvr_bo->vma->dev_addr;
         }

         break;

      default:
         unreachable("Unknown stream type");
         break;
      }
   }

   csb->pvr_bo = pvr_bo;
   csb->start = pvr_bo->bo->map;

   /* Reserve stream link size at the end to make sure we don't run out of
    * space when a stream link is required.
    */
   csb->end = csb->start + pvr_bo->bo->size - stream_link_space;
   csb->next = csb->start;

   list_addtail(&pvr_bo->link, &csb->pvr_bo_list);

   return true;
}
+
+/**
+ * \brief Provides a chunk of memory from the current csb buffer. In cases where
+ * the buffer is not able to fulfill the required amount of memory,
+ * #pvr_csb_buffer_extend() is called to allocate a new buffer. Maximum size
+ * allocable in bytes is #PVR_CMD_BUFFER_CSB_BO_SIZE - size of STREAM_LINK0
+ * and STREAM_LINK1 dwords.
+ *
+ * \param[in] csb Control Stream Builder object to allocate from.
+ * \param[in] num_dwords Number of dwords to allocate.
+ * \return Valid host virtual address or NULL otherwise.
+ */
+void *pvr_csb_alloc_dwords(struct pvr_csb *csb, uint32_t num_dwords)
+{
+ const uint32_t required_space = num_dwords * 4;
+
+ if (csb->status != VK_SUCCESS)
+ return NULL;
+
+ if (csb->next + required_space > csb->end) {
+ bool ret = pvr_csb_buffer_extend(csb);
+ if (!ret)
+ return NULL;
+ }
+
+ void *p = csb->next;
+
+ csb->next += required_space;
+ assert(csb->next <= csb->end);
+
+ return p;
+}
+
/**
 * \brief Adds VDMCTRL_STREAM_RETURN dword into the control stream pointed to
 * by the csb object. Given that VDMCTRL_STREAM_RETURN marks the end of the
 * sub control stream, we return the status of the control stream as well.
 *
 * \param[in] csb Control Stream Builder object to add VDMCTRL_STREAM_RETURN
 * to.
 * \return VK_SUCCESS on success, or error code otherwise.
 */
VkResult pvr_csb_emit_return(struct pvr_csb *csb)
{
   /* STREAM_RETURN is only supported by graphics control stream. */
   assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS);

   /* The emit body is intentionally empty: the default header word is all
    * that's needed.
    */
   /* clang-format off */
   pvr_csb_emit(csb, VDMCTRL_STREAM_RETURN, ret);
   /* clang-format on */

   return csb->status;
}
+
+/**
+ * \brief Adds STREAM_TERMINATE dword into the control stream pointed by csb
+ * object. Given a STREAM_TERMINATE marks the end of the control stream, we
+ * return the status of the control stream as well.
+ *
+ * \param[in] csb Control Stream Builder object to terminate.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ */
+VkResult pvr_csb_emit_terminate(struct pvr_csb *csb)
+{
+ switch (csb->stream_type) {
+ case PVR_CMD_STREAM_TYPE_GRAPHICS:
+ /* clang-format off */
+ pvr_csb_emit(csb, VDMCTRL_STREAM_TERMINATE, terminate);
+ /* clang-format on */
+ break;
+
+ case PVR_CMD_STREAM_TYPE_COMPUTE:
+ /* clang-format off */
+ pvr_csb_emit(csb, CDMCTRL_STREAM_TERMINATE, terminate);
+ /* clang-format on */
+ break;
+
+ default:
+ unreachable("Unknown stream type");
+ break;
+ }
+
+ return csb->status;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on v3dv_cl.h which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_CSB_H
+#define PVR_CSB_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_bo.h"
+#include "pvr_winsys.h"
+#include "util/list.h"
+
+#define __pvr_address_type pvr_dev_addr_t
+#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
+
+#include "csbgen/rogue_hwdefs.h"
+
+struct pvr_device;
+
/* Kind of control stream a #pvr_csb builds; selects which words (VDMCTRL_*
 * vs. CDMCTRL_*) are used for stream links and termination.
 */
enum pvr_cmd_stream_type {
   PVR_CMD_STREAM_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_CMD_STREAM_TYPE_GRAPHICS, /* VDM control stream. */
   PVR_CMD_STREAM_TYPE_COMPUTE, /* CDM control stream. */
};
+
/* Control Stream Builder state. See \ref pvr_csb.c for usage notes. */
struct pvr_csb {
   struct pvr_device *device;

   /* Pointer to the current (most recently allocated) csb buffer object. */
   struct pvr_bo *pvr_bo;

   /* CPU mapping of the current buffer object: [start, end) is writable and
    * next is the write cursor. end excludes the space reserved for
    * STREAM_LINK words (see pvr_csb_buffer_extend()).
    */
   void *start;
   void *end;
   void *next;

   /* List of all csb buffer objects, in allocation order. */
   struct list_head pvr_bo_list;

   enum pvr_cmd_stream_type stream_type;

   /* Current error status of the command buffer. Used to track inconsistent
    * or incomplete command buffer states that are the consequence of run-time
    * errors such as out of memory scenarios. We want to track this in the
    * csb because the command buffer object is not visible to some parts
    * of the driver.
    */
   VkResult status;
};
+
/**
 * \brief Gets the current error status of the csb.
 *
 * \param[in] csb Control Stream Builder object.
 * \return VK_SUCCESS if the csb hasn't encountered any error, or the first
 * recorded error code otherwise.
 */
static inline VkResult pvr_csb_get_status(struct pvr_csb *csb)
{
   return csb->status;
}
+
/**
 * \brief Checks if the control stream is empty, i.e. no buffer object has
 * been allocated for it yet.
 *
 * \param[in] csb Control Stream Builder object.
 * \return true if csb is empty, false otherwise.
 */
static inline bool pvr_csb_is_empty(struct pvr_csb *csb)
{
   return list_is_empty(&csb->pvr_bo_list);
}
+
+static inline pvr_dev_addr_t pvr_csb_get_start_address(struct pvr_csb *csb)
+{
+ if (!pvr_csb_is_empty(csb)) {
+ struct pvr_bo *pvr_bo =
+ list_first_entry(&csb->pvr_bo_list, struct pvr_bo, link);
+
+ return pvr_bo->vma->dev_addr;
+ }
+
+ return PVR_DEV_ADDR_INVALID;
+}
+
+void pvr_csb_init(struct pvr_device *device,
+ enum pvr_cmd_stream_type stream_type,
+ struct pvr_csb *csb);
+void pvr_csb_finish(struct pvr_csb *csb);
+void *pvr_csb_alloc_dwords(struct pvr_csb *csb, uint32_t num_dwords);
+VkResult pvr_csb_emit_return(struct pvr_csb *csb);
+VkResult pvr_csb_emit_terminate(struct pvr_csb *csb);
+
/* Convenience wrappers around the csbgen-generated ROGUE_* identifiers:
 * PVRX() maps a short name onto the generated symbol, and the pvr_cmd_*()
 * helpers resolve the generated struct type, dword length, default header
 * and pack function for a given command/state word.
 */
#define PVRX(x) ROGUE_##x
#define pvr_cmd_struct(x) PVRX(x)
#define pvr_cmd_length(x) PVRX(x##_length)
#define pvr_cmd_header(x) PVRX(x##_header)
#define pvr_cmd_pack(x) PVRX(x##_pack)
+
/**
 * \brief Packs a command/state into one or more dwords and stores them in the
 * memory pointed to by _dst.
 *
 * Implemented as a single-iteration for loop so the caller can attach a
 * braced body: \p name is declared and header-initialized on entry, and the
 * pack function runs in the loop's increment expression once the body
 * finishes.
 *
 * \param[out] _dst Pointer to store the packed command/state.
 * \param[in] cmd Command/state type.
 * \param[in,out] name Name to give to the command/state structure variable,
 * which contains the information to be packed and emitted.
 * This can be used by the caller to modify the command or
 * state information before it's packed.
 */
#define pvr_csb_pack(_dst, cmd, name) \
   for (struct pvr_cmd_struct(cmd) name = { pvr_cmd_header(cmd) }, \
        *_loop_terminate = &name; \
        __builtin_expect(_loop_terminate != NULL, 1); \
        ({ \
           pvr_cmd_pack(cmd)((_dst), &name); \
           _loop_terminate = NULL; \
        }))
+
/**
 * \brief Merges (bitwise ORs) the dwords0 and dwords1 arrays element-wise and
 * stores the result into the control stream pointed to by the csb object.
 *
 * On allocation failure nothing is emitted; the error is already recorded in
 * the csb status by the allocator.
 *
 * \param[in] csb Control Stream Builder object.
 * \param[in] dwords0 Dwords0 array.
 * \param[in] dwords1 Dwords1 array.
 */
#define pvr_csb_emit_merge(csb, dwords0, dwords1) \
   do { \
      uint32_t *dw; \
      STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \
      dw = pvr_csb_alloc_dwords(csb, ARRAY_SIZE(dwords0)); \
      if (!dw) \
         break; \
      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++) \
         dw[i] = (dwords0)[i] | (dwords1)[i]; \
   } while (0)
+
/**
 * \brief Packs a command/state into one or more dwords and stores them into
 * the control stream pointed to by the csb object.
 *
 * Stream space is allocated up front; the caller's braced body then fills in
 * \p name (declared with the default header), and the pack function runs once
 * the body finishes. If allocation fails the body is never executed and the
 * error is recorded in the csb status.
 *
 * \param[in] csb Control Stream Builder object.
 * \param[in] cmd Command/state type.
 * \param[in,out] name Name to give to the command/state structure variable,
 * which contains the information to be packed. This can be
 * used by the caller to modify the command or state
 * information before it's packed.
 */
#define pvr_csb_emit(csb, cmd, name) \
   for (struct pvr_cmd_struct(cmd) \
           name = { pvr_cmd_header(cmd) }, \
           *_dst = pvr_csb_alloc_dwords(csb, pvr_cmd_length(cmd)); \
        __builtin_expect(_dst != NULL, 1); \
        ({ \
           pvr_cmd_pack(cmd)(_dst, &name); \
           _dst = NULL; \
        }))
+
/**
 * \brief Stores a single dword into the control stream pointed to by the csb
 * object.
 *
 * On allocation failure nothing is emitted; the error is already recorded in
 * the csb status by the allocator.
 *
 * \param[in] csb Control Stream Builder object.
 * \param[in] dword Dword to store into control stream.
 */
#define pvr_csb_emit_dword(csb, dword) \
   do { \
      uint32_t *dw; \
      STATIC_ASSERT(sizeof(dword) == sizeof(uint32_t)); \
      dw = pvr_csb_alloc_dwords(csb, 1U); \
      if (!dw) \
         break; \
      *dw = dword; \
   } while (0)
+
+#endif /* PVR_CSB_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_private.h"
+#include "util/compiler.h"
+#include "util/list.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
#if defined(DEBUG)
/* Human-readable names for debug output.
 *
 * NOTE(review): the row order (vertex, fragment, compute) presumably matches
 * the pvr_stage_allocation enum and the columns the register-set kinds —
 * confirm against pvr_private.h.
 */
static const struct {
   const char *raw;
   const char *primary;
   const char *secondary;
   const char *primary_dynamic;
   const char *secondary_dynamic;
} stage_names[] = {
   { "Vertex",
     "Vertex Primary",
     "Vertex Secondary",
     "Vertex Dynamic Primary",
     "Vertex Dynamic Secondary" },
   { "Fragment",
     "Fragment Primary",
     "Fragment Secondary",
     "Fragment Dynamic Primary",
     "Fragment Dynamic Secondary" },
   { "Compute",
     "Compute Primary",
     "Compute Secondary",
     "Compute Dynamic Primary",
     "Compute Dynamic Secondary" },
};

/* Indexed by VkDescriptorType (VK_DESCRIPTOR_TYPE_SAMPLER == 0 through
 * VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT == 10).
 */
static const char *descriptor_names[] = { "VK SAMPLER",
                                          "VK COMBINED_IMAGE_SAMPLER",
                                          "VK SAMPLED_IMAGE",
                                          "VK STORAGE_IMAGE",
                                          "VK UNIFORM_TEXEL_BUFFER",
                                          "VK STORAGE_TEXEL_BUFFER",
                                          "VK UNIFORM_BUFFER",
                                          "VK STORAGE_BUFFER",
                                          "VK UNIFORM_BUFFER_DYNAMIC",
                                          "VK STORAGE_BUFFER_DYNAMIC",
                                          "VK INPUT_ATTACHMENT" };
#endif
+
/* Fills *size_info_out with the in-memory layout sizes for a descriptor of
 * the given type, adjusting the secondary size for device features.
 *
 * NOTE(review): the three initializer fields appear to be
 * { primary, secondary, alignment } — the UINT_MAX entries are placeholders
 * for the secondary size computed below; confirm against the
 * pvr_descriptor_size_info declaration.
 */
static void pvr_descriptor_size_info_init(
   const struct pvr_device *device,
   VkDescriptorType type,
   struct pvr_descriptor_size_info *const size_info_out)
{
   /* UINT_MAX is a place holder. These values will be filled by calling the
    * init function, and set appropriately based on device features.
    */
   static const struct pvr_descriptor_size_info template_size_infos[] = {
      /* VK_DESCRIPTOR_TYPE_SAMPLER */
      { 4, 0, 4 },
      /* VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER */
      { 8, UINT_MAX, 4 },
      /* VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE */
      { 4, UINT_MAX, 4 },
      /* VK_DESCRIPTOR_TYPE_STORAGE_IMAGE */
      { 4, UINT_MAX, 4 },
      /* VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER */
      { 4, UINT_MAX, 4 },
      /* VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER */
      { 4, UINT_MAX, 4 },
      /* VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER */
      { 2, UINT_MAX, 2 },
      /* VK_DESCRIPTOR_TYPE_STORAGE_BUFFER */
      { 2, 1, 2 },
      /* VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC */
      { 2, UINT_MAX, 2 },
      /* VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC */
      { 2, 1, 2 },
      /* VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT */
      { 8, UINT_MAX, 4 }
   };

   *size_info_out = template_size_infos[type];

   switch (type) {
   /* These types already have their final secondary size in the template. */
   case VK_DESCRIPTOR_TYPE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      break;

   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
      /* Image secondary data is laid out as consecutive words:
       * [arraybase][arraymaxindex (unless HW handles array textures)]
       * [width][height][depth]; each offset below is the running total of
       * the preceding sizes.
       */
      const uint32_t image_secondary_offset_arraybase = 0;
      const uint32_t image_secondary_size_arraybase = 2;
      const uint32_t image_secondary_size_arraystride = 1;

      const uint32_t image_secondary_offset_arraystride =
         image_secondary_offset_arraybase + image_secondary_size_arraybase;

      /* With the tpu_array_textures feature, the array max index word is not
       * needed (offset collapses to 0).
       */
      const uint32_t image_secondary_offset_arraymaxindex =
         (PVR_HAS_FEATURE(&device->pdevice->dev_info, tpu_array_textures))
            ? 0
            : image_secondary_offset_arraystride +
                 image_secondary_size_arraystride;

      const uint32_t image_secondary_size_arraymaxindex = 1;

      const uint32_t image_secondary_size_width = 1;
      const uint32_t image_secondary_size_height = 1;
      const uint32_t image_secondary_size_depth = 1;

      const uint32_t image_secondary_offset_width =
         image_secondary_offset_arraymaxindex +
         image_secondary_size_arraymaxindex;
      const uint32_t image_secondary_offset_height =
         image_secondary_offset_width + image_secondary_size_width;
      const uint32_t image_secondary_offset_depth =
         image_secondary_offset_height + image_secondary_size_height;
      const uint32_t image_secondary_total_size =
         image_secondary_offset_depth + image_secondary_size_depth;

      size_info_out->secondary = image_secondary_total_size;
      break;
   }
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      /* A single extra word is only needed when robust buffer access is
       * enabled (bool promotes to 0 or 1).
       */
      size_info_out->secondary = (uint32_t)device->features.robustBufferAccess;
      break;

   default:
      unreachable("Unknown descriptor type");
   }
}
+
+static bool pvr_stage_matches_vk_flags(enum pvr_stage_allocation pvr_stage,
+ VkShaderStageFlags flags)
+{
+ VkShaderStageFlags flags_per_stage;
+
+ switch (pvr_stage) {
+ case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
+ flags_per_stage = VK_SHADER_STAGE_VERTEX_BIT |
+ VK_SHADER_STAGE_GEOMETRY_BIT;
+ break;
+ case PVR_STAGE_ALLOCATION_FRAGMENT:
+ flags_per_stage = VK_SHADER_STAGE_FRAGMENT_BIT;
+ break;
+ case PVR_STAGE_ALLOCATION_COMPUTE:
+ flags_per_stage = VK_SHADER_STAGE_COMPUTE_BIT;
+ break;
+ default:
+ unreachable("Unrecognized allocation stage.");
+ }
+
+ return !!(flags_per_stage & flags);
+}
+
/* Allocates and zero-initializes a descriptor set layout together with its
 * trailing arrays (bindings, immutable samplers, per-stage descriptor
 * counts) in a single heap allocation.
 *
 * If allocator == NULL, the internal one will be used.
 * Returns NULL on allocation failure.
 */
static struct pvr_descriptor_set_layout *
pvr_descriptor_set_layout_allocate(struct pvr_device *device,
                                   const VkAllocationCallbacks *allocator,
                                   uint32_t binding_count,
                                   uint32_t immutable_sampler_count,
                                   uint32_t supported_descriptors_count)
{
   struct pvr_descriptor_set_layout_binding *bindings;
   struct pvr_descriptor_set_layout *layout;
   /* Temporary holder for the per-stage count-array pointers; same array
    * type as the layout member it's copied into below.
    */
   __typeof__(layout->per_stage_descriptor_count) counts;
   struct pvr_sampler **immutable_samplers;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &layout, __typeof__(*layout), 1);
   vk_multialloc_add(&ma, &bindings, __typeof__(*bindings), binding_count);
   vk_multialloc_add(&ma,
                     &immutable_samplers,
                     __typeof__(*immutable_samplers),
                     immutable_sampler_count);

   /* One descriptor-count array per stage. */
   for (uint32_t stage = 0; stage < ARRAY_SIZE(counts); stage++) {
      vk_multialloc_add(&ma,
                        &counts[stage],
                        __typeof__(*counts[0]),
                        supported_descriptors_count);
   }

   /* pvr_CreateDescriptorSetLayout() relies on this being zero allocated. */
   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              allocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) {
      return NULL;
   }

   layout->bindings = bindings;
   layout->immutable_samplers = immutable_samplers;

   /* Publish the per-stage count-array pointers gathered above. */
   memcpy(&layout->per_stage_descriptor_count, &counts, sizeof(counts));

   vk_object_base_init(&device->vk,
                       &layout->base,
                       VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);

   return layout;
}
+
+/* If allocator == NULL, the internal one will be used. */
+static void
+pvr_descriptor_set_layout_free(struct pvr_device *device,
+ const VkAllocationCallbacks *allocator,
+ struct pvr_descriptor_set_layout *layout)
+{
+ vk_object_base_finish(&layout->base);
+ vk_free2(&device->vk.alloc, allocator, layout);
+}
+
+static int pvr_binding_compare(const void *a, const void *b)
+{
+ uint32_t binding_a = ((VkDescriptorSetLayoutBinding *)a)->binding;
+ uint32_t binding_b = ((VkDescriptorSetLayoutBinding *)b)->binding;
+
+ if (binding_a < binding_b)
+ return -1;
+
+ if (binding_a > binding_b)
+ return 1;
+
+ return 0;
+}
+
+/* If allocator == NULL, the internal one will be used. */
+static VkDescriptorSetLayoutBinding *
+pvr_create_sorted_bindings(struct pvr_device *device,
+ const VkAllocationCallbacks *allocator,
+ const VkDescriptorSetLayoutBinding *bindings,
+ uint32_t binding_count)
+{
+ VkDescriptorSetLayoutBinding *sorted_bindings =
+ vk_alloc2(&device->vk.alloc,
+ allocator,
+ binding_count * sizeof(*sorted_bindings),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+ if (!sorted_bindings)
+ return NULL;
+
+ memcpy(sorted_bindings, bindings, binding_count * sizeof(*sorted_bindings));
+
+ qsort(sorted_bindings,
+ binding_count,
+ sizeof(*sorted_bindings),
+ pvr_binding_compare);
+
+ return sorted_bindings;
+}
+
/* Running totals, in dwords, of the primary/secondary descriptor space used
 * by one stage. The "dynamic" members accumulate the *_BUFFER_DYNAMIC
 * descriptor types, which are laid out separately from the rest.
 */
struct pvr_register_usage {
   uint32_t primary;
   uint32_t primary_dynamic;
   uint32_t secondary;
   uint32_t secondary_dynamic;
};
+
/* Lay out each stage's primary and secondary descriptor regions inside the
 * descriptor set's backing memory, accumulating layout->total_size_in_dwords
 * as it goes. reg_usage[] holds the per-stage sizes gathered while walking
 * the bindings in pvr_CreateDescriptorSetLayout().
 */
static void pvr_setup_in_memory_layout_sizes(
   struct pvr_descriptor_set_layout *layout,
   const struct pvr_register_usage reg_usage[PVR_STAGE_ALLOCATION_COUNT])
{
   for (uint32_t stage = 0;
        stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage);
        stage++) {
      /* Each region starts on a 4-dword boundary. */
      layout->total_size_in_dwords = ALIGN_POT(layout->total_size_in_dwords, 4);

      layout->memory_layout_in_dwords_per_stage[stage].primary_offset =
         layout->total_size_in_dwords;
      layout->memory_layout_in_dwords_per_stage[stage].primary_size =
         reg_usage[stage].primary;

      layout->total_size_in_dwords += reg_usage[stage].primary;
      layout->total_size_in_dwords = ALIGN_POT(layout->total_size_in_dwords, 4);

      layout->memory_layout_in_dwords_per_stage[stage].secondary_offset =
         layout->total_size_in_dwords;
      layout->memory_layout_in_dwords_per_stage[stage].secondary_size =
         reg_usage[stage].secondary;

      layout->total_size_in_dwords += reg_usage[stage].secondary;

      /* Dynamic descriptors only record their sizes here; their offsets are
       * resolved at pipeline layout creation time.
       */
      layout->memory_layout_in_dwords_per_stage[stage].primary_dynamic_size =
         reg_usage[stage].primary_dynamic;
      layout->memory_layout_in_dwords_per_stage[stage].secondary_dynamic_size =
         reg_usage[stage].secondary_dynamic;
   }
}
+
#if defined(DEBUG)
/* Debug dump of a descriptor set layout's in-memory arrangement.
 *
 * For each stage, prints the primary then secondary regions, listing every
 * binding's offset, type and count. Dynamic buffer bindings are printed in a
 * separate pass (marked "*") since they are laid out after the rest. An "X"
 * marks bindings not used by the stage. Relies on the stage_names and
 * descriptor_names tables defined earlier in this file.
 */
static void
pvr_dump_in_memory_layout_sizes(const struct pvr_descriptor_set_layout *layout)
{
   mesa_logd("=== SET LAYOUT ===");
   mesa_logd("----------------------------------------------");
   mesa_logd(" in memory:");
   mesa_logd("----------------------------------------------");

   for (uint32_t stage = 0;
        stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage);
        stage++) {
      mesa_logd(
         "| %-18s @ %04u |",
         stage_names[stage].primary,
         layout->memory_layout_in_dwords_per_stage[stage].primary_offset);
      mesa_logd("----------------------------------------------");

      /* Print primaries. */
      for (uint32_t i = 0; i < layout->binding_count; i++) {
         const struct pvr_descriptor_set_layout_binding *const binding =
            &layout->bindings[i];

         if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
             binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
            continue;

         mesa_logd("|   %s   %04u | %-26s[%3u] |",
                   (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
                   binding->per_stage_offset_in_dwords[stage].primary,
                   descriptor_names[binding->type],
                   binding->descriptor_count);
      }

      /* Print dynamic primaries. */
      for (uint32_t i = 0; i < layout->binding_count; i++) {
         const struct pvr_descriptor_set_layout_binding *const binding =
            &layout->bindings[i];

         if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
             binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
            continue;

         mesa_logd("| * %s   %04u | %-26s[%3u] |",
                   (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
                   binding->per_stage_offset_in_dwords[stage].primary,
                   descriptor_names[binding->type],
                   binding->descriptor_count);
      }

      mesa_logd("----------------------------------------------");
      mesa_logd(
         "| %-18s @ %04u |",
         stage_names[stage].secondary,
         layout->memory_layout_in_dwords_per_stage[stage].secondary_offset);
      mesa_logd("----------------------------------------------");

      /* Print secondaries. */
      for (uint32_t i = 0; i < layout->binding_count; i++) {
         const struct pvr_descriptor_set_layout_binding *const binding =
            &layout->bindings[i];

         if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
             binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
            continue;

         mesa_logd("|   %s   %04u | %-26s[%3u] |",
                   (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
                   binding->per_stage_offset_in_dwords[stage].secondary,
                   descriptor_names[binding->type],
                   binding->descriptor_count);
      }

      /* Print dynamic secondaries. */
      for (uint32_t i = 0; i < layout->binding_count; i++) {
         const struct pvr_descriptor_set_layout_binding *const binding =
            &layout->bindings[i];

         if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
             binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
            continue;

         mesa_logd("| * %s   %04u | %-26s[%3u] |",
                   (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
                   binding->per_stage_offset_in_dwords[stage].secondary,
                   descriptor_names[binding->type],
                   binding->descriptor_count);
      }

      mesa_logd("==============================================");
   }
}
#endif
+
+VkResult pvr_CreateDescriptorSetLayout(
+ VkDevice _device,
+ const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorSetLayout *pSetLayout)
+{
+ /* Used to accumulate sizes and set each descriptor's offsets per stage. */
+ struct pvr_register_usage reg_usage[PVR_STAGE_ALLOCATION_COUNT] = { 0 };
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ struct pvr_descriptor_set_layout *layout;
+ VkDescriptorSetLayoutBinding *bindings;
+ uint32_t immutable_sampler_count;
+
+ assert(pCreateInfo->sType ==
+ VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
+
+ vk_foreach_struct (ext, pCreateInfo->pNext) {
+ pvr_debug_ignored_stype(ext->sType);
+ }
+
+ /* TODO: Add support for push descriptors. */
+
+ if (pCreateInfo->bindingCount == 0) {
+ layout = pvr_descriptor_set_layout_allocate(device, pAllocator, 0, 0, 0);
+ if (!layout)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ *pSetLayout = pvr_descriptor_set_layout_to_handle(layout);
+ return VK_SUCCESS;
+ }
+
+ /* TODO: Instead of sorting, maybe do what anvil does? */
+ bindings = pvr_create_sorted_bindings(device,
+ pAllocator,
+ pCreateInfo->pBindings,
+ pCreateInfo->bindingCount);
+ if (!bindings)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ immutable_sampler_count = 0;
+ for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+ /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding:
+ *
+ * "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or
+ * VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then
+ * pImmutableSamplers can be used to initialize a set of immutable
+ * samplers. [...] If descriptorType is not one of these descriptor
+ * types, then pImmutableSamplers is ignored.
+ *
+ * We need to be careful here and only parse pImmutableSamplers if we
+ * have one of the right descriptor types.
+ */
+ const VkDescriptorType descriptor_type = bindings[i].descriptorType;
+ if ((descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+ descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
+ bindings[i].pImmutableSamplers)
+ immutable_sampler_count += bindings[i].descriptorCount;
+ }
+
+ /* From the Vulkan 1.2.190 spec for VkDescriptorSetLayoutCreateInfo:
+ *
+ * "The VkDescriptorSetLayoutBinding::binding members of the elements
+ * of the pBindings array must each have different values."
+ *
+ * So we don't worry about duplicates and just allocate for bindingCount
+ * amount of bindings.
+ */
+ layout = pvr_descriptor_set_layout_allocate(
+ device,
+ pAllocator,
+ pCreateInfo->bindingCount,
+ immutable_sampler_count,
+ PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT);
+ if (!layout) {
+ vk_free2(&device->vk.alloc, pAllocator, bindings);
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+ }
+
+ layout->binding_count = pCreateInfo->bindingCount;
+
+ for (uint32_t bind_num = 0; bind_num < layout->binding_count; bind_num++) {
+ const VkDescriptorSetLayoutBinding *const binding = &bindings[bind_num];
+ struct pvr_descriptor_set_layout_binding *const internal_binding =
+ &layout->bindings[bind_num];
+ VkShaderStageFlags shader_stages = 0;
+
+ internal_binding->type = binding->descriptorType;
+ /* The binding_numbers can be non-contiguous so we ignore the user
+ * specified binding numbers and make them contiguous ourselves.
+ */
+ internal_binding->binding_number = bind_num;
+
+ /* From Vulkan spec 1.2.189:
+ *
+ * "If descriptorCount is zero this binding entry is reserved and the
+ * resource must not be accessed from any stage via this binding"
+ *
+ * So do not use bindings->stageFlags, use shader_stages instead.
+ */
+ if (binding->descriptorCount) {
+ shader_stages = binding->stageFlags;
+
+ internal_binding->descriptor_count = binding->descriptorCount;
+ internal_binding->descriptor_index = layout->descriptor_count;
+ layout->descriptor_count += binding->descriptorCount;
+ }
+
+ switch (binding->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ if (binding->pImmutableSamplers && binding->descriptorCount > 0) {
+ internal_binding->immutable_samplers_index =
+ layout->immutable_sampler_count;
+
+ for (uint32_t j = 0; j < binding->descriptorCount; j++) {
+ PVR_FROM_HANDLE(pvr_sampler,
+ sampler,
+ bindings->pImmutableSamplers[j]);
+ const uint32_t next = j + layout->immutable_sampler_count;
+
+ layout->immutable_samplers[next] = sampler;
+ }
+
+ layout->immutable_sampler_count += binding->descriptorCount;
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ layout->dynamic_buffer_count += binding->descriptorCount;
+ break;
+
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ break;
+
+ default:
+ unreachable("Unknown descriptor type");
+ break;
+ }
+
+ if (!shader_stages)
+ continue;
+
+ internal_binding->shader_stages = shader_stages;
+ layout->shader_stages |= shader_stages;
+
+ for (uint32_t stage = 0;
+ stage < ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords);
+ stage++) {
+ const VkDescriptorType descriptor_type = binding->descriptorType;
+
+ if (!pvr_stage_matches_vk_flags(stage, shader_stages))
+ continue;
+
+ internal_binding->shader_stage_mask |= (1U << stage);
+
+ /* TODO: Do we have to allocate them at the end? We could speed it
+ * by allocating them here if not. */
+ /* We allocate dynamics primary and secondaries at the end. */
+ if (descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+ descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+ struct pvr_descriptor_size_info size_info;
+
+ pvr_descriptor_size_info_init(device, descriptor_type, &size_info);
+
+ STATIC_ASSERT(
+ ARRAY_SIZE(reg_usage) ==
+ ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords));
+
+ reg_usage[stage].primary =
+ ALIGN_POT(reg_usage[stage].primary, size_info.alignment);
+
+ internal_binding->per_stage_offset_in_dwords[stage].primary =
+ reg_usage[stage].primary;
+ reg_usage[stage].primary +=
+ size_info.primary * internal_binding->descriptor_count;
+
+ internal_binding->per_stage_offset_in_dwords[stage].secondary =
+ reg_usage[stage].secondary;
+ reg_usage[stage].secondary +=
+ size_info.secondary * internal_binding->descriptor_count;
+ }
+
+ STATIC_ASSERT(
+ ARRAY_SIZE(layout->per_stage_descriptor_count) ==
+ ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords));
+
+ layout->per_stage_descriptor_count[stage][descriptor_type] +=
+ internal_binding->descriptor_count;
+ }
+ }
+
+ for (uint32_t bind_num = 0; bind_num < layout->binding_count; bind_num++) {
+ struct pvr_descriptor_set_layout_binding *const internal_binding =
+ &layout->bindings[bind_num];
+ const VkDescriptorType descriptor_type = internal_binding->type;
+
+ if (descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+ descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+ continue;
+
+ for (uint32_t stage = 0;
+ stage < ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords);
+ stage++) {
+ struct pvr_descriptor_size_info size_info;
+ const VkShaderStageFlags shader_stages =
+ internal_binding->shader_stages;
+
+ if (!pvr_stage_matches_vk_flags(stage, shader_stages))
+ continue;
+
+ pvr_descriptor_size_info_init(device, descriptor_type, &size_info);
+
+ /* TODO: align primary like we did with other descriptors? */
+ internal_binding->per_stage_offset_in_dwords[stage].primary =
+ reg_usage[stage].primary_dynamic;
+ reg_usage[stage].primary_dynamic +=
+ size_info.primary * internal_binding->descriptor_count;
+
+ internal_binding->per_stage_offset_in_dwords[stage].secondary =
+ reg_usage[stage].secondary_dynamic;
+ reg_usage[stage].secondary_dynamic +=
+ size_info.secondary * internal_binding->descriptor_count;
+ }
+ }
+
+ pvr_setup_in_memory_layout_sizes(layout, reg_usage);
+
+#if defined(DEBUG)
+ pvr_dump_in_memory_layout_sizes(layout);
+#endif
+
+ vk_free2(&device->vk.alloc, pAllocator, bindings);
+
+ *pSetLayout = pvr_descriptor_set_layout_to_handle(layout);
+
+ return VK_SUCCESS;
+}
+
+void pvr_DestroyDescriptorSetLayout(VkDevice _device,
+ VkDescriptorSetLayout _set_layout,
+ const VkAllocationCallbacks *pAllocator)
+{
+ PVR_FROM_HANDLE(pvr_descriptor_set_layout, layout, _set_layout);
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+
+ pvr_descriptor_set_layout_free(device, pAllocator, layout);
+}
+
#if defined(DEBUG)
/* Debug dump of a pipeline layout's in-register arrangement.
 *
 * For each stage: dynamic primaries, dynamic secondaries, then per-set
 * primary and secondary regions, listing each binding's register offset,
 * set:binding pair, type and count. An "X" marks bindings not used by the
 * stage. Relies on the stage_names and descriptor_names tables defined
 * earlier in this file.
 */
static void
pvr_dump_in_register_layout_sizes(const struct pvr_device *device,
                                  const struct pvr_pipeline_layout *layout)
{
   mesa_logd("=== SET LAYOUT ===");
   mesa_logd("----------------------------------------------------");
   mesa_logd(" in registers:");
   mesa_logd("----------------------------------------------------");

   for (uint32_t stage = 0;
        stage < ARRAY_SIZE(layout->register_layout_in_dwords_per_stage);
        stage++) {
      /* Dynamic descriptors always start at register 0 for each stage. */
      uint32_t dynamic_offset = 0;

      mesa_logd("| %-48s |", stage_names[stage].primary_dynamic);
      mesa_logd("----------------------------------------------------");

      /* Print dynamic primaries. */
      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
         const struct pvr_descriptor_set_layout *const set_layout =
            layout->set_layout[set_num];

         for (uint32_t i = 0; i < set_layout->binding_count; i++) {
            const struct pvr_descriptor_set_layout_binding *const binding =
               &set_layout->bindings[i];
            bool valid = !!(binding->shader_stage_mask & (1U << stage));

            if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
                binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
               continue;

            mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |",
                      (valid) ? " " : "X",
                      dynamic_offset,
                      set_num,
                      i,
                      descriptor_names[binding->type],
                      binding->descriptor_count);

            if (valid) {
               struct pvr_descriptor_size_info size_info;

               pvr_descriptor_size_info_init(device, binding->type, &size_info);

               dynamic_offset += size_info.primary;
            }
         }
      }

      mesa_logd("----------------------------------------------------");
      mesa_logd("| %-48s |", stage_names[stage].secondary_dynamic);
      mesa_logd("----------------------------------------------------");

      /* Print dynamic secondaries. */
      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
         const struct pvr_descriptor_set_layout *const set_layout =
            layout->set_layout[set_num];

         for (uint32_t i = 0; i < set_layout->binding_count; i++) {
            const struct pvr_descriptor_set_layout_binding *const binding =
               &set_layout->bindings[i];
            bool valid = !!(binding->shader_stage_mask & (1U << stage));

            if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
                binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
               continue;

            mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |",
                      (valid) ? " " : "X",
                      dynamic_offset,
                      set_num,
                      i,
                      descriptor_names[binding->type],
                      binding->descriptor_count);

            if (valid) {
               struct pvr_descriptor_size_info size_info;

               pvr_descriptor_size_info_init(device, binding->type, &size_info);

               dynamic_offset += size_info.secondary;
            }
         }
      }

      mesa_logd("----------------------------------------------------");
      mesa_logd("| %-48s |", stage_names[stage].primary);
      mesa_logd("----------------------------------------------------");

      /* Print primaries. */
      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
         const struct pvr_descriptor_set_layout *const set_layout =
            layout->set_layout[set_num];
         const uint32_t base =
            layout->register_layout_in_dwords_per_stage[stage][set_num]
               .primary_offset;

         for (uint32_t i = 0; i < set_layout->binding_count; i++) {
            const struct pvr_descriptor_set_layout_binding *const binding =
               &set_layout->bindings[i];

            if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
                binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
               continue;

            mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |",
                      (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
                      base + binding->per_stage_offset_in_dwords[stage].primary,
                      set_num,
                      i,
                      descriptor_names[binding->type],
                      binding->descriptor_count);
         }
      }

      mesa_logd("----------------------------------------------------");
      mesa_logd("| %-48s |", stage_names[stage].secondary);
      mesa_logd("----------------------------------------------------");

      /* Print secondaries. */
      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
         const struct pvr_descriptor_set_layout *const set_layout =
            layout->set_layout[set_num];
         const uint32_t base =
            layout->register_layout_in_dwords_per_stage[stage][set_num]
               .secondary_offset;

         for (uint32_t i = 0; i < set_layout->binding_count; i++) {
            const struct pvr_descriptor_set_layout_binding *const binding =
               &set_layout->bindings[i];

            if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
                binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
               continue;

            mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |",
                      (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
                      base +
                         binding->per_stage_offset_in_dwords[stage].secondary,
                      set_num,
                      i,
                      descriptor_names[binding->type],
                      binding->descriptor_count);
         }
      }

      mesa_logd("====================================================");
   }
}
#endif
+
/* Pipeline layouts. These have nothing to do with the pipeline. They are
 * just multiple descriptor set layouts pasted together.
 *
 * Register allocation order per stage: dynamic descriptors first (primary
 * then secondary across all sets), then each set's primary region, and the
 * secondary regions last.
 */
VkResult pvr_CreatePipelineLayout(VkDevice _device,
                                  const VkPipelineLayoutCreateInfo *pCreateInfo,
                                  const VkAllocationCallbacks *pAllocator,
                                  VkPipelineLayout *pPipelineLayout)
{
   /* Per-stage watermark of the first unallocated register. */
   uint32_t next_free_reg[PVR_STAGE_ALLOCATION_COUNT];
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_pipeline_layout *layout;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
   assert(pCreateInfo->setLayoutCount <= PVR_MAX_DESCRIPTOR_SETS);

   layout = vk_object_alloc(&device->vk,
                            pAllocator,
                            sizeof(*layout),
                            VK_OBJECT_TYPE_PIPELINE_LAYOUT);
   if (!layout)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   layout->set_count = pCreateInfo->setLayoutCount;
   layout->shader_stages = 0;
   for (uint32_t stage = 0; stage < PVR_STAGE_ALLOCATION_COUNT; stage++) {
      /* Running descriptor counts per type, used to compute each set's
       * descriptor offsets within this stage.
       */
      uint32_t descriptor_counts
         [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT] = { 0 };
      struct pvr_pipeline_layout_reg_info *const reg_info =
         &layout->per_stage_reg_info[stage];

      *reg_info = (struct pvr_pipeline_layout_reg_info){ 0 };

      layout->per_stage_descriptor_masks[stage] = 0;

      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
         /* So we don't write these again and again. Just do it once. */
         if (stage == 0) {
            PVR_FROM_HANDLE(pvr_descriptor_set_layout,
                            set_layout,
                            pCreateInfo->pSetLayouts[set_num]);

            layout->set_layout[set_num] = set_layout;
            layout->shader_stages |= set_layout->shader_stages;
         }

         const struct pvr_descriptor_set_layout_mem_layout *const mem_layout =
            &layout->set_layout[set_num]
                ->memory_layout_in_dwords_per_stage[stage];

         /* Allocate registers counts for dynamic descriptors. */
         reg_info->primary_dynamic_size_in_dwords +=
            mem_layout->primary_dynamic_size;
         reg_info->secondary_dynamic_size_in_dwords +=
            mem_layout->secondary_dynamic_size;

         for (VkDescriptorType type = 0;
              type < PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT;
              type++) {
            uint32_t descriptor_count;

            layout->descriptor_offsets[set_num][stage][type] =
               descriptor_counts[type];

            descriptor_count = layout->set_layout[set_num]
                                  ->per_stage_descriptor_count[stage][type];

            if (!descriptor_count)
               continue;

            switch (type) {
            case VK_DESCRIPTOR_TYPE_SAMPLER:
            case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
            case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
               layout->per_stage_descriptor_masks[stage] |= 1U << set_num;
               descriptor_counts[type] += descriptor_count;
               break;

            /* We don't need to keep track of the counts or masks for other
             * descriptor types so there is no assert() here since other
             * types are not invalid or unsupported.
             */
            /* TODO: Improve the comment above to specify why, when we find
             * out.
             */
            default:
               break;
            }
         }
      }

      next_free_reg[stage] = reg_info->primary_dynamic_size_in_dwords +
                             reg_info->secondary_dynamic_size_in_dwords;
   }

   /* Allocate registers counts for primary and secondary descriptors. */
   for (uint32_t stage = 0; stage < PVR_STAGE_ALLOCATION_COUNT; stage++) {
      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
         const struct pvr_descriptor_set_layout_mem_layout *const mem_layout =
            &layout->set_layout[set_num]
                ->memory_layout_in_dwords_per_stage[stage];
         struct pvr_descriptor_set_layout_mem_layout *const reg_layout =
            &layout->register_layout_in_dwords_per_stage[stage][set_num];

         /* Each set's primary region starts on a 4-dword boundary. */
         next_free_reg[stage] = ALIGN_POT(next_free_reg[stage], 4);

         reg_layout->primary_offset = next_free_reg[stage];
         reg_layout->primary_size = mem_layout->primary_size;

         next_free_reg[stage] += reg_layout->primary_size;
      }

      /* To optimize the total shared layout allocation used by the shader,
       * secondary descriptors come last since they're less likely to be used.
       */
      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
         const struct pvr_descriptor_set_layout_mem_layout *const mem_layout =
            &layout->set_layout[set_num]
                ->memory_layout_in_dwords_per_stage[stage];
         struct pvr_descriptor_set_layout_mem_layout *const reg_layout =
            &layout->register_layout_in_dwords_per_stage[stage][set_num];

         /* Should we be aligning next_free_reg like it's done with the
          * primary descriptors?
          */

         reg_layout->secondary_offset = next_free_reg[stage];
         reg_layout->secondary_size = mem_layout->secondary_size;

         next_free_reg[stage] += reg_layout->secondary_size;
      }
   }

   /* Union of the stage flags of all push constant ranges. */
   layout->push_constants_shader_stages = 0;
   for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
      const VkPushConstantRange *range = &pCreateInfo->pPushConstantRanges[i];

      layout->push_constants_shader_stages |= range->stageFlags;
   }

#if defined(DEBUG)
   pvr_dump_in_register_layout_sizes(device, layout);
#endif

   *pPipelineLayout = pvr_pipeline_layout_to_handle(layout);

   return VK_SUCCESS;
}
+
+void pvr_DestroyPipelineLayout(VkDevice _device,
+ VkPipelineLayout _pipelineLayout,
+ const VkAllocationCallbacks *pAllocator)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ PVR_FROM_HANDLE(pvr_pipeline_layout, layout, _pipelineLayout);
+
+ vk_object_free(&device->vk, pAllocator, layout);
+}
+
+VkResult pvr_CreateDescriptorPool(VkDevice _device,
+ const VkDescriptorPoolCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkDescriptorPool *pDescriptorPool)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ struct pvr_descriptor_pool *pool;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO);
+
+ pool = vk_object_alloc(&device->vk,
+ pAllocator,
+ sizeof(*pool),
+ VK_OBJECT_TYPE_DESCRIPTOR_POOL);
+ if (!pool)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ if (pAllocator)
+ pool->alloc = *pAllocator;
+ else
+ pool->alloc = device->vk.alloc;
+
+ pool->max_sets = pCreateInfo->maxSets;
+ list_inithead(&pool->descriptor_sets);
+
+ pool->total_size_in_dwords = 0;
+ for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
+ struct pvr_descriptor_size_info size_info;
+ const uint32_t descriptor_count =
+ pCreateInfo->pPoolSizes[i].descriptorCount;
+
+ pvr_descriptor_size_info_init(device,
+ pCreateInfo->pPoolSizes[i].type,
+ &size_info);
+
+ const uint32_t secondary = ALIGN_POT(size_info.secondary, 4);
+ const uint32_t primary = ALIGN_POT(size_info.primary, 4);
+
+ pool->total_size_in_dwords += descriptor_count * (primary + secondary);
+ }
+ pool->total_size_in_dwords *= PVR_STAGE_ALLOCATION_COUNT;
+ pool->current_size_in_dwords = 0;
+
+ pvr_finishme("Entry tracker for allocations?");
+
+ *pDescriptorPool = pvr_descriptor_pool_to_handle(pool);
+
+ return VK_SUCCESS;
+}
+
+static void pvr_free_descriptor_set(struct pvr_device *device,
+ struct pvr_descriptor_pool *pool,
+ struct pvr_descriptor_set *set)
+{
+ list_del(&set->link);
+ pvr_bo_free(device, set->pvr_bo);
+ vk_object_free(&device->vk, &pool->alloc, set);
+}
+
+void pvr_DestroyDescriptorPool(VkDevice _device,
+ VkDescriptorPool _pool,
+ const VkAllocationCallbacks *pAllocator)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ PVR_FROM_HANDLE(pvr_descriptor_pool, pool, _pool);
+
+ if (!pool)
+ return;
+
+ list_for_each_entry_safe (struct pvr_descriptor_set,
+ set,
+ &pool->descriptor_sets,
+ link) {
+ pvr_free_descriptor_set(device, pool, set);
+ }
+
+ vk_object_free(&device->vk, pAllocator, pool);
+}
+
/* TODO: Unimplemented. Aborts in debug builds; in release builds this is a
 * silent no-op that reports success without actually resetting the pool.
 */
VkResult pvr_ResetDescriptorPool(VkDevice _device,
                                 VkDescriptorPool descriptorPool,
                                 VkDescriptorPoolResetFlags flags)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}
+
+static uint16_t pvr_get_descriptor_primary_offset(
+ const struct pvr_device *device,
+ const struct pvr_descriptor_set_layout *layout,
+ const struct pvr_descriptor_set_layout_binding *binding,
+ const uint32_t stage,
+ const uint32_t desc_idx)
+{
+ struct pvr_descriptor_size_info size_info;
+ uint32_t offset;
+
+ assert(stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage));
+ assert(desc_idx < binding->descriptor_count);
+
+ pvr_descriptor_size_info_init(device, binding->type, &size_info);
+
+ offset = layout->memory_layout_in_dwords_per_stage[stage].primary_offset;
+ offset += binding->per_stage_offset_in_dwords[stage].primary;
+ offset += (desc_idx * size_info.primary);
+
+ /* Offset must be less than 16bits. */
+ assert(offset < UINT16_MAX);
+
+ return (uint16_t)offset;
+}
+
+static uint16_t pvr_get_descriptor_secondary_offset(
+ const struct pvr_device *device,
+ const struct pvr_descriptor_set_layout *layout,
+ const struct pvr_descriptor_set_layout_binding *binding,
+ const uint32_t stage,
+ const uint32_t desc_idx)
+{
+ struct pvr_descriptor_size_info size_info;
+ uint32_t offset;
+
+ assert(stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage));
+ assert(desc_idx < binding->descriptor_count);
+
+ pvr_descriptor_size_info_init(device, binding->type, &size_info);
+
+ offset = layout->memory_layout_in_dwords_per_stage[stage].secondary_offset;
+ offset += binding->per_stage_offset_in_dwords[stage].secondary;
+ offset += (desc_idx * size_info.secondary);
+
+ /* Offset must be less than 16bits. */
+ assert(offset < UINT16_MAX);
+
+ return (uint16_t)offset;
+}
+
/* Write a sampler descriptor's primary data at "primary". Currently a stub
 * that only logs; no descriptor data is written yet.
 */
static void pvr_write_sampler_descriptor(uint32_t *primary,
                                         const struct pvr_sampler *sampler)
{
   /* TODO: Implement based on WriteSamplerDescriptor. */
   pvr_finishme("Implement after vkCreateSampler API.");
}
+
+#define PVR_MAX_DESCRIPTOR_MEM_SIZE_IN_DWORDS (4 * 1024)
+
+static VkResult
+pvr_descriptor_set_create(struct pvr_device *device,
+ struct pvr_descriptor_pool *pool,
+ const struct pvr_descriptor_set_layout *layout,
+ struct pvr_descriptor_set **const descriptor_set_out)
+{
+ struct pvr_descriptor_set *set;
+ VkResult result;
+ size_t size;
+ void *map;
+
+ size = sizeof(*set) + sizeof(set->descriptors[0]) * layout->descriptor_count;
+
+ /* TODO: Add support to allocate descriptors from descriptor pool, also
+ * check the required descriptors must not exceed max allowed descriptors.
+ */
+ set = vk_object_zalloc(&device->vk,
+ &pool->alloc,
+ size,
+ VK_OBJECT_TYPE_DESCRIPTOR_SET);
+ if (!set)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ /* TODO: Add support to allocate device memory from a common pool. Look at
+ * something like anv. Also we can allocate a whole chunk of device memory
+ * for max descriptors supported by pool as done by v3dv. Also check the
+ * possibility if this can be removed from here and done on need basis.
+ */
+ if (layout->binding_count > 0) {
+ const uint32_t cache_line_size =
+ rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+ uint64_t bo_size = MIN2(pool->total_size_in_dwords,
+ PVR_MAX_DESCRIPTOR_MEM_SIZE_IN_DWORDS) *
+ sizeof(uint32_t);
+
+ result = pvr_bo_alloc(device,
+ device->heaps.general_heap,
+ bo_size,
+ cache_line_size,
+ PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+ &set->pvr_bo);
+ if (result != VK_SUCCESS)
+ goto err_free_descriptor_set;
+ }
+
+ set->layout = layout;
+ set->pool = pool;
+
+ map = set->pvr_bo->bo->map;
+ for (uint32_t i = 0; i < layout->binding_count; i++) {
+ const struct pvr_descriptor_set_layout_binding *binding =
+ &layout->bindings[i];
+
+ if (binding->descriptor_count == 0 ||
+ (binding->type != VK_DESCRIPTOR_TYPE_SAMPLER &&
+ binding->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
+ continue;
+
+ for (uint32_t stage = 0;
+ stage < ARRAY_SIZE(binding->per_stage_offset_in_dwords);
+ stage++) {
+ if (!(binding->shader_stage_mask & (1U << stage)))
+ continue;
+
+ for (uint32_t j = 0; j < binding->descriptor_count; j++) {
+ uint32_t idx = binding->immutable_samplers_index + j;
+ struct pvr_sampler *sampler = layout->immutable_samplers[idx];
+ unsigned int offset_in_dwords =
+ pvr_get_descriptor_primary_offset(device,
+ layout,
+ binding,
+ stage,
+ j);
+
+ if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+ offset_in_dwords += 4;
+
+ pvr_write_sampler_descriptor(map +
+ offset_in_dwords * sizeof(uint32_t),
+ sampler);
+ }
+ }
+ }
+
+ list_addtail(&set->link, &pool->descriptor_sets);
+
+ *descriptor_set_out = set;
+
+ return VK_SUCCESS;
+
+err_free_descriptor_set:
+ vk_object_free(&device->vk, &pool->alloc, set);
+
+ return result;
+}
+
+/* Allocates descriptorSetCount descriptor sets from the given pool.
+ *
+ * On failure, any sets created before the failing one are returned to the
+ * pool and every entry of pDescriptorSets is reset to VK_NULL_HANDLE.
+ */
+VkResult
+pvr_AllocateDescriptorSets(VkDevice _device,
+                           const VkDescriptorSetAllocateInfo *pAllocateInfo,
+                           VkDescriptorSet *pDescriptorSets)
+{
+   PVR_FROM_HANDLE(pvr_descriptor_pool, pool, pAllocateInfo->descriptorPool);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   uint32_t set_idx;
+   VkResult result;
+
+   vk_foreach_struct (ext, pAllocateInfo->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+
+   for (set_idx = 0; set_idx < pAllocateInfo->descriptorSetCount; set_idx++) {
+      PVR_FROM_HANDLE(pvr_descriptor_set_layout,
+                      layout,
+                      pAllocateInfo->pSetLayouts[set_idx]);
+      struct pvr_descriptor_set *set = NULL;
+
+      result = pvr_descriptor_set_create(device, pool, layout, &set);
+      if (result != VK_SUCCESS)
+         goto err_free_descriptor_sets;
+
+      pDescriptorSets[set_idx] = pvr_descriptor_set_to_handle(set);
+   }
+
+   return VK_SUCCESS;
+
+err_free_descriptor_sets:
+   /* Undo the sets that were successfully created before the failure. */
+   pvr_FreeDescriptorSets(_device,
+                          pAllocateInfo->descriptorPool,
+                          set_idx,
+                          pDescriptorSets);
+
+   for (set_idx = 0; set_idx < pAllocateInfo->descriptorSetCount; set_idx++)
+      pDescriptorSets[set_idx] = VK_NULL_HANDLE;
+
+   return result;
+}
+
+/* Returns the given descriptor sets to their pool.
+ *
+ * VK_NULL_HANDLE entries in pDescriptorSets are legal and are skipped.
+ */
+VkResult pvr_FreeDescriptorSets(VkDevice _device,
+                                VkDescriptorPool descriptorPool,
+                                uint32_t count,
+                                const VkDescriptorSet *pDescriptorSets)
+{
+   PVR_FROM_HANDLE(pvr_descriptor_pool, pool, descriptorPool);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   for (uint32_t set_idx = 0; set_idx < count; set_idx++) {
+      struct pvr_descriptor_set *set;
+
+      if (pDescriptorSets[set_idx] == VK_NULL_HANDLE)
+         continue;
+
+      set = pvr_descriptor_set_from_handle(pDescriptorSets[set_idx]);
+      pvr_free_descriptor_set(device, pool, set);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* qsort()/bsearch() comparator ordering layout bindings by binding number. */
+static int pvr_compare_layout_binding(const void *a, const void *b)
+{
+   const struct pvr_descriptor_set_layout_binding *binding_a = a;
+   const struct pvr_descriptor_set_layout_binding *binding_b = b;
+
+   if (binding_a->binding_number < binding_b->binding_number)
+      return -1;
+
+   if (binding_a->binding_number > binding_b->binding_number)
+      return 1;
+
+   return 0;
+}
+
+/* Looks up the layout binding with the given binding number via bsearch(),
+ * which requires layout->bindings to be sorted by binding_number.
+ *
+ * This function does not assume that the binding will always exist for a
+ * particular binding_num. Caller should check before using the return pointer.
+ */
+static struct pvr_descriptor_set_layout_binding *
+pvr_get_descriptor_binding(const struct pvr_descriptor_set_layout *layout,
+                           const uint32_t binding_num)
+{
+   /* Zero-initialize the search key. The comparator only reads
+    * binding_number, but passing a fully initialized struct avoids handing
+    * uninitialized bytes to bsearch() and stays correct if the comparator
+    * ever looks at more fields.
+    */
+   struct pvr_descriptor_set_layout_binding binding = {
+      .binding_number = binding_num,
+   };
+
+   return bsearch(&binding,
+                  layout->bindings,
+                  layout->binding_count,
+                  sizeof(binding),
+                  pvr_compare_layout_binding);
+}
+
+/* Writes buffer descriptor data for one VkWriteDescriptorSet into both the
+ * CPU-side shadow (set->descriptors[]) and, for non-dynamic buffers, the
+ * descriptor memory mapped at mem_ptr for every shader stage in
+ * [start_stage, end_stage) that uses the binding.
+ */
+static void
+pvr_descriptor_update_buffer_info(const struct pvr_device *device,
+                                  const VkWriteDescriptorSet *write_set,
+                                  struct pvr_descriptor_set *set,
+                                  uint32_t *mem_ptr,
+                                  uint32_t start_stage,
+                                  uint32_t end_stage)
+{
+   const struct pvr_descriptor_set_layout_binding *binding;
+   struct pvr_descriptor_size_info size_info;
+   bool is_dynamic;
+
+   binding = pvr_get_descriptor_binding(set->layout, write_set->dstBinding);
+   /* Binding should not be NULL. */
+   assert(binding);
+
+   /* Dynamic buffers get their device address patched at bind time, so only
+    * the shadow copy is updated for them below.
+    */
+   is_dynamic = (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) ||
+                (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
+
+   pvr_descriptor_size_info_init(device, binding->type, &size_info);
+
+   /* Only need to update the buffer if it is actually being used. If it was
+    * not present in any stage, then the shader_stage_mask would be 0 and we
+    * can skip this update.
+    */
+   if (binding->shader_stage_mask == 0)
+      return;
+
+   for (uint32_t i = 0; i < write_set->descriptorCount; i++) {
+      const VkDescriptorBufferInfo *buffer_info = &write_set->pBufferInfo[i];
+      PVR_FROM_HANDLE(pvr_buffer, buffer, buffer_info->buffer);
+      const uint32_t desc_idx =
+         binding->descriptor_index + write_set->dstArrayElement + i;
+      uint64_t addr = buffer->dev_addr.addr + buffer_info->offset;
+      /* VK_WHOLE_SIZE means "from offset to the end of the buffer". */
+      uint32_t range = (buffer_info->range == VK_WHOLE_SIZE)
+                          ? (buffer->size - buffer_info->offset)
+                          : (buffer_info->range);
+
+      set->descriptors[desc_idx].type = write_set->descriptorType;
+      set->descriptors[desc_idx].buffer_dev_addr.addr = addr;
+      set->descriptors[desc_idx].buffer_create_info_size = buffer->size;
+      set->descriptors[desc_idx].buffer_desc_range = range;
+
+      if (is_dynamic)
+         continue;
+
+      /* Update the entries in the descriptor memory for static buffer. */
+      for (uint32_t j = start_stage; j < end_stage; j++) {
+         uint32_t primary_offset;
+         uint32_t secondary_offset;
+
+         if (!(binding->shader_stage_mask & (1U << j)))
+            continue;
+
+         /* Offset calculation functions expect descriptor_index to be
+          * binding relative not layout relative, so we have used
+          * write_set->dstArrayElement + i rather than desc_idx.
+          */
+         primary_offset =
+            pvr_get_descriptor_primary_offset(device,
+                                              set->layout,
+                                              binding,
+                                              j,
+                                              write_set->dstArrayElement + i);
+         secondary_offset =
+            pvr_get_descriptor_secondary_offset(device,
+                                                set->layout,
+                                                binding,
+                                                j,
+                                                write_set->dstArrayElement + i);
+
+         /* mem_ptr is a uint32_t pointer so the offsets are in dwords;
+          * size_info.primary/secondary are presumably dword counts and the
+          * << 2 converts them to a byte count for memcpy — TODO confirm
+          * against pvr_descriptor_size_info_init().
+          */
+         memcpy(mem_ptr + primary_offset, &addr, size_info.primary << 2);
+         memcpy(mem_ptr + secondary_offset, &range, size_info.secondary << 2);
+      }
+   }
+}
+
+/* Applies descriptor writes (and, eventually, copies) to descriptor sets.
+ * Currently only buffer descriptor types are written; image/sampler updates
+ * and copies are still TODO.
+ */
+void pvr_UpdateDescriptorSets(VkDevice _device,
+                              uint32_t descriptorWriteCount,
+                              const VkWriteDescriptorSet *pDescriptorWrites,
+                              uint32_t descriptorCopyCount,
+                              const VkCopyDescriptorSet *pDescriptorCopies)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   for (uint32_t write_idx = 0; write_idx < descriptorWriteCount;
+        write_idx++) {
+      const VkWriteDescriptorSet *write_set = &pDescriptorWrites[write_idx];
+      PVR_FROM_HANDLE(pvr_descriptor_set, set, write_set->dstSet);
+      uint32_t *map = set->pvr_bo->bo->map;
+
+      vk_foreach_struct (ext, write_set->pNext) {
+         pvr_debug_ignored_stype(ext->sType);
+      }
+
+      switch (write_set->descriptorType) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+         /* Image/sampler descriptor writes are not implemented yet. */
+         pvr_finishme("Update support missing for %d descriptor type\n",
+                      write_set->descriptorType);
+         break;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         /* Write the buffer descriptor for every stage allocation. */
+         pvr_descriptor_update_buffer_info(device,
+                                           write_set,
+                                           set,
+                                           map,
+                                           0,
+                                           PVR_STAGE_ALLOCATION_COUNT);
+         break;
+
+      default:
+         unreachable("Unknown descriptor type");
+         break;
+      }
+   }
+
+   if (descriptorCopyCount > 0)
+      pvr_finishme("Descriptor copying support missing\n");
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on v3dv driver which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+#include <xf86drm.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue_compiler.h"
+#include "util/build_id.h"
+#include "util/log.h"
+#include "util/mesa-sha1.h"
+#include "util/os_misc.h"
+#include "util/u_math.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
+#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
+#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)
+
+/* The grow threshold is a percentage. This is intended to be 12.5%, but has
+ * been rounded up since the percentage is treated as an integer.
+ */
+#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
+
+#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
+# define PVR_USE_WSI_PLATFORM
+#endif
+
+#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
+
+/* Instance-level extensions advertised by this driver. Entries inside
+ * platform #ifdef blocks are only exposed on builds with that platform
+ * support compiled in.
+ */
+static const struct vk_instance_extension_table pvr_instance_extensions = {
+#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
+   .KHR_display = true,
+#endif
+   .KHR_external_memory_capabilities = true,
+   .KHR_get_physical_device_properties2 = true,
+#if defined(PVR_USE_WSI_PLATFORM)
+   .KHR_surface = true,
+#endif
+   .EXT_debug_report = true,
+   .EXT_debug_utils = true,
+};
+
+/* Fills *extensions with the device-level extensions this driver supports.
+ * pdevice is currently unused; the set does not yet depend on the core.
+ */
+static void pvr_physical_device_get_supported_extensions(
+   const struct pvr_physical_device *pdevice,
+   struct vk_device_extension_table *extensions)
+{
+   /* clang-format off */
+   *extensions = (struct vk_device_extension_table){
+      .EXT_external_memory_dma_buf = true,
+      .EXT_private_data = true,
+      .KHR_external_memory = true,
+      .KHR_external_memory_fd = true,
+#if defined(PVR_USE_WSI_PLATFORM)
+      .KHR_swapchain = true,
+#endif
+   };
+   /* clang-format on */
+}
+
+/* Reports the highest Vulkan instance API version supported (PVR_API_VERSION,
+ * i.e. Vulkan 1.0 with the current header patch level).
+ */
+VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
+{
+   *pApiVersion = PVR_API_VERSION;
+   return VK_SUCCESS;
+}
+
+/* Enumerates supported instance extensions from pvr_instance_extensions. */
+VkResult
+pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
+                                         uint32_t *pPropertyCount,
+                                         VkExtensionProperties *pProperties)
+{
+   /* This driver implements no instance layers. */
+   if (pLayerName != NULL)
+      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+
+   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
+                                                     pPropertyCount,
+                                                     pProperties);
+}
+
+/* Creates the Vulkan instance: allocates the pvr_instance, builds the
+ * dispatch table from driver and WSI entrypoints, and initializes common
+ * instance state. Physical devices are enumerated lazily later.
+ */
+VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkInstance *pInstance)
+{
+   struct vk_instance_dispatch_table dispatch_table;
+   struct pvr_instance *instance;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
+
+   if (!pAllocator)
+      pAllocator = vk_default_allocator();
+
+   instance = vk_alloc(pAllocator,
+                       sizeof(*instance),
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!instance)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Driver entrypoints take priority (overrideFunctions = true); WSI
+    * entrypoints only fill slots the driver left empty.
+    */
+   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+                                               &pvr_instance_entrypoints,
+                                               true);
+
+   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+                                               &wsi_instance_entrypoints,
+                                               false);
+
+   result = vk_instance_init(&instance->vk,
+                             &pvr_instance_extensions,
+                             &dispatch_table,
+                             pCreateInfo,
+                             pAllocator);
+   if (result != VK_SUCCESS) {
+      vk_free(pAllocator, instance);
+      return vk_error(NULL, result);
+   }
+
+   /* -1 marks "not enumerated yet"; pvr_EnumeratePhysicalDevices() checks
+    * for a negative count before probing DRM devices.
+    */
+   instance->physical_devices_count = -1;
+
+   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+
+   *pInstance = pvr_instance_to_handle(instance);
+
+   return VK_SUCCESS;
+}
+
+/* Tears down a physical device, releasing resources in roughly the reverse
+ * order of pvr_physical_device_init().
+ */
+static void pvr_physical_device_finish(struct pvr_physical_device *pdevice)
+{
+   /* Be careful here. The device might not have been initialized. This can
+    * happen since initialization is done in vkEnumeratePhysicalDevices() but
+    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
+    * before freeing or that the freeing functions accept NULL pointers.
+    */
+
+   if (pdevice->compiler)
+      rogue_compiler_destroy(pdevice->compiler);
+
+   pvr_wsi_finish(pdevice);
+
+   /* name is allocated with asprintf() in pvr_physical_device_init(), so a
+    * plain free() (not vk_free()) is correct; free(NULL) is a no-op.
+    */
+   free(pdevice->name);
+
+   if (pdevice->ws)
+      pvr_winsys_destroy(pdevice->ws);
+
+   /* master_path is only allocated when master_fd was opened, so both are
+    * released together; likewise for render_path/render_fd below.
+    */
+   if (pdevice->master_fd >= 0) {
+      vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);
+      close(pdevice->master_fd);
+   }
+
+   if (pdevice->render_fd >= 0) {
+      vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
+      close(pdevice->render_fd);
+   }
+   vk_physical_device_finish(&pdevice->vk);
+}
+
+/* Destroys the instance and its (possibly partially initialized) physical
+ * device. pAllocator is unused here; the allocator captured at instance
+ * creation (instance->vk.alloc) is used to free the instance.
+ */
+void pvr_DestroyInstance(VkInstance _instance,
+                         const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
+
+   /* Destroying VK_NULL_HANDLE is a no-op. */
+   if (!instance)
+      return;
+
+   pvr_physical_device_finish(&instance->physical_device);
+
+   VG(VALGRIND_DESTROY_MEMPOOL(instance));
+
+   vk_instance_finish(&instance->vk);
+   vk_free(&instance->vk.alloc, instance);
+}
+
+/* Derives the pipeline cache UUID for the physical device.
+ *
+ * The UUID is the (truncated) SHA-1 of the driver's ELF build-id plus the
+ * device's packed BVNC, so pipeline caches are invalidated by either a
+ * driver rebuild or a different GPU.
+ *
+ * Returns VK_ERROR_INITIALIZATION_FAILED if the build-id is missing or is
+ * not a full SHA-1.
+ */
+static VkResult
+pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice)
+{
+   struct mesa_sha1 sha1_ctx;
+   unsigned build_id_len;
+   uint8_t sha1[20]; /* SHA-1 digest size. */
+   uint64_t bvnc;
+
+   const struct build_id_note *note =
+      build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids);
+   if (!note) {
+      return vk_errorf(pdevice,
+                       VK_ERROR_INITIALIZATION_FAILED,
+                       "Failed to find build-id");
+   }
+
+   build_id_len = build_id_length(note);
+   /* Use sizeof(sha1) rather than a magic 20 so the check stays in sync
+    * with the digest buffer.
+    */
+   if (build_id_len < sizeof(sha1)) {
+      return vk_errorf(pdevice,
+                       VK_ERROR_INITIALIZATION_FAILED,
+                       "Build-id too short. It needs to be a SHA");
+   }
+
+   bvnc = pvr_get_packed_bvnc(&pdevice->dev_info);
+
+   _mesa_sha1_init(&sha1_ctx);
+   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
+   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
+   _mesa_sha1_final(&sha1_ctx, sha1);
+   /* Only the first VK_UUID_SIZE (16) bytes of the 20-byte digest are kept. */
+   memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
+
+   return VK_SUCCESS;
+}
+
+/* Returns the byte size to report for the single device-local memory heap,
+ * or 0 if the system's total RAM cannot be queried.
+ */
+static uint64_t pvr_compute_heap_size(void)
+{
+   const uint64_t four_gib = 4ULL * 1024ULL * 1024ULL * 1024ULL;
+   uint64_t total_ram;
+
+   /* Query the total ram from the system */
+   if (!os_get_total_physical_memory(&total_ram))
+      return 0;
+
+   /* We don't want to burn too much ram with the GPU. If the user has 4GiB
+    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
+    */
+   if (total_ram <= four_gib)
+      return total_ram / 2U;
+
+   return total_ram * 3U / 4U;
+}
+
+/* Initializes a physical device from the discovered DRM render/primary
+ * nodes: opens the device nodes, creates the winsys, queries device info,
+ * sets up memory heaps/types, WSI, and the Rogue compiler.
+ *
+ * On failure, everything acquired so far is unwound via the goto chain at
+ * the bottom (each label releases one resource, in reverse order).
+ */
+static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
+                                         struct pvr_instance *instance,
+                                         drmDevicePtr drm_render_device,
+                                         drmDevicePtr drm_primary_device)
+{
+   const char *path = drm_render_device->nodes[DRM_NODE_RENDER];
+   struct vk_device_extension_table supported_extensions;
+   struct vk_physical_device_dispatch_table dispatch_table;
+   const char *primary_path;
+   VkResult result;
+   int ret;
+
+   /* The driver is non-conformant; require an explicit opt-in via the
+    * environment before exposing the device at all.
+    */
+   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
+      return vk_errorf(instance,
+                       VK_ERROR_INCOMPATIBLE_DRIVER,
+                       "WARNING: powervr is not a conformant Vulkan "
+                       "implementation. Pass "
+                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
+                       "what you're doing.");
+   }
+
+   pvr_physical_device_get_supported_extensions(pdevice, &supported_extensions);
+
+   /* Driver entrypoints take priority; WSI entrypoints fill the rest. */
+   vk_physical_device_dispatch_table_from_entrypoints(
+      &dispatch_table,
+      &pvr_physical_device_entrypoints,
+      true);
+
+   vk_physical_device_dispatch_table_from_entrypoints(
+      &dispatch_table,
+      &wsi_physical_device_entrypoints,
+      false);
+
+   result = vk_physical_device_init(&pdevice->vk,
+                                    &instance->vk,
+                                    &supported_extensions,
+                                    &dispatch_table);
+   if (result != VK_SUCCESS)
+      return result;
+
+   pdevice->instance = instance;
+
+   pdevice->render_fd = open(path, O_RDWR | O_CLOEXEC);
+   if (pdevice->render_fd < 0) {
+      result = vk_errorf(instance,
+                         VK_ERROR_INCOMPATIBLE_DRIVER,
+                         "Failed to open device %s",
+                         path);
+      goto err_vk_physical_device_finish;
+   }
+
+   pdevice->render_path = vk_strdup(&pdevice->vk.instance->alloc,
+                                    path,
+                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!pdevice->render_path) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      goto err_close_render_fd;
+   }
+
+   /* The primary (display) node is only needed for VK_KHR_display. A failed
+    * open is tolerated: master_fd stays negative and master_path NULL.
+    */
+   if (instance->vk.enabled_extensions.KHR_display) {
+      primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];
+
+      pdevice->master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
+   } else {
+      pdevice->master_fd = -1;
+   }
+
+   if (pdevice->master_fd >= 0) {
+      pdevice->master_path = vk_strdup(&pdevice->vk.instance->alloc,
+                                       primary_path,
+                                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+      if (!pdevice->master_path) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto err_close_master_fd;
+      }
+   } else {
+      pdevice->master_path = NULL;
+   }
+
+   pdevice->ws = pvr_winsys_create(pdevice->master_fd,
+                                   pdevice->render_fd,
+                                   &pdevice->vk.instance->alloc);
+   if (!pdevice->ws) {
+      result = VK_ERROR_INITIALIZATION_FAILED;
+      goto err_vk_free_master_path;
+   }
+
+   ret = pdevice->ws->ops->device_info_init(pdevice->ws, &pdevice->dev_info);
+   if (ret) {
+      result = VK_ERROR_INITIALIZATION_FAILED;
+      goto err_pvr_winsys_destroy;
+   }
+
+   result = pvr_physical_device_init_uuids(pdevice);
+   if (result != VK_SUCCESS)
+      goto err_pvr_winsys_destroy;
+
+   /* name is heap-allocated by asprintf(); it is released with free() both
+    * here on error and in pvr_physical_device_finish().
+    */
+   if (asprintf(&pdevice->name,
+                "Imagination PowerVR %s %s",
+                pdevice->dev_info.ident.series_name,
+                pdevice->dev_info.ident.public_name) < 0) {
+      result = vk_errorf(instance,
+                         VK_ERROR_OUT_OF_HOST_MEMORY,
+                         "Unable to allocate memory to store device name");
+      goto err_pvr_winsys_destroy;
+   }
+
+   /* Setup available memory heaps and types */
+   pdevice->memory.memoryHeapCount = 1;
+   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
+   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
+
+   pdevice->memory.memoryTypeCount = 1;
+   pdevice->memory.memoryTypes[0].propertyFlags =
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+   pdevice->memory.memoryTypes[0].heapIndex = 0;
+
+   result = pvr_wsi_init(pdevice);
+   if (result != VK_SUCCESS) {
+      vk_error(instance, result);
+      goto err_free_name;
+   }
+
+   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
+   if (!pdevice->compiler) {
+      result = vk_errorf(instance,
+                         VK_ERROR_INITIALIZATION_FAILED,
+                         "Failed to initialize Rogue compiler");
+      goto err_wsi_finish;
+   }
+
+   return VK_SUCCESS;
+
+err_wsi_finish:
+   pvr_wsi_finish(pdevice);
+
+err_free_name:
+   free(pdevice->name);
+
+err_pvr_winsys_destroy:
+   pvr_winsys_destroy(pdevice->ws);
+
+err_vk_free_master_path:
+   vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);
+
+err_close_master_fd:
+   if (pdevice->master_fd >= 0)
+      close(pdevice->master_fd);
+
+   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
+
+err_close_render_fd:
+   close(pdevice->render_fd);
+
+err_vk_physical_device_finish:
+   vk_physical_device_finish(&pdevice->vk);
+
+   return result;
+}
+
+/* Scans DRM platform devices for a compatible render node and primary
+ * (display) node, and initializes the single physical device when both are
+ * found. A VK_ERROR_INCOMPATIBLE_DRIVER from init (e.g. missing the opt-in
+ * env var) is swallowed and simply leaves zero physical devices.
+ */
+static VkResult pvr_enumerate_devices(struct pvr_instance *instance)
+{
+   /* FIXME: It should be possible to query the number of devices via
+    * drmGetDevices2 by passing in NULL for the 'devices' parameter. However,
+    * this was broken by libdrm commit
+    * 8cb12a2528d795c45bba5f03b3486b4040fb0f45, so, until this is fixed in
+    * upstream, hard-code the maximum number of devices.
+    */
+   drmDevicePtr drm_primary_device = NULL;
+   drmDevicePtr drm_render_device = NULL;
+   drmDevicePtr drm_devices[8];
+   int max_drm_devices;
+   VkResult result;
+
+   instance->physical_devices_count = 0;
+
+   max_drm_devices = drmGetDevices2(0, drm_devices, ARRAY_SIZE(drm_devices));
+   if (max_drm_devices < 1)
+      return VK_SUCCESS;
+
+   for (unsigned i = 0; i < (unsigned)max_drm_devices; i++) {
+      if (drm_devices[i]->bustype != DRM_BUS_PLATFORM)
+         continue;
+
+      if (drm_devices[i]->available_nodes & (1 << DRM_NODE_RENDER)) {
+         char **compat;
+
+         /* NOTE(review): strncmp with the literal's length is a prefix
+          * match, so a longer compatible string sharing this prefix would
+          * also match — confirm that is intended.
+          */
+         compat = drm_devices[i]->deviceinfo.platform->compatible;
+         while (*compat) {
+            if (strncmp(*compat, "mediatek,mt8173-gpu", 19) == 0) {
+               drm_render_device = drm_devices[i];
+
+               mesa_logd("Found compatible render device '%s'.",
+                         drm_render_device->nodes[DRM_NODE_RENDER]);
+               break;
+            }
+            compat++;
+         }
+      } else if (drm_devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) {
+         char **compat;
+
+         compat = drm_devices[i]->deviceinfo.platform->compatible;
+         while (*compat) {
+            if (strncmp(*compat, "mediatek-drm", 12) == 0) {
+               drm_primary_device = drm_devices[i];
+
+               mesa_logd("Found compatible primary device '%s'.",
+                         drm_primary_device->nodes[DRM_NODE_PRIMARY]);
+               break;
+            }
+            compat++;
+         }
+      }
+   }
+
+   if (drm_render_device && drm_primary_device) {
+      result = pvr_physical_device_init(&instance->physical_device,
+                                        instance,
+                                        drm_render_device,
+                                        drm_primary_device);
+      if (result == VK_SUCCESS)
+         instance->physical_devices_count = 1;
+      else if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
+         result = VK_SUCCESS;
+   } else {
+      result = VK_SUCCESS;
+   }
+
+   /* The device list is freed here; pvr_physical_device_init() copied what
+    * it needed (paths were vk_strdup'd).
+    */
+   drmFreeDevices(drm_devices, max_drm_devices);
+
+   return result;
+}
+
+/* Reports the instance's physical devices (at most one), probing the DRM
+ * devices lazily on the first call.
+ */
+VkResult pvr_EnumeratePhysicalDevices(VkInstance _instance,
+                                      uint32_t *pPhysicalDeviceCount,
+                                      VkPhysicalDevice *pPhysicalDevices)
+{
+   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
+   VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
+
+   /* A negative count means enumeration has not run yet. */
+   if (instance->physical_devices_count < 0) {
+      const VkResult result = pvr_enumerate_devices(instance);
+
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   if (instance->physical_devices_count == 0)
+      return VK_SUCCESS;
+
+   /* This driver exposes at most one physical device. */
+   assert(instance->physical_devices_count == 1);
+   vk_outarray_append (&out, p) {
+      *p = pvr_physical_device_to_handle(&instance->physical_device);
+   }
+
+   return vk_outarray_status(&out);
+}
+
+/* Reports the core Vulkan 1.0 feature set. Most values are fixed for the
+ * driver; robustBufferAccess and ASTC support depend on the device's
+ * feature bits. Extension feature structs in pNext are not yet handled.
+ */
+void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
+                                    VkPhysicalDeviceFeatures2 *pFeatures)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+
+   pFeatures->features = (VkPhysicalDeviceFeatures){
+      .robustBufferAccess =
+         PVR_HAS_FEATURE(&pdevice->dev_info, robust_buffer_access),
+      .fullDrawIndexUint32 = true,
+      .imageCubeArray = true,
+      .independentBlend = true,
+      .geometryShader = false,
+      .tessellationShader = false,
+      .sampleRateShading = true,
+      .dualSrcBlend = false,
+      .logicOp = true,
+      .multiDrawIndirect = true,
+      .drawIndirectFirstInstance = true,
+      .depthClamp = true,
+      .depthBiasClamp = true,
+      .fillModeNonSolid = false,
+      .depthBounds = false,
+      .wideLines = true,
+      .largePoints = true,
+      .alphaToOne = true,
+      .multiViewport = false,
+      .samplerAnisotropy = true,
+      .textureCompressionETC2 = true,
+      .textureCompressionASTC_LDR = PVR_HAS_FEATURE(&pdevice->dev_info, astc),
+      .textureCompressionBC = false,
+      .occlusionQueryPrecise = true,
+      .pipelineStatisticsQuery = false,
+      .vertexPipelineStoresAndAtomics = true,
+      .fragmentStoresAndAtomics = true,
+      .shaderTessellationAndGeometryPointSize = false,
+      .shaderImageGatherExtended = false,
+      .shaderStorageImageExtendedFormats = true,
+      .shaderStorageImageMultisample = false,
+      .shaderStorageImageReadWithoutFormat = true,
+      .shaderStorageImageWriteWithoutFormat = false,
+      .shaderUniformBufferArrayDynamicIndexing = true,
+      .shaderSampledImageArrayDynamicIndexing = true,
+      .shaderStorageBufferArrayDynamicIndexing = true,
+      .shaderStorageImageArrayDynamicIndexing = true,
+      .shaderClipDistance = true,
+      .shaderCullDistance = true,
+      .shaderFloat64 = false,
+      .shaderInt64 = true,
+      .shaderInt16 = true,
+      .shaderResourceResidency = false,
+      .shaderResourceMinLod = false,
+      .sparseBinding = false,
+      .sparseResidencyBuffer = false,
+      .sparseResidencyImage2D = false,
+      .sparseResidencyImage3D = false,
+      .sparseResidency2Samples = false,
+      .sparseResidency4Samples = false,
+      .sparseResidency8Samples = false,
+      .sparseResidency16Samples = false,
+      .sparseResidencyAliased = false,
+      .variableMultisampleRate = false,
+      .inheritedQueries = false,
+   };
+
+   /* Extension feature structs are not filled in yet; just log them. */
+   vk_foreach_struct (ext, pFeatures->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
+/* clang-format off */
+/* FIXME: Clang-format places multiple initializers on the same line, fix this
+ * and remove clang-format on/off comments.
+ */
+/* Per-stage descriptor count limits for BVNC 4.V.2.51 (the name encodes the
+ * B.V.N.C core identifier, with V unspecified).
+ */
+static const struct pvr_descriptor_limits bvnc_4_V_2_51_descriptor_limits = {
+   .max_per_stage_resources = 456U,
+   .max_per_stage_samplers = 64U,
+   .max_per_stage_uniform_buffers = 96U,
+   .max_per_stage_storage_buffers = 96U,
+   .max_per_stage_sampled_images = 128U,
+   .max_per_stage_storage_images = 64U,
+   .max_per_stage_input_attachments = 8U,
+};
+/* clang-format on */
+
+/* Maps the device's BVNC identifier to its descriptor limit table.
+ * Returns NULL (after asserting in debug builds) for unknown cores.
+ */
+static const struct pvr_descriptor_limits *
+pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice)
+{
+   if (pdevice->dev_info.ident.b == 4 && pdevice->dev_info.ident.n == 2) {
+      /* Series 6XT - GX6x50 - Clyde */
+      return &bvnc_4_V_2_51_descriptor_limits;
+   }
+
+   vk_errorf(pdevice,
+             VK_ERROR_INCOMPATIBLE_DRIVER,
+             "No device ID found for BVNC %d.%d.%d.%d",
+             pdevice->dev_info.ident.b,
+             pdevice->dev_info.ident.v,
+             pdevice->dev_info.ident.n,
+             pdevice->dev_info.ident.c);
+
+   assert(false);
+
+   return NULL;
+}
+
+/* Reports the core Vulkan 1.0 physical device properties and limits. Limits
+ * are derived from per-core feature values (with documented fall-back
+ * defaults) and the BVNC-specific descriptor limit table. Extension property
+ * structs in pNext are not yet handled.
+ */
+void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
+                                      VkPhysicalDeviceProperties2 *pProperties)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+   /* NOTE(review): pvr_get_physical_device_descriptor_limits() returns NULL
+    * for unknown cores in release builds, which would crash below — confirm
+    * the device can't reach here with an unsupported BVNC.
+    */
+   const struct pvr_descriptor_limits *descriptor_limits =
+      pvr_get_physical_device_descriptor_limits(pdevice);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t max_multisample =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, max_multisample, 4);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t uvs_banks =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_banks, 2);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t uvs_pba_entries =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_pba_entries, 160);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t num_user_clip_planes =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, num_user_clip_planes, 8);
+
+   const uint32_t sub_pixel_precision =
+      PVR_HAS_FEATURE(&pdevice->dev_info, simple_internal_parameter_format)
+         ? 4U
+         : 8U;
+
+   const uint32_t max_render_size =
+      rogue_get_render_size_max(&pdevice->dev_info);
+
+   /* All sample-count bits up to and including max_multisample. */
+   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
+
+   const uint32_t max_user_vertex_components =
+      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
+
+   VkPhysicalDeviceLimits limits = {
+      .maxImageDimension1D = max_render_size,
+      .maxImageDimension2D = max_render_size,
+      .maxImageDimension3D = 2U * 1024U,
+      .maxImageDimensionCube = max_render_size,
+      .maxImageArrayLayers = 2U * 1024U,
+      .maxTexelBufferElements = 64U * 1024U,
+      .maxUniformBufferRange = 128U * 1024U * 1024U,
+      .maxStorageBufferRange = 128U * 1024U * 1024U,
+      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
+      .maxMemoryAllocationCount = UINT32_MAX,
+      .maxSamplerAllocationCount = UINT32_MAX,
+      .bufferImageGranularity = 1U,
+      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
+
+      /* Maximum number of descriptor sets that can be bound at the same time.
+       */
+      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
+
+      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
+      .maxPerStageDescriptorSamplers =
+         descriptor_limits->max_per_stage_samplers,
+      .maxPerStageDescriptorUniformBuffers =
+         descriptor_limits->max_per_stage_uniform_buffers,
+      .maxPerStageDescriptorStorageBuffers =
+         descriptor_limits->max_per_stage_storage_buffers,
+      .maxPerStageDescriptorSampledImages =
+         descriptor_limits->max_per_stage_sampled_images,
+      .maxPerStageDescriptorStorageImages =
+         descriptor_limits->max_per_stage_storage_images,
+      .maxPerStageDescriptorInputAttachments =
+         descriptor_limits->max_per_stage_input_attachments,
+
+      .maxDescriptorSetSamplers = 256U,
+      .maxDescriptorSetUniformBuffers = 256U,
+      .maxDescriptorSetUniformBuffersDynamic = 8U,
+      .maxDescriptorSetStorageBuffers = 256U,
+      .maxDescriptorSetStorageBuffersDynamic = 8U,
+      .maxDescriptorSetSampledImages = 256U,
+      .maxDescriptorSetStorageImages = 256U,
+      .maxDescriptorSetInputAttachments = 256U,
+
+      /* Vertex Shader Limits */
+      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
+      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
+      .maxVertexInputAttributeOffset = 0xFFFF,
+      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
+      .maxVertexOutputComponents = max_user_vertex_components,
+
+      /* Tessellation Limits */
+      .maxTessellationGenerationLevel = 0,
+      .maxTessellationPatchSize = 0,
+      .maxTessellationControlPerVertexInputComponents = 0,
+      .maxTessellationControlPerVertexOutputComponents = 0,
+      .maxTessellationControlPerPatchOutputComponents = 0,
+      .maxTessellationControlTotalOutputComponents = 0,
+      .maxTessellationEvaluationInputComponents = 0,
+      .maxTessellationEvaluationOutputComponents = 0,
+
+      /* Geometry Shader Limits */
+      .maxGeometryShaderInvocations = 32U,
+      .maxGeometryInputComponents = max_user_vertex_components,
+      .maxGeometryOutputComponents = max_user_vertex_components,
+      .maxGeometryOutputVertices = 256U,
+      .maxGeometryTotalOutputComponents = 1024U,
+
+      /* Fragment Shader Limits */
+      .maxFragmentInputComponents = max_user_vertex_components,
+      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
+      .maxFragmentDualSrcAttachments = 0,
+      .maxFragmentCombinedOutputResources = 8U,
+
+      /* Compute Shader Limits */
+      .maxComputeSharedMemorySize = 16U * 1024U,
+      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
+      .maxComputeWorkGroupInvocations = 512U,
+      .maxComputeWorkGroupSize = { 512U, 512U, 64U },
+
+      /* Rasterization Limits */
+      .subPixelPrecisionBits = sub_pixel_precision,
+      .subTexelPrecisionBits = 8U,
+      .mipmapPrecisionBits = 4U,
+
+      .maxDrawIndexedIndexValue = UINT32_MAX,
+      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
+      .maxSamplerLodBias = 15.0f,
+      .maxSamplerAnisotropy = 16.0f,
+      .maxViewports = PVR_MAX_VIEWPORTS,
+
+      .maxViewportDimensions[0] = max_render_size,
+      .maxViewportDimensions[1] = max_render_size,
+      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
+      .viewportBoundsRange[1] = 2U * max_render_size,
+
+      .viewportSubPixelBits = 0,
+      .minMemoryMapAlignment = 64U,
+      .minTexelBufferOffsetAlignment = 16U,
+      .minUniformBufferOffsetAlignment = 4U,
+      .minStorageBufferOffsetAlignment = 4U,
+
+      .minTexelOffset = -8,
+      .maxTexelOffset = 7U,
+      .minTexelGatherOffset = 0,
+      .maxTexelGatherOffset = 0,
+      .minInterpolationOffset = -0.5,
+      .maxInterpolationOffset = 0.5,
+      .subPixelInterpolationOffsetBits = 4U,
+
+      .maxFramebufferWidth = max_render_size,
+      .maxFramebufferHeight = max_render_size,
+      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,
+
+      .framebufferColorSampleCounts = max_sample_bits,
+      .framebufferDepthSampleCounts = max_sample_bits,
+      .framebufferStencilSampleCounts = max_sample_bits,
+      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
+      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
+      .sampledImageColorSampleCounts = max_sample_bits,
+      .sampledImageIntegerSampleCounts = max_sample_bits,
+      .sampledImageDepthSampleCounts = max_sample_bits,
+      .sampledImageStencilSampleCounts = max_sample_bits,
+      .storageImageSampleCounts = max_sample_bits,
+      .maxSampleMaskWords = 1U,
+      .timestampComputeAndGraphics = false,
+      .timestampPeriod = 0.0f,
+      .maxClipDistances = num_user_clip_planes,
+      .maxCullDistances = num_user_clip_planes,
+      .maxCombinedClipAndCullDistances = num_user_clip_planes,
+      .discreteQueuePriorities = 2U,
+      .pointSizeRange[0] = 1.0f,
+      .pointSizeRange[1] = 511.0f,
+      .pointSizeGranularity = 0.0625f,
+      .lineWidthRange[0] = 1.0f / 16.0f,
+      .lineWidthRange[1] = 16.0f,
+      .lineWidthGranularity = 1.0f / 16.0f,
+      .strictLines = false,
+      .standardSampleLocations = true,
+      .optimalBufferCopyOffsetAlignment = 4U,
+      .optimalBufferCopyRowPitchAlignment = 4U,
+      .nonCoherentAtomSize = 1U,
+   };
+
+   pProperties->properties = (VkPhysicalDeviceProperties){
+      .apiVersion = PVR_API_VERSION,
+      .driverVersion = vk_get_driver_version(),
+      .vendorID = VK_VENDOR_ID_IMAGINATION,
+      .deviceID = pdevice->dev_info.ident.device_id,
+      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
+      .limits = limits,
+      .sparseProperties = { 0 },
+   };
+
+   snprintf(pProperties->properties.deviceName,
+            sizeof(pProperties->properties.deviceName),
+            "%s",
+            pdevice->name);
+
+   memcpy(pProperties->properties.pipelineCacheUUID,
+          pdevice->pipeline_cache_uuid,
+          VK_UUID_SIZE);
+
+   /* Extension property structs are not filled in yet; just log them. */
+   vk_foreach_struct (ext, pProperties->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
+/* Properties of the single queue family exposed by this driver. Timestamps
+ * are unsupported (timestampValidBits == 0).
+ *
+ * Note: the storage-class specifier is placed first ("static const" rather
+ * than "const static"), which is the idiomatic and standards-preferred order.
+ */
+static const VkQueueFamilyProperties pvr_queue_family_properties = {
+   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
+                 VK_QUEUE_TRANSFER_BIT,
+   .queueCount = PVR_MAX_QUEUES,
+   .timestampValidBits = 0,
+   .minImageTransferGranularity = { 1, 1, 1 },
+};
+
+/* vkGetPhysicalDeviceQueueFamilyProperties(): reports the driver's single
+ * queue family. The vk_outarray helper implements the standard two-call
+ * idiom (count query when pQueueFamilyProperties is NULL, fill otherwise).
+ */
+void pvr_GetPhysicalDeviceQueueFamilyProperties(
+   VkPhysicalDevice physicalDevice,
+   uint32_t *pCount,
+   VkQueueFamilyProperties *pQueueFamilyProperties)
+{
+   VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount);
+
+   vk_outarray_append (&out, p) {
+      *p = pvr_queue_family_properties;
+   }
+}
+
+/* vkGetPhysicalDeviceQueueFamilyProperties2(): same single queue family as
+ * the non-"2" entrypoint; unrecognized pNext extension structs are logged
+ * and ignored rather than filled in.
+ */
+void pvr_GetPhysicalDeviceQueueFamilyProperties2(
+   VkPhysicalDevice physicalDevice,
+   uint32_t *pQueueFamilyPropertyCount,
+   VkQueueFamilyProperties2 *pQueueFamilyProperties)
+{
+   VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);
+
+   vk_outarray_append (&out, p) {
+      p->queueFamilyProperties = pvr_queue_family_properties;
+
+      vk_foreach_struct (ext, p->pNext) {
+         pvr_debug_ignored_stype(ext->sType);
+      }
+   }
+}
+
+/* vkGetPhysicalDeviceMemoryProperties2(): the memory heap/type table is
+ * precomputed on the physical device and copied out verbatim; extension
+ * structs on pNext are logged and ignored.
+ */
+void pvr_GetPhysicalDeviceMemoryProperties2(
+   VkPhysicalDevice physicalDevice,
+   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+
+   pMemoryProperties->memoryProperties = pdevice->memory;
+
+   vk_foreach_struct (ext, pMemoryProperties->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
+/* vkGetInstanceProcAddr(): resolved through the common runtime's entrypoint
+ * table generated for this driver.
+ */
+PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
+                                           const char *pName)
+{
+   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
+   return vk_instance_get_proc_addr(&instance->vk,
+                                    &pvr_instance_entrypoints,
+                                    pName);
+}
+
+/* With version 1+ of the loader interface the ICD should expose
+ * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
+ * apps.
+ *
+ * This simply forwards to the driver's regular vkGetInstanceProcAddr.
+ */
+PUBLIC
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
+{
+   return pvr_GetInstanceProcAddr(instance, pName);
+}
+
+/* With version 4+ of the loader interface the ICD should expose
+ * vk_icdGetPhysicalDeviceProcAddr().
+ *
+ * Resolution is delegated to the common runtime, which only returns
+ * physical-device-level entrypoints.
+ */
+PUBLIC
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
+{
+   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
+   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
+}
+
+/* Build and upload the PDS "compute fence" program used by the device.
+ *
+ * Generates the data and code segments on a host staging buffer via the
+ * PDS code generator, then uploads both to device memory, leaving the
+ * result in device->pds_compute_fence_program. The staging buffer is
+ * freed on all paths.
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY / an upload error.
+ */
+static VkResult pvr_device_init_compute_pds_program(struct pvr_device *device)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   struct pvr_pds_compute_shader_program program = { 0U };
+   size_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   uint32_t *data_buffer;
+   uint32_t *code_buffer;
+   VkResult result;
+
+   /* The init loop below assumes all three register arrays are the same
+    * length.
+    */
+   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
+                 ARRAY_SIZE(program.work_group_input_regs));
+   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
+                 ARRAY_SIZE(program.global_input_regs));
+
+   /* Initialize PDS structure. */
+   for (uint32_t i = 0U; i < ARRAY_SIZE(program.local_input_regs); i++) {
+      program.local_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+      program.work_group_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+      program.global_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+   }
+
+   program.barrier_coefficient = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+
+   /* Fence kernel. */
+   program.fence = true;
+   program.clear_pds_barrier = true;
+
+   /* Calculate how much space we'll need for the compute shader PDS program.
+    */
+   pvr_pds_set_sizes_compute_shader(&program, dev_info);
+
+   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
+    * data size being in dwords.
+    */
+   /* Code size is in bytes, data size in dwords. */
+   staging_buffer_size =
+      program.data_size * sizeof(uint32_t) + program.code_size;
+
+   staging_buffer = vk_alloc(&device->vk.alloc,
+                             staging_buffer_size,
+                             8U,
+                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* The data-segment generator returns a pointer just past the data it
+    * wrote, which is where the code segment starts.
+    */
+   data_buffer = staging_buffer;
+   code_buffer = pvr_pds_generate_compute_shader_data_segment(&program,
+                                                              data_buffer,
+                                                              dev_info);
+   pvr_pds_generate_compute_shader_code_segment(&program,
+                                                code_buffer,
+                                                dev_info);
+   result = pvr_gpu_upload_pds(device,
+                               data_buffer,
+                               program.data_size,
+                               PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
+                               code_buffer,
+                               program.code_size / sizeof(uint32_t),
+                               PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
+                               cache_line_size,
+                               &device->pds_compute_fence_program);
+
+   vk_free(&device->vk.alloc, staging_buffer);
+
+   return result;
+}
+
+/* FIXME: We should be calculating the size when we upload the code in
+ * pvr_srv_setup_static_pixel_event_program().
+ */
+/* Query the PDS data-segment size (in dwords) of a minimal pixel-event
+ * program (one with no emit-word DMAs), so it can be cached on the device
+ * instead of being recomputed per kick.
+ */
+static void pvr_device_get_pixel_event_pds_program_data_size(
+   uint32_t *const data_size_in_dwords_out)
+{
+   struct pvr_pds_event_program program = {
+      /* No data to DMA, just a DOUTU needed. */
+      .num_emit_word_pairs = 0,
+   };
+
+   pvr_pds_set_sizes_pixel_event(&program);
+
+   *data_size_in_dwords_out = program.data_size;
+}
+
+/* Implements vkCreateDevice(): opens the DRM render node (and optional
+ * master node), then creates the winsys, global free list, queues and the
+ * compute-fence PDS program. Each acquisition has a matching label in the
+ * reverse-order error-unwind chain at the bottom.
+ */
+VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
+                          const VkDeviceCreateInfo *pCreateInfo,
+                          const VkAllocationCallbacks *pAllocator,
+                          VkDevice *pDevice)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+   struct pvr_instance *instance = pdevice->instance;
+   struct vk_device_dispatch_table dispatch_table;
+   struct pvr_device *device;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
+
+   device = vk_alloc2(&pdevice->vk.instance->alloc,
+                      pAllocator,
+                      sizeof(*device),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!device)
+      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Driver entrypoints take precedence; WSI entrypoints only fill gaps. */
+   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+                                             &pvr_device_entrypoints,
+                                             true);
+
+   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+                                             &wsi_device_entrypoints,
+                                             false);
+
+   result = vk_device_init(&device->vk,
+                           &pdevice->vk,
+                           &dispatch_table,
+                           pCreateInfo,
+                           pAllocator);
+   if (result != VK_SUCCESS)
+      goto err_free_device;
+
+   device->render_fd = open(pdevice->render_path, O_RDWR | O_CLOEXEC);
+   if (device->render_fd < 0) {
+      result = vk_errorf(instance,
+                         VK_ERROR_INITIALIZATION_FAILED,
+                         "Failed to open device %s",
+                         pdevice->render_path);
+      goto err_vk_device_finish;
+   }
+
+   /* The master node is optional; a failed open() leaves master_fd at -1,
+    * which the code below treats the same as no master path at all.
+    */
+   if (pdevice->master_path)
+      device->master_fd = open(pdevice->master_path, O_RDWR | O_CLOEXEC);
+   else
+      device->master_fd = -1;
+
+   device->instance = instance;
+   device->pdevice = pdevice;
+
+   /* NOTE(review): vk_device_init() is expected to have set vk.alloc from
+    * these same inputs already, making this assignment look redundant —
+    * confirm against the runtime before removing.
+    */
+   if (pAllocator)
+      device->vk.alloc = *pAllocator;
+   else
+      device->vk.alloc = pdevice->vk.instance->alloc;
+
+   device->ws = pvr_winsys_create(device->master_fd,
+                                  device->render_fd,
+                                  &device->vk.alloc);
+   if (!device->ws) {
+      result = VK_ERROR_INITIALIZATION_FAILED;
+      goto err_close_master_fd;
+   }
+
+   device->ws->ops->get_heaps_info(device->ws, &device->heaps);
+
+   result = pvr_free_list_create(device,
+                                 PVR_GLOBAL_FREE_LIST_INITIAL_SIZE,
+                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
+                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
+                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
+                                 NULL /* parent_free_list */,
+                                 &device->global_free_list);
+   if (result != VK_SUCCESS)
+      goto err_pvr_winsys_destroy;
+
+   result = pvr_queues_create(device, pCreateInfo);
+   if (result != VK_SUCCESS)
+      goto err_pvr_free_list_destroy;
+
+   result = pvr_device_init_compute_pds_program(device);
+   if (result != VK_SUCCESS)
+      goto err_pvr_queues_destroy;
+
+   if (pCreateInfo->pEnabledFeatures)
+      memcpy(&device->features,
+             pCreateInfo->pEnabledFeatures,
+             sizeof(device->features));
+
+   /* FIXME: Move this to a later stage and possibly somewhere other than
+    * pvr_device. The purpose of this is so that we don't have to get the size
+    * on each kick.
+    */
+   pvr_device_get_pixel_event_pds_program_data_size(
+      &device->pixel_event_data_size_in_dwords);
+
+   device->global_queue_job_count = 0;
+   device->global_queue_present_count = 0;
+
+   *pDevice = pvr_device_to_handle(device);
+
+   return VK_SUCCESS;
+
+   /* Unwind in reverse order of acquisition. */
+err_pvr_queues_destroy:
+   pvr_queues_destroy(device);
+
+err_pvr_free_list_destroy:
+   pvr_free_list_destroy(device->global_free_list);
+
+err_pvr_winsys_destroy:
+   pvr_winsys_destroy(device->ws);
+
+err_close_master_fd:
+   if (device->master_fd >= 0)
+      close(device->master_fd);
+
+   close(device->render_fd);
+
+err_vk_device_finish:
+   vk_device_finish(&device->vk);
+
+err_free_device:
+   vk_free(&device->vk.alloc, device);
+
+   return result;
+}
+
+void pvr_DestroyDevice(VkDevice _device,
+ const VkAllocationCallbacks *pAllocator)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+
+ pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo);
+ pvr_queues_destroy(device);
+ pvr_free_list_destroy(device->global_free_list);
+ pvr_winsys_destroy(device->ws);
+ close(device->render_fd);
+ vk_device_finish(&device->vk);
+ vk_free(&device->vk.alloc, device);
+}
+
+/* vkEnumerateInstanceLayerProperties(): the driver exposes no layers.
+ * A count query reports zero; a request for actual layer data is an error.
+ */
+VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
+                                              VkLayerProperties *pProperties)
+{
+   if (pProperties == NULL) {
+      *pPropertyCount = 0;
+      return VK_SUCCESS;
+   }
+
+   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+}
+
+/* Implements vkAllocateMemory(). Two paths:
+ *  - import: wrap an fd supplied via VkImportMemoryFdInfoKHR in a winsys bo
+ *    (validating its size) and take ownership of the fd on success;
+ *  - fresh allocation: create a winsys bo, as a display bo when WSI asked
+ *    for one via VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA.
+ */
+VkResult pvr_AllocateMemory(VkDevice _device,
+                            const VkMemoryAllocateInfo *pAllocateInfo,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkDeviceMemory *pMem)
+{
+   const VkImportMemoryFdInfoKHR *fd_info = NULL;
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
+   struct pvr_device_memory *mem;
+   VkResult result;
+
+   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
+   assert(pAllocateInfo->allocationSize > 0);
+
+   mem = vk_object_alloc(&device->vk,
+                         pAllocator,
+                         sizeof(*mem),
+                         VK_OBJECT_TYPE_DEVICE_MEMORY);
+   if (!mem)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Cast to unsigned silences -Wswitch on the non-core WSI stype. */
+   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
+      switch ((unsigned)ext->sType) {
+      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
+         type = PVR_WINSYS_BO_TYPE_DISPLAY;
+         break;
+      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
+         fd_info = (void *)ext;
+         break;
+      default:
+         pvr_debug_ignored_stype(ext->sType);
+         break;
+      }
+   }
+
+   if (fd_info && fd_info->handleType) {
+      VkDeviceSize aligned_alloc_size =
+         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);
+
+      assert(
+         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+      result = device->ws->ops->buffer_create_from_fd(device->ws,
+                                                      fd_info->fd,
+                                                      &mem->bo);
+      if (result != VK_SUCCESS)
+         goto err_vk_object_free_mem;
+
+      /* For security purposes, we reject importing the bo if it's smaller
+       * than the requested allocation size. This prevents a malicious client
+       * from passing a buffer to a trusted client, lying about the size, and
+       * telling the trusted client to try and texture from an image that goes
+       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
+       * in the trusted client. The trusted client can protect itself against
+       * this sort of attack but only if it can trust the buffer size.
+       */
+      if (aligned_alloc_size > mem->bo->size) {
+         result = vk_errorf(device,
+                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                            "Aligned requested size too large for the given fd "
+                            "%" PRIu64 "B > %" PRIu64 "B",
+                            pAllocateInfo->allocationSize,
+                            mem->bo->size);
+         device->ws->ops->buffer_destroy(mem->bo);
+         goto err_vk_object_free_mem;
+      }
+
+      /* From the Vulkan spec:
+       *
+       *    "Importing memory from a file descriptor transfers ownership of
+       *    the file descriptor from the application to the Vulkan
+       *    implementation. The application must not perform any operations on
+       *    the file descriptor after a successful import."
+       *
+       * If the import fails, we leave the file descriptor open.
+       */
+      close(fd_info->fd);
+   } else {
+      /* Align physical allocations to the page size of the heap that will be
+       * used when binding device memory (see pvr_bind_memory()) to ensure the
+       * entire allocation can be mapped.
+       */
+      const uint64_t alignment = device->heaps.general_heap->page_size;
+
+      /* FIXME: Need to determine the flags based on
+       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
+       *
+       * The alternative would be to store the flags alongside the memory
+       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
+       * that they can be looked up.
+       */
+      result = device->ws->ops->buffer_create(device->ws,
+                                              pAllocateInfo->allocationSize,
+                                              alignment,
+                                              type,
+                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
+                                              &mem->bo);
+      if (result != VK_SUCCESS)
+         goto err_vk_object_free_mem;
+   }
+
+   *pMem = pvr_device_memory_to_handle(mem);
+
+   return VK_SUCCESS;
+
+err_vk_object_free_mem:
+   vk_object_free(&device->vk, pAllocator, mem);
+
+   return result;
+}
+
+/* vkGetMemoryFdKHR(): export the winsys bo backing a VkDeviceMemory as a
+ * file descriptor. Ownership of the returned fd passes to the caller.
+ */
+VkResult pvr_GetMemoryFdKHR(VkDevice _device,
+                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
+                            int *pFd)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);
+
+   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
+
+   /* Only the handle types advertised by the driver are accepted. */
+   assert(
+      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+   return device->ws->ops->buffer_get_fd(mem->bo, pFd);
+}
+
+/* vkGetMemoryFdPropertiesKHR(): report which memory types an imported
+ * dma-buf fd may be bound to. Currently all memory types are reported
+ * (see FIXME below).
+ */
+VkResult
+pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
+                             VkExternalMemoryHandleTypeFlagBits handleType,
+                             int fd,
+                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   switch (handleType) {
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+      /* FIXME: This should only allow memory types having
+       * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
+       * dma-buf should be imported using cacheable memory types,
+       * given exporter's mmap will always map it as cacheable.
+       * Ref:
+       * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
+       */
+      /* Fix: shift in a 64-bit type. "1 << count" is an int shift, which is
+       * undefined behaviour once memoryTypeCount reaches 31/32 (the spec
+       * allows up to VK_MAX_MEMORY_TYPES = 32 types).
+       */
+      pMemoryFdProperties->memoryTypeBits =
+         (1ull << device->pdevice->memory.memoryTypeCount) - 1;
+      return VK_SUCCESS;
+   default:
+      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+   }
+}
+
+/* vkFreeMemory(): release the winsys buffer object backing the allocation
+ * and then the API object itself.
+ */
+void pvr_FreeMemory(VkDevice _device,
+                    VkDeviceMemory _mem,
+                    const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);
+
+   /* Freeing VK_NULL_HANDLE is a valid no-op. */
+   if (mem == NULL)
+      return;
+
+   device->ws->ops->buffer_destroy(mem->bo);
+   vk_object_free(&device->vk, pAllocator, mem);
+}
+
+/* Implements vkMapMemory(). The whole bo is mapped once and the map is
+ * cached on the bo; subsequent maps just offset into the existing mapping.
+ */
+VkResult pvr_MapMemory(VkDevice _device,
+                       VkDeviceMemory _memory,
+                       VkDeviceSize offset,
+                       VkDeviceSize size,
+                       VkMemoryMapFlags flags,
+                       void **ppData)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
+   void *map;
+
+   if (!mem) {
+      *ppData = NULL;
+      return VK_SUCCESS;
+   }
+
+   if (size == VK_WHOLE_SIZE)
+      size = mem->bo->size - offset;
+
+   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
+    *
+    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
+    *    assert(size != 0);
+    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
+    *    equal to the size of the memory minus offset
+    */
+
+   assert(size > 0);
+   assert(offset + size <= mem->bo->size);
+
+   /* Check if already mapped */
+   if (mem->bo->map) {
+      /* The cached map always covers the full bo (see below), so any valid
+       * offset falls inside it.
+       */
+      *ppData = mem->bo->map + offset;
+      return VK_SUCCESS;
+   }
+
+   /* Map it all at once */
+   map = device->ws->ops->buffer_map(mem->bo);
+   if (!map)
+      return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+
+   *ppData = map + offset;
+
+   return VK_SUCCESS;
+}
+
+/* vkUnmapMemory(): drop the cached CPU mapping created by pvr_MapMemory(). */
+void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
+
+   /* Nothing to do for a NULL handle or memory that was never mapped. */
+   if (mem == NULL || mem->bo->map == NULL)
+      return;
+
+   device->ws->ops->buffer_unmap(mem->bo);
+}
+
+/* vkFlushMappedMemoryRanges(): no-op. NOTE(review): presumably host-visible
+ * memory on this winsys is host-coherent so no explicit flush is needed —
+ * confirm against the advertised memory property flags.
+ */
+VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
+                                     uint32_t memoryRangeCount,
+                                     const VkMappedMemoryRange *pMemoryRanges)
+{
+   return VK_SUCCESS;
+}
+
+/* vkInvalidateMappedMemoryRanges(): no-op, mirroring
+ * pvr_FlushMappedMemoryRanges(). NOTE(review): assumes host-coherent
+ * mappings — confirm against the advertised memory property flags.
+ */
+VkResult
+pvr_InvalidateMappedMemoryRanges(VkDevice _device,
+                                 uint32_t memoryRangeCount,
+                                 const VkMappedMemoryRange *pMemoryRanges)
+{
+   return VK_SUCCESS;
+}
+
+/* vkGetImageSparseMemoryRequirements2(): sparse resources are unsupported,
+ * so zero requirements are reported.
+ */
+void pvr_GetImageSparseMemoryRequirements2(
+   VkDevice device,
+   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
+   uint32_t *pSparseMemoryRequirementCount,
+   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
+{
+   *pSparseMemoryRequirementCount = 0;
+}
+
+/* vkGetDeviceMemoryCommitment(): no lazily-allocated memory types are
+ * exposed, so committed memory is always reported as zero.
+ */
+void pvr_GetDeviceMemoryCommitment(VkDevice device,
+                                   VkDeviceMemory memory,
+                                   VkDeviceSize *pCommittedMemoryInBytes)
+{
+   *pCommittedMemoryInBytes = 0;
+}
+
+/* Bind a range of a VkDeviceMemory into the device's general heap.
+ *
+ * Reserves a device-virtual range and maps [offset, offset + size) of the
+ * bo into it. On success *vma_out / *dev_addr_out receive the reservation
+ * and its device address; undo with pvr_unbind_memory().
+ *
+ * Returns VK_SUCCESS or VK_ERROR_OUT_OF_DEVICE_MEMORY.
+ */
+VkResult pvr_bind_memory(struct pvr_device *device,
+                         struct pvr_device_memory *mem,
+                         VkDeviceSize offset,
+                         VkDeviceSize size,
+                         VkDeviceSize alignment,
+                         struct pvr_winsys_vma **const vma_out,
+                         pvr_dev_addr_t *const dev_addr_out)
+{
+   /* Grow the reservation by the sub-page part of offset so the mapping can
+    * start on a page boundary and still cover the whole range.
+    */
+   VkDeviceSize virt_size =
+      size + (offset & (device->heaps.general_heap->page_size - 1));
+   struct pvr_winsys_vma *vma;
+   pvr_dev_addr_t dev_addr;
+
+   /* Valid usage:
+    *
+    *   "memoryOffset must be an integer multiple of the alignment member of
+    *   the VkMemoryRequirements structure returned from a call to
+    *   vkGetBufferMemoryRequirements with buffer"
+    *
+    *   "memoryOffset must be an integer multiple of the alignment member of
+    *   the VkMemoryRequirements structure returned from a call to
+    *   vkGetImageMemoryRequirements with image"
+    */
+   assert(offset % alignment == 0);
+   assert(offset < mem->bo->size);
+
+   vma = device->ws->ops->heap_alloc(device->heaps.general_heap,
+                                     virt_size,
+                                     alignment);
+   if (!vma)
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   dev_addr = device->ws->ops->vma_map(vma, mem->bo, offset, size);
+   if (!dev_addr.addr) {
+      /* Mapping failed: release the VA reservation before bailing out. */
+      device->ws->ops->heap_free(vma);
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   *dev_addr_out = dev_addr;
+   *vma_out = vma;
+
+   return VK_SUCCESS;
+}
+
+/* Undo pvr_bind_memory(): unmap the device-virtual mapping and return the
+ * VA range to the heap.
+ */
+void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
+{
+   device->ws->ops->vma_unmap(vma);
+   device->ws->ops->heap_free(vma);
+}
+
+/* vkBindBufferMemory2(): bind each buffer in pBindInfos. On the first
+ * failure, every binding made so far is undone before the error is
+ * returned, leaving no partial state behind.
+ */
+VkResult pvr_BindBufferMemory2(VkDevice _device,
+                               uint32_t bindInfoCount,
+                               const VkBindBufferMemoryInfo *pBindInfos)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   uint32_t i;
+
+   for (i = 0; i < bindInfoCount; i++) {
+      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
+      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
+
+      VkResult result = pvr_bind_memory(device,
+                                        mem,
+                                        pBindInfos[i].memoryOffset,
+                                        buffer->size,
+                                        buffer->alignment,
+                                        &buffer->vma,
+                                        &buffer->dev_addr);
+      if (result != VK_SUCCESS) {
+         /* Unwind the bindings already made (entries [0, i)). */
+         while (i--) {
+            PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
+            pvr_unbind_memory(device, buffer->vma);
+         }
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* vkQueueBindSparse(): sparse binding is not supported; succeed without
+ * doing anything (no sparse features are advertised).
+ */
+VkResult pvr_QueueBindSparse(VkQueue _queue,
+                             uint32_t bindInfoCount,
+                             const VkBindSparseInfo *pBindInfo,
+                             VkFence fence)
+{
+   return VK_SUCCESS;
+}
+
+/* Event functions. */
+
+/* vkCreateEvent(): not implemented yet; asserts in debug builds. */
+VkResult pvr_CreateEvent(VkDevice _device,
+                         const VkEventCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkEvent *pEvent)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* vkDestroyEvent(): not implemented yet; asserts in debug builds. */
+void pvr_DestroyEvent(VkDevice _device,
+                      VkEvent _event,
+                      const VkAllocationCallbacks *pAllocator)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkGetEventStatus(): not implemented yet; asserts in debug builds. */
+VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* vkSetEvent(): not implemented yet; asserts in debug builds. */
+VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* vkResetEvent(): not implemented yet; asserts in debug builds. */
+VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* Buffer functions. */
+
+/* Implements vkCreateBuffer(). Only the size and a fixed page alignment
+ * are recorded here; backing memory is attached later via
+ * vkBindBufferMemory*() (see pvr_BindBufferMemory2()).
+ */
+VkResult pvr_CreateBuffer(VkDevice _device,
+                          const VkBufferCreateInfo *pCreateInfo,
+                          const VkAllocationCallbacks *pAllocator,
+                          VkBuffer *pBuffer)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   const uint32_t alignment = 4096;
+   struct pvr_buffer *buffer;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
+   assert(pCreateInfo->usage != 0);
+
+   /* We check against (ULONG_MAX - alignment) to prevent overflow issues */
+   if (pCreateInfo->size >= ULONG_MAX - alignment)
+      /* Fix: report through vk_error() like every other failure in this
+       * file so the error shows up in debug reports.
+       */
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   buffer = vk_object_zalloc(&device->vk,
+                             pAllocator,
+                             sizeof(*buffer),
+                             VK_OBJECT_TYPE_BUFFER);
+   if (!buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   buffer->size = pCreateInfo->size;
+   buffer->alignment = alignment;
+
+   *pBuffer = pvr_buffer_to_handle(buffer);
+
+   return VK_SUCCESS;
+}
+
+/* Implements vkDestroyBuffer(). */
+void pvr_DestroyBuffer(VkDevice _device,
+                       VkBuffer _buffer,
+                       const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);
+
+   if (!buffer)
+      return;
+
+   /* Fix: a buffer may legally be destroyed without ever having been bound
+    * (vkBindBufferMemory*() never called), in which case vma is still NULL
+    * from vk_object_zalloc() and must not be passed to pvr_unbind_memory().
+    */
+   if (buffer->vma)
+      pvr_unbind_memory(device, buffer->vma);
+
+   vk_object_free(&device->vk, pAllocator, buffer);
+}
+
+/* vkDestroySampler(): not implemented yet; asserts in debug builds. */
+void pvr_DestroySampler(VkDevice _device,
+                        VkSampler _sampler,
+                        const VkAllocationCallbacks *pAllocator)
+{
+   assert(!"Unimplemented");
+}
+
+/* Upload a block of host data into a freshly allocated bo on the given
+ * heap. The bo is CPU-mapped for the copy and unmapped before returning;
+ * ownership of *pvr_bo_out passes to the caller (free with pvr_bo_free()).
+ */
+VkResult pvr_gpu_upload(struct pvr_device *device,
+                        struct pvr_winsys_heap *heap,
+                        const void *data,
+                        size_t size,
+                        uint64_t alignment,
+                        struct pvr_bo **const pvr_bo_out)
+{
+   struct pvr_bo *pvr_bo = NULL;
+   VkResult result;
+
+   assert(size > 0);
+
+   result = pvr_bo_alloc(device,
+                         heap,
+                         size,
+                         alignment,
+                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                         &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   memcpy(pvr_bo->bo->map, data, size);
+   pvr_bo_cpu_unmap(device, pvr_bo);
+
+   *pvr_bo_out = pvr_bo;
+
+   return VK_SUCCESS;
+}
+
+/* Upload USC shader code to the USC heap. Like pvr_gpu_upload() but
+ * over-allocates by one instruction (see comment below); ownership of
+ * *pvr_bo_out passes to the caller.
+ */
+VkResult pvr_gpu_upload_usc(struct pvr_device *device,
+                            const void *code,
+                            size_t code_size,
+                            uint64_t code_alignment,
+                            struct pvr_bo **const pvr_bo_out)
+{
+   struct pvr_bo *pvr_bo = NULL;
+   VkResult result;
+
+   assert(code_size > 0);
+
+   /* The USC will prefetch the next instruction, so over allocate by 1
+    * instruction to prevent reading off the end of a page into a potentially
+    * unallocated page.
+    */
+   result = pvr_bo_alloc(device,
+                         device->heaps.usc_heap,
+                         code_size + ROGUE_MAX_INSTR_BYTES,
+                         code_alignment,
+                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                         &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   memcpy(pvr_bo->bo->map, code, code_size);
+   pvr_bo_cpu_unmap(device, pvr_bo);
+
+   *pvr_bo_out = pvr_bo;
+
+   return VK_SUCCESS;
+}
+
+/**
+ * \brief Upload PDS program data and code segments from host memory to device
+ * memory.
+ *
+ * \param[in] device            Logical device pointer.
+ * \param[in] data              Pointer to PDS data segment to upload.
+ * \param[in] data_size_dwords  Size of PDS data segment in dwords.
+ * \param[in] data_alignment    Required alignment of the PDS data segment in
+ *                              bytes. Must be a power of two.
+ * \param[in] code              Pointer to PDS code segment to upload.
+ * \param[in] code_size_dwords  Size of PDS code segment in dwords.
+ * \param[in] code_alignment    Required alignment of the PDS code segment in
+ *                              bytes. Must be a power of two.
+ * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
+ *                              program in bytes.
+ * \param[out] pds_upload_out   On success will be initialized based on the
+ *                              uploaded PDS program.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ */
+VkResult pvr_gpu_upload_pds(struct pvr_device *device,
+                            const uint32_t *data,
+                            uint32_t data_size_dwords,
+                            uint32_t data_alignment,
+                            const uint32_t *code,
+                            uint32_t code_size_dwords,
+                            uint32_t code_alignment,
+                            uint64_t min_alignment,
+                            struct pvr_pds_upload *const pds_upload_out)
+{
+   /* All alignment and sizes below are in bytes. */
+   const size_t data_size = data_size_dwords * sizeof(*data);
+   const size_t code_size = code_size_dwords * sizeof(*code);
+   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
+   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
+   /* Code follows data in the same bo, aligned up to its own requirement. */
+   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
+   const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
+   const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
+                                     : data_aligned_size;
+   /* Zeroing the bo means the padding between/after segments is zeroed. */
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_CPU_MAPPED |
+                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
+   VkResult result;
+
+   /* At least one of the segments must be provided, and any provided
+    * segment must come with a non-zero size and alignment.
+    */
+   assert(code || data);
+   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
+   assert(!data || (data_size_dwords != 0 && data_alignment != 0));
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.pds_heap,
+                         bo_size,
+                         bo_alignment,
+                         bo_flags,
+                         &pds_upload_out->pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (data) {
+      memcpy(pds_upload_out->pvr_bo->bo->map, data, data_size);
+
+      /* Offsets are relative to the PDS heap base, as the hardware expects. */
+      pds_upload_out->data_offset = pds_upload_out->pvr_bo->vma->dev_addr.addr -
+                                    device->heaps.pds_heap->base_addr.addr;
+
+      /* Store data size in dwords. */
+      assert(data_aligned_size % 4 == 0);
+      pds_upload_out->data_size = data_aligned_size / 4;
+   } else {
+      pds_upload_out->data_offset = 0;
+      pds_upload_out->data_size = 0;
+   }
+
+   if (code) {
+      memcpy((uint8_t *)pds_upload_out->pvr_bo->bo->map + code_offset,
+             code,
+             code_size);
+
+      pds_upload_out->code_offset =
+         (pds_upload_out->pvr_bo->vma->dev_addr.addr + code_offset) -
+         device->heaps.pds_heap->base_addr.addr;
+
+      /* Store code size in dwords. */
+      assert(code_aligned_size % 4 == 0);
+      pds_upload_out->code_size = code_aligned_size / 4;
+   } else {
+      pds_upload_out->code_offset = 0;
+      pds_upload_out->code_size = 0;
+   }
+
+   pvr_bo_cpu_unmap(device, pds_upload_out->pvr_bo);
+
+   return VK_SUCCESS;
+}
+
+/* Build and upload the per-framebuffer PPP terminate state: a 3-dword
+ * stream (header + TERMINATE0 + TERMINATE1) sized to the framebuffer's
+ * clip rectangle. On success framebuffer->ppp_state_bo/ppp_state_size are
+ * filled in; the bo is owned by the framebuffer.
+ */
+static VkResult
+pvr_framebuffer_create_ppp_state(struct pvr_device *device,
+                                 struct pvr_framebuffer *framebuffer)
+{
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   uint32_t ppp_state[3];
+   VkResult result;
+
+   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
+      header.pres_terminate = true;
+   }
+
+   /* Clip extents are encoded in hardware block units, minus one. */
+   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
+      term0.clip_right =
+         DIV_ROUND_UP(
+            framebuffer->width,
+            PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
+         1;
+      term0.clip_bottom =
+         DIV_ROUND_UP(
+            framebuffer->height,
+            PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
+         1;
+   }
+
+   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
+      term1.render_target = 0;
+      term1.clip_left = 0;
+   }
+
+   result = pvr_gpu_upload(device,
+                           device->heaps.general_heap,
+                           ppp_state,
+                           sizeof(ppp_state),
+                           cache_line_size,
+                           &framebuffer->ppp_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Calculate the size of PPP state in dwords. */
+   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);
+
+   return VK_SUCCESS;
+}
+
+/* Initialize the per-render-target mutexes. Returns false — with every
+ * mutex created so far destroyed again — if any initialization fails.
+ */
+static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
+                                    uint32_t render_targets_count)
+{
+   for (uint32_t i = 0; i < render_targets_count; i++) {
+      if (pthread_mutex_init(&render_targets[i].mutex, NULL) != 0) {
+         /* Unwind the mutexes initialized before the failing one. */
+         while (i--)
+            pthread_mutex_destroy(&render_targets[i].mutex);
+
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/* Tear down the render-target array: destroy any rt_dataset still marked
+ * valid, then destroy every per-target mutex.
+ */
+static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
+                                    uint32_t render_targets_count)
+{
+   for (uint32_t i = 0; i < render_targets_count; i++) {
+      struct pvr_render_target *rt = &render_targets[i];
+
+      if (rt->valid) {
+         pvr_render_target_dataset_destroy(rt->rt_dataset);
+         rt->valid = false;
+      }
+
+      pthread_mutex_destroy(&rt->mutex);
+   }
+}
+
+/* Implements vkCreateFramebuffer(). The framebuffer, its attachment array
+ * and its render-target array live in one multialloc'd block, so a single
+ * vk_free2() releases them all.
+ */
+VkResult pvr_CreateFramebuffer(VkDevice _device,
+                               const VkFramebufferCreateInfo *pCreateInfo,
+                               const VkAllocationCallbacks *pAllocator,
+                               VkFramebuffer *pFramebuffer)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_render_target *render_targets;
+   struct pvr_framebuffer *framebuffer;
+   struct pvr_image_view **attachments;
+   uint32_t render_targets_count;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
+
+   render_targets_count =
+      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);
+
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
+   vk_multialloc_add(&ma,
+                     &attachments,
+                     __typeof__(*attachments),
+                     pCreateInfo->attachmentCount);
+   vk_multialloc_add(&ma,
+                     &render_targets,
+                     __typeof__(*render_targets),
+                     render_targets_count);
+
+   if (!vk_multialloc_zalloc2(&ma,
+                              &device->vk.alloc,
+                              pAllocator,
+                              VK_OBJECT_TYPE_FRAMEBUFFER))
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   vk_object_base_init(&device->vk,
+                       &framebuffer->base,
+                       VK_OBJECT_TYPE_FRAMEBUFFER);
+
+   framebuffer->width = pCreateInfo->width;
+   framebuffer->height = pCreateInfo->height;
+   framebuffer->layers = pCreateInfo->layers;
+
+   /* Attachment image views are borrowed, not referenced: the application
+    * must keep them alive for the framebuffer's lifetime (Vulkan valid
+    * usage).
+    */
+   framebuffer->attachments = attachments;
+   framebuffer->attachment_count = pCreateInfo->attachmentCount;
+   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
+      framebuffer->attachments[i] =
+         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
+   }
+
+   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
+   if (result != VK_SUCCESS)
+      goto err_free_framebuffer;
+
+   framebuffer->render_targets = render_targets;
+   framebuffer->render_targets_count = render_targets_count;
+   if (!pvr_render_targets_init(framebuffer->render_targets,
+                                render_targets_count)) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_ppp_state_bo;
+   }
+
+   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
+
+   return VK_SUCCESS;
+
+err_free_ppp_state_bo:
+   pvr_bo_free(device, framebuffer->ppp_state_bo);
+
+err_free_framebuffer:
+   vk_object_base_finish(&framebuffer->base);
+   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
+
+   return result;
+}
+
+/* vkDestroyFramebuffer(): release resources in reverse creation order; the
+ * attachments/render-targets arrays share the framebuffer's allocation, so
+ * the final vk_free2() frees everything.
+ */
+void pvr_DestroyFramebuffer(VkDevice _device,
+                            VkFramebuffer _fb,
+                            const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
+
+   /* Destroying VK_NULL_HANDLE is a valid no-op. */
+   if (framebuffer == NULL)
+      return;
+
+   pvr_render_targets_fini(framebuffer->render_targets,
+                           framebuffer->render_targets_count);
+   pvr_bo_free(device, framebuffer->ppp_state_bo);
+   vk_object_base_finish(&framebuffer->base);
+   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
+}
+
+/* Negotiate the loader/ICD interface version; this driver supports up to
+ * v4 (see the summary below).
+ */
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
+{
+   /* For the full details on loader interface versioning, see
+    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
+    * What follows is a condensed summary, to help you navigate the large and
+    * confusing official doc.
+    *
+    *   - Loader interface v0 is incompatible with later versions. We don't
+    *     support it.
+    *
+    *   - In loader interface v1:
+    *       - The first ICD entrypoint called by the loader is
+    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+    *         entrypoint.
+    *       - The ICD must statically expose no other Vulkan symbol unless it
+    *         is linked with -Bsymbolic.
+    *       - Each dispatchable Vulkan handle created by the ICD must be
+    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
+    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
+    *         such loader-managed surfaces.
+    *
+    *    - Loader interface v2 differs from v1 in:
+    *       - The first ICD entrypoint called by the loader is
+    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+    *         statically expose this entrypoint.
+    *
+    *    - Loader interface v3 differs from v2 in:
+    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+    *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
+    *          because the loader no longer does so.
+    *
+    *    - Loader interface v4 differs from v3 in:
+    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
+    */
+   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
+   return VK_SUCCESS;
+}
+
+/* vkCreateSampler(): not implemented yet; asserts in debug builds. */
+VkResult pvr_CreateSampler(VkDevice _device,
+                           const VkSamplerCreateInfo *pCreateInfo,
+                           const VkAllocationCallbacks *pAllocator,
+                           VkSampler *pSampler)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* vkGetBufferMemoryRequirements2(): buffers accept every memory type; the
+ * alignment and size come straight from the buffer object.
+ */
+void pvr_GetBufferMemoryRequirements2(
+   VkDevice _device,
+   const VkBufferMemoryRequirementsInfo2 *pInfo,
+   VkMemoryRequirements2 *pMemoryRequirements)
+{
+   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* The Vulkan 1.0.166 spec says:
+    *
+    *    memoryTypeBits is a bitmask and contains one bit set for every
+    *    supported memory type for the resource. Bit 'i' is set if and only
+    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
+    *    structure for the physical device is supported for the resource.
+    *
+    * All types are currently supported for buffers.
+    *
+    * Fix: shift in a 64-bit type. "1ul" is only 32 bits wide on ILP32
+    * targets, where the shift is undefined behaviour if memoryTypeCount
+    * reaches 32 (the spec permits up to VK_MAX_MEMORY_TYPES = 32).
+    */
+   pMemoryRequirements->memoryRequirements.memoryTypeBits =
+      (1ull << device->pdevice->memory.memoryTypeCount) - 1;
+
+   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
+   pMemoryRequirements->memoryRequirements.size =
+      ALIGN_POT(buffer->size, buffer->alignment);
+}
+
+/* Return the queueIndex-th queue of the device's single queue family.
+ * NOTE(review): queueIndex is not range-checked against the number of queues
+ * created — callers must pass an index they requested at device creation.
+ */
+void pvr_GetDeviceQueue(VkDevice _device,
+                        uint32_t queueFamilyIndex,
+                        uint32_t queueIndex,
+                        VkQueue *pQueue)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* Only one queue family is exposed. */
+   assert(queueFamilyIndex == 0);
+
+   *pQueue = pvr_queue_to_handle(&device->queues[queueIndex]);
+}
+
+/* Report memory requirements for an image: every memory type is allowed, and
+ * the size is padded to the image's alignment for the arrays/CEM case (see
+ * the TODO below about whether that padding is always needed).
+ */
+void pvr_GetImageMemoryRequirements2(VkDevice _device,
+                                     const VkImageMemoryRequirementsInfo2 *pInfo,
+                                     VkMemoryRequirements2 *pMemoryRequirements)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);
+
+   /* The Vulkan 1.0.166 spec says:
+    *
+    *    memoryTypeBits is a bitmask and contains one bit set for every
+    *    supported memory type for the resource. Bit 'i' is set if and only
+    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
+    *    structure for the physical device is supported for the resource.
+    *
+    * All types are currently supported for images.
+    */
+   const uint32_t memory_types =
+      (1ul << device->pdevice->memory.memoryTypeCount) - 1;
+
+   /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
+    * in GetImageMemoryRequirements()), but this should be known at image
+    * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
+    * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
+    * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
+    * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
+    *
+    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
+    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
+    * render and compute jobs.
+    */
+   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
+   pMemoryRequirements->memoryRequirements.size =
+      ALIGN(image->size, image->alignment);
+   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include "pvr_formats.h"
+#include "pvr_private.h"
+#include "vk_format.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+/* Table-entry helper: maps a core VkFormat to its HW texture-state format and
+ * PBE pack mode, and marks the entry as supported.
+ */
+#define FORMAT(vk, tex_fmt, pack_mode) \
+   [VK_FORMAT_##vk] = { \
+      .vk_format = VK_FORMAT_##vk, \
+      .tex_format = ROGUE_TEXSTATE_FORMAT_##tex_fmt, \
+      .pbe_packmode = ROGUE_PBESTATE_PACKMODE_##pack_mode, \
+      .supported = true, \
+   }
+
+/* Per-format lookup info. Table holes are zero-initialized, so entries with
+ * supported == false mark formats the driver does not handle.
+ */
+struct pvr_format {
+   VkFormat vk_format;
+   uint32_t tex_format;
+   uint32_t pbe_packmode;
+   bool supported;
+};
+
+/* TODO: add all supported core formats */
+/* Indexed directly by the VkFormat value; gaps between the designated
+ * initializers below stay zeroed (i.e. unsupported).
+ */
+static const struct pvr_format pvr_format_table[] = {
+   FORMAT(B8G8R8A8_UNORM, U8U8U8U8, U8U8U8U8),
+   FORMAT(D32_SFLOAT, F32, F32),
+};
+
+#undef FORMAT
+
+/* Look up the table entry for a format. Core VkFormat values are small dense
+ * integers, so they index the table directly; out-of-range values (e.g.
+ * extension formats) and unsupported holes return NULL.
+ */
+static inline const struct pvr_format *pvr_get_format(VkFormat vk_format)
+{
+   if (vk_format < ARRAY_SIZE(pvr_format_table) &&
+       pvr_format_table[vk_format].supported) {
+      return &pvr_format_table[vk_format];
+   }
+
+   return NULL;
+}
+
+/* Return the HW texture-state format for a Vulkan format, or
+ * ROGUE_TEXSTATE_FORMAT_INVALID if the format is unsupported.
+ */
+uint32_t pvr_get_tex_format(VkFormat vk_format)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(vk_format);
+
+   return pvr_format ? pvr_format->tex_format : ROGUE_TEXSTATE_FORMAT_INVALID;
+}
+
+/* Return the PBE pack mode for a Vulkan format, or
+ * ROGUE_PBESTATE_PACKMODE_INVALID if the format is unsupported.
+ */
+uint32_t pvr_get_pbe_packmode(VkFormat vk_format)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(vk_format);
+
+   return pvr_format ? pvr_format->pbe_packmode
+                     : ROGUE_PBESTATE_PACKMODE_INVALID;
+}
+
+/* Image format features for a given tiling. Currently only depth/stencil
+ * formats advertise any features; vk_tiling is accepted but not yet consulted
+ * (linear and optimal report the same flags).
+ */
+static VkFormatFeatureFlags
+pvr_get_image_format_features(const struct pvr_format *pvr_format,
+                              VkImageTiling vk_tiling)
+{
+   VkFormatFeatureFlags flags = 0;
+   VkImageAspectFlags aspects;
+
+   /* Unsupported/unknown format: no features. */
+   if (!pvr_format)
+      return 0;
+
+   aspects = vk_format_aspects(pvr_format->vk_format);
+   if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+      flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT |
+               VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+               VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+   }
+
+   return flags;
+}
+
+/* Return the 4-component channel swizzle for a format. Formats that
+ * util_format cannot describe assert and fall back to the identity swizzle
+ * (X, Y, Z, W).
+ */
+const uint8_t *pvr_get_format_swizzle(VkFormat vk_format)
+{
+   const struct util_format_description *vf = vk_format_description(vk_format);
+   static const uint8_t fallback[] = { PIPE_SWIZZLE_X,
+                                       PIPE_SWIZZLE_Y,
+                                       PIPE_SWIZZLE_Z,
+                                       PIPE_SWIZZLE_W };
+
+   if (vf)
+      return vf->swizzle;
+
+   assert(!"Unsupported format");
+   return fallback;
+}
+
+/* Buffer format features. No buffer features are advertised yet, so this
+ * returns 0 for every format (and 0 for unsupported formats too).
+ */
+static VkFormatFeatureFlags
+pvr_get_buffer_format_features(const struct pvr_format *pvr_format)
+{
+   VkFormatFeatureFlags flags = 0;
+
+   if (!pvr_format)
+      return 0;
+
+   /* TODO: no buffer features are set yet; flags is always 0 here. */
+   return flags;
+}
+
+/* vkGetPhysicalDeviceFormatProperties2: fill in linear/optimal image features
+ * and buffer features for the format; all pNext extension structs are
+ * currently ignored (logged in debug builds).
+ */
+void pvr_GetPhysicalDeviceFormatProperties2(
+   VkPhysicalDevice physicalDevice,
+   VkFormat format,
+   VkFormatProperties2 *pFormatProperties)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(format);
+
+   pFormatProperties->formatProperties = (VkFormatProperties){
+      .linearTilingFeatures =
+         pvr_get_image_format_features(pvr_format, VK_IMAGE_TILING_LINEAR),
+      .optimalTilingFeatures =
+         pvr_get_image_format_features(pvr_format, VK_IMAGE_TILING_OPTIMAL),
+      .bufferFeatures = pvr_get_buffer_format_features(pvr_format),
+   };
+
+   vk_foreach_struct (ext, pFormatProperties->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
+/* Stub: detailed image format capability queries (max extents, mip levels,
+ * sample counts) are not implemented yet; the assert trips on debug builds.
+ */
+static VkResult
+pvr_get_image_format_properties(struct pvr_physical_device *pdevice,
+                                const VkPhysicalDeviceImageFormatInfo2 *info,
+                                VkImageFormatProperties *pImageFormatProperties)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* vkGetPhysicalDeviceImageFormatProperties2: run the core query, then fill in
+ * external-memory properties for the opaque-fd and dma-buf handle types.
+ * Unknown handle types fail with VK_ERROR_FORMAT_NOT_SUPPORTED.
+ */
+VkResult pvr_GetPhysicalDeviceImageFormatProperties2(
+   VkPhysicalDevice physicalDevice,
+   const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
+   VkImageFormatProperties2 *pImageFormatProperties)
+{
+   const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+   VkExternalImageFormatProperties *external_props = NULL;
+   VkResult result;
+
+   result = pvr_get_image_format_properties(
+      pdevice,
+      pImageFormatInfo,
+      &pImageFormatProperties->imageFormatProperties);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Extract input structs */
+   vk_foreach_struct_const (ext, pImageFormatInfo->pNext) {
+      switch (ext->sType) {
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
+         external_info = (const void *)ext;
+         break;
+      default:
+         pvr_debug_ignored_stype(ext->sType);
+         break;
+      }
+   }
+
+   /* Extract output structs */
+   vk_foreach_struct (ext, pImageFormatProperties->pNext) {
+      switch (ext->sType) {
+      case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
+         external_props = (void *)ext;
+         break;
+      default:
+         pvr_debug_ignored_stype(ext->sType);
+         break;
+      }
+   }
+
+   /* From the Vulkan 1.0.42 spec:
+    *
+    *    If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
+    *    behave as if VkPhysicalDeviceExternalImageFormatInfo was not
+    *    present and VkExternalImageFormatProperties will be ignored.
+    */
+   if (external_info && external_info->handleType != 0) {
+      switch (external_info->handleType) {
+      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
+         /* The caller may legitimately omit the output struct. */
+         if (!external_props)
+            break;
+
+         external_props->externalMemoryProperties.externalMemoryFeatures =
+            VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
+            VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+         external_props->externalMemoryProperties.compatibleHandleTypes =
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+         external_props->externalMemoryProperties.exportFromImportedHandleTypes =
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
+            VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+         break;
+      default:
+         return vk_error(pdevice, VK_ERROR_FORMAT_NOT_SUPPORTED);
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Legacy (non-2) sparse image format query: report zero properties. */
+void pvr_GetPhysicalDeviceSparseImageFormatProperties(
+   VkPhysicalDevice physicalDevice,
+   VkFormat format,
+   VkImageType type,
+   uint32_t samples,
+   VkImageUsageFlags usage,
+   VkImageTiling tiling,
+   uint32_t *pNumProperties,
+   VkSparseImageFormatProperties *pProperties)
+{
+   /* Sparse images are not yet supported. */
+   *pNumProperties = 0;
+}
+
+/* Vulkan 1.1 sparse image format query: report zero properties. */
+void pvr_GetPhysicalDeviceSparseImageFormatProperties2(
+   VkPhysicalDevice physicalDevice,
+   const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
+   uint32_t *pPropertyCount,
+   VkSparseImageFormatProperties2 *pProperties)
+{
+   /* Sparse images are not yet supported. */
+   *pPropertyCount = 0;
+}
+
+/* vkGetPhysicalDeviceExternalBufferProperties: advertise import/export for
+ * opaque-fd and dma-buf handle types; every other handle type gets only the
+ * spec-mandated echo of handleType in compatibleHandleTypes.
+ */
+void pvr_GetPhysicalDeviceExternalBufferProperties(
+   VkPhysicalDevice physicalDevice,
+   const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
+   VkExternalBufferProperties *pExternalBufferProperties)
+{
+   /* The Vulkan 1.0.42 spec says "handleType must be a valid
+    * VkExternalMemoryHandleTypeFlagBits value" in
+    * VkPhysicalDeviceExternalBufferInfo. This differs from
+    * VkPhysicalDeviceExternalImageFormatInfo, which surprisingly permits
+    * handleType == 0.
+    */
+   assert(pExternalBufferInfo->handleType != 0);
+
+   /* All of the current flags are for sparse which we don't support. */
+   if (pExternalBufferInfo->flags)
+      goto unsupported;
+
+   switch (pExternalBufferInfo->handleType) {
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
+      /* clang-format off */
+      pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures =
+         VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
+         VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+      pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes =
+         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
+         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+      pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes =
+         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
+         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+      /* clang-format on */
+      return;
+   default:
+      break;
+   }
+
+unsupported:
+   /* From the Vulkan 1.1.113 spec:
+    *
+    *    compatibleHandleTypes must include at least handleType.
+    */
+   pExternalBufferProperties->externalMemoryProperties =
+      (VkExternalMemoryProperties){
+         .compatibleHandleTypes = pExternalBufferInfo->handleType,
+      };
+}
+
+/* Whether the PBE may downscale (average samples of) this format. Pure
+ * integer formats must not be averaged — Vulkan requires selecting a single
+ * sample — and the pack modes listed below are likewise excluded.
+ */
+bool pvr_format_is_pbe_downscalable(VkFormat vk_format)
+{
+   if (vk_format_is_pure_integer(vk_format)) {
+      /* PBE downscale behavior for integer formats does not match Vulkan
+       * spec. Vulkan requires a single sample to be chosen instead of
+       * taking the average sample color.
+       */
+      return false;
+   }
+
+   /* Anything not explicitly listed below is downscalable. */
+   switch (pvr_get_pbe_packmode(vk_format)) {
+   default:
+      return true;
+
+   case ROGUE_PBESTATE_PACKMODE_U16U16U16U16:
+   case ROGUE_PBESTATE_PACKMODE_S16S16S16S16:
+   case ROGUE_PBESTATE_PACKMODE_U32U32U32U32:
+   case ROGUE_PBESTATE_PACKMODE_S32S32S32S32:
+   case ROGUE_PBESTATE_PACKMODE_F32F32F32F32:
+   case ROGUE_PBESTATE_PACKMODE_U16U16U16:
+   case ROGUE_PBESTATE_PACKMODE_S16S16S16:
+   case ROGUE_PBESTATE_PACKMODE_U32U32U32:
+   case ROGUE_PBESTATE_PACKMODE_S32S32S32:
+   case ROGUE_PBESTATE_PACKMODE_F32F32F32:
+   case ROGUE_PBESTATE_PACKMODE_U16U16:
+   case ROGUE_PBESTATE_PACKMODE_S16S16:
+   case ROGUE_PBESTATE_PACKMODE_U32U32:
+   case ROGUE_PBESTATE_PACKMODE_S32S32:
+   case ROGUE_PBESTATE_PACKMODE_F32F32:
+   case ROGUE_PBESTATE_PACKMODE_U24ST8:
+   case ROGUE_PBESTATE_PACKMODE_ST8U24:
+   case ROGUE_PBESTATE_PACKMODE_U16:
+   case ROGUE_PBESTATE_PACKMODE_S16:
+   case ROGUE_PBESTATE_PACKMODE_U32:
+   case ROGUE_PBESTATE_PACKMODE_S32:
+   case ROGUE_PBESTATE_PACKMODE_F32:
+   case ROGUE_PBESTATE_PACKMODE_X24U8F32:
+   case ROGUE_PBESTATE_PACKMODE_X24X8F32:
+   case ROGUE_PBESTATE_PACKMODE_X24G8X32:
+   case ROGUE_PBESTATE_PACKMODE_X8U24:
+   case ROGUE_PBESTATE_PACKMODE_U8X24:
+   case ROGUE_PBESTATE_PACKMODE_PBYTE:
+   case ROGUE_PBESTATE_PACKMODE_PWORD:
+   case ROGUE_PBESTATE_PACKMODE_INVALID:
+      return false;
+   }
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_FORMATS_H
+#define PVR_FORMATS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+const uint8_t *pvr_get_format_swizzle(VkFormat vk_format);
+uint32_t pvr_get_tex_format(VkFormat vk_format);
+uint32_t pvr_get_pbe_packmode(VkFormat vk_format);
+bool pvr_format_is_pbe_downscalable(VkFormat vk_format);
+
+#endif /* PVR_FORMATS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include "pvr_hw_pass.h"
+#include "pvr_private.h"
+#include "vk_alloc.h"
+
+/* Free a renderpass HW setup. All sub-arrays were allocated in the same
+ * multialloc block as the top-level struct (see
+ * pvr_create_renderpass_hwsetup()), so one free releases everything.
+ */
+void pvr_destroy_renderpass_hwsetup(struct pvr_device *device,
+                                    struct pvr_renderpass_hwsetup *hw_setup)
+{
+   vk_free(&device->vk.alloc, hw_setup);
+}
+
+/* Build a hardcoded single-render, single-subpass HW setup (see the FIXME
+ * below). Everything is zero-allocated in one multialloc so the whole setup
+ * can be released with a single vk_free(). NOTE(review): disable_merge is
+ * currently unused; only pass->subpasses[0].color_count is consumed from
+ * 'pass'.
+ */
+struct pvr_renderpass_hwsetup *
+pvr_create_renderpass_hwsetup(struct pvr_device *device,
+                              struct pvr_render_pass *pass,
+                              bool disable_merge)
+{
+   struct pvr_renderpass_hwsetup_eot_surface *eot_surface;
+   enum pvr_renderpass_surface_initop *color_initops;
+   struct pvr_renderpass_hwsetup_subpass *subpasses;
+   struct pvr_renderpass_hwsetup_render *renders;
+   struct pvr_renderpass_colorinit *color_inits;
+   struct pvr_renderpass_hwsetup *hw_setup;
+   struct pvr_renderpass_hw_map *subpass_map;
+   struct usc_mrt_resource *mrt_resources;
+
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &hw_setup, __typeof__(*hw_setup), 1);
+   vk_multialloc_add(&ma, &renders, __typeof__(*renders), 1);
+   vk_multialloc_add(&ma, &color_inits, __typeof__(*color_inits), 1);
+   vk_multialloc_add(&ma, &subpass_map, __typeof__(*subpass_map), 1);
+   vk_multialloc_add(&ma, &mrt_resources, __typeof__(*mrt_resources), 2);
+   vk_multialloc_add(&ma, &subpasses, __typeof__(*subpasses), 1);
+   vk_multialloc_add(&ma, &eot_surface, __typeof__(*eot_surface), 1);
+   vk_multialloc_add(&ma,
+                     &color_initops,
+                     __typeof__(*color_initops),
+                     pass->subpasses[0].color_count);
+   /* Note, no more multialloc slots available (maximum supported is 8). */
+
+   if (!vk_multialloc_zalloc(&ma,
+                             &device->vk.alloc,
+                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) {
+      return NULL;
+   }
+
+   /* FIXME: Remove hardcoding of hw_setup structure. */
+   subpasses[0].z_replicate = -1;
+   subpasses[0].depth_initop = RENDERPASS_SURFACE_INITOP_CLEAR;
+   subpasses[0].stencil_clear = false;
+   subpasses[0].driver_id = 0;
+   color_initops[0] = RENDERPASS_SURFACE_INITOP_NOP;
+   subpasses[0].color_initops = color_initops;
+   subpasses[0].client_data = NULL;
+   renders[0].subpass_count = 1;
+   renders[0].subpasses = subpasses;
+
+   renders[0].sample_count = 1;
+   renders[0].ds_surface_id = 1;
+   renders[0].depth_init = RENDERPASS_SURFACE_INITOP_CLEAR;
+   renders[0].stencil_init = RENDERPASS_SURFACE_INITOP_NOP;
+
+   /* mrt_resources[0] describes the start-of-render color init target. */
+   mrt_resources[0].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER;
+   mrt_resources[0].u.reg.out_reg = 0;
+   mrt_resources[0].u.reg.offset = 0;
+   renders[0].init_setup.render_targets_count = 1;
+   renders[0].init_setup.mrt_resources = &mrt_resources[0];
+
+   color_inits[0].op = RENDERPASS_SURFACE_INITOP_CLEAR;
+   color_inits[0].driver_id = 0;
+   renders[0].color_init_count = 1;
+   renders[0].color_init = color_inits;
+
+   /* mrt_resources[1] describes the end-of-tile store source. */
+   mrt_resources[1].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER;
+   mrt_resources[1].u.reg.out_reg = 0;
+   mrt_resources[1].u.reg.offset = 0;
+   renders[0].eot_setup.render_targets_count = 1;
+   renders[0].eot_setup.mrt_resources = &mrt_resources[1];
+
+   eot_surface->mrt_index = 0;
+   eot_surface->attachment_index = 0;
+   eot_surface->need_resolve = false;
+   eot_surface->resolve_type = PVR_RESOLVE_TYPE_INVALID;
+   eot_surface->src_attachment_index = 0;
+   renders[0].eot_surfaces = eot_surface;
+   renders[0].eot_surface_count = 1;
+
+   renders[0].output_regs_count = 1;
+   renders[0].tile_buffers_count = 0;
+   renders[0].client_data = NULL;
+   hw_setup->render_count = 1;
+   hw_setup->renders = renders;
+
+   /* Map the single subpass to render 0, subpass 0. */
+   subpass_map->render = 0;
+   subpass_map->subpass = 0;
+   hw_setup->subpass_map = subpass_map;
+
+   return hw_setup;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_HW_PASS_H
+#define PVR_HW_PASS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct pvr_device;
+struct pvr_render_pass;
+
+/* How to initialize a surface at the start of a render or subpass. */
+enum pvr_renderpass_surface_initop {
+   /* Clear the surface. */
+   RENDERPASS_SURFACE_INITOP_CLEAR,
+   /* Load the surface's existing contents. */
+   RENDERPASS_SURFACE_INITOP_LOAD,
+   /* Leave the surface uninitialized. */
+   RENDERPASS_SURFACE_INITOP_NOP,
+};
+
+struct pvr_renderpass_hwsetup_subpass {
+   /* If >=0 then copy the depth into this pixel output for all fragment
+    * programs in the subpass.
+    */
+   int32_t z_replicate;
+
+   /* The operation to perform on the depth at the start of the subpass. Loads
+    * are deferred to subpasses when depth has been replicated.
+    */
+   enum pvr_renderpass_surface_initop depth_initop;
+
+   /* If true then clear the stencil at the start of the subpass. */
+   bool stencil_clear;
+
+   /* Driver Id from the input pvr_render_subpass structure. */
+   uint32_t driver_id;
+
+   /* For each color attachment to the subpass: the operation to perform at
+    * the start of the subpass.
+    */
+   enum pvr_renderpass_surface_initop *color_initops;
+
+   /* Opaque per-subpass data owned by the caller. */
+   void *client_data;
+};
+
+struct pvr_renderpass_colorinit {
+   /* Source surface for the operation. */
+   uint32_t driver_id;
+
+   /* Type of operation: either clear or load. */
+   enum pvr_renderpass_surface_initop op;
+};
+
+/* FIXME: Adding these USC enums and structures here for now to avoid adding
+ * usc.h header. Needs to be moved to compiler specific header.
+ */
+/* Specifies the location of render target writes. */
+enum usc_mrt_resource_type {
+   USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+   USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER, /* Pixel output register. */
+   USC_MRT_RESOURCE_TYPE_MEMORY, /* Tile buffer in memory. */
+};
+
+struct usc_mrt_resource {
+   /* Resource type allocated for render target. */
+   enum usc_mrt_resource_type type;
+
+   /* Location details; which member is valid depends on 'type'. */
+   union {
+      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER. */
+      struct {
+         /* The output register to use. */
+         uint32_t out_reg;
+
+         /* The offset in bytes into the output register. */
+         uint32_t offset;
+      } reg;
+
+      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
+      struct {
+         /* The number of the tile buffer to use. */
+         uint32_t tile_buffer;
+
+         /* The offset in dwords within the tile buffer. */
+         uint32_t offset_in_dwords;
+      } mem;
+   } u;
+};
+
+struct usc_mrt_setup {
+   /* Number of render targets present. */
+   uint32_t render_targets_count;
+
+   /* Array of MRT resources allocated for each render target. The number of
+    * elements is determined by usc_mrt_setup::render_targets_count.
+    */
+   struct usc_mrt_resource *mrt_resources;
+};
+
+/* How an end-of-tile surface is resolved. */
+enum pvr_resolve_type {
+   PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+   PVR_RESOLVE_TYPE_PBE,
+   PVR_RESOLVE_TYPE_TRANSFER,
+};
+
+struct pvr_renderpass_hwsetup_eot_surface {
+   /* MRT index to store from. Also used to index into
+    * usc_mrt_setup::mrt_resources.
+    */
+   uint32_t mrt_index;
+
+   /* Index of pvr_render_pass_info::attachments to store into. */
+   uint32_t attachment_index;
+
+   /* True if the surface should be resolved. */
+   bool need_resolve;
+
+   /* How the surface should be resolved at the end of a render. Only valid if
+    * pvr_renderpass_hwsetup_eot_surface::need_resolve is set to true.
+    */
+   enum pvr_resolve_type resolve_type;
+
+   /* Index of pvr_render_pass_info::attachments to resolve from. Only valid if
+    * pvr_renderpass_hwsetup_eot_surface::need_resolve is set to true.
+    */
+   uint32_t src_attachment_index;
+};
+
+struct pvr_renderpass_hwsetup_render {
+   /* Number of pixel output registers to allocate for this render. */
+   uint32_t output_regs_count;
+
+   /* Number of tile buffers to allocate for this render. */
+   uint32_t tile_buffers_count;
+
+   /* Number of subpasses in this render. */
+   uint32_t subpass_count;
+
+   /* Description of each subpass. */
+   struct pvr_renderpass_hwsetup_subpass *subpasses;
+
+   /* The sample count of every color attachment (or depth attachment if
+    * z-only) in this render.
+    */
+   uint32_t sample_count;
+
+   /* Driver Id for the surface to use for depth/stencil load/store in this
+    * render.
+    */
+   int32_t ds_surface_id;
+
+   /* Operation on the on-chip depth at the start of the render.
+    * Either load from 'ds_surface_id', clear using 'ds_surface_id' or leave
+    * uninitialized.
+    */
+   enum pvr_renderpass_surface_initop depth_init;
+
+   /* Operation on the on-chip stencil at the start of the render. */
+   enum pvr_renderpass_surface_initop stencil_init;
+
+   /* For each operation: the destination in the on-chip color storage. */
+   struct usc_mrt_setup init_setup;
+
+   /* Count of operations on on-chip color storage at the start of the render.
+    */
+   uint32_t color_init_count;
+
+   /* How to initialize render targets at the start of the render. */
+   struct pvr_renderpass_colorinit *color_init;
+
+   /* Describes the location of the source data for each stored surface. */
+   struct usc_mrt_setup eot_setup;
+
+   /* End-of-tile surfaces to store; eot_surface_count entries. */
+   struct pvr_renderpass_hwsetup_eot_surface *eot_surfaces;
+   uint32_t eot_surface_count;
+
+   /* Opaque per-render data owned by the caller. */
+   void *client_data;
+};
+
+/* Location of a subpass within the HW setup. */
+struct pvr_renderpass_hw_map {
+   /* Index into pvr_renderpass_hwsetup::renders. */
+   uint32_t render;
+
+   /* Subpass index relative to that render. */
+   uint32_t subpass;
+};
+
+struct pvr_renderpass_hwsetup {
+   /* Number of renders. */
+   uint32_t render_count;
+
+   /* Description of each render. */
+   struct pvr_renderpass_hwsetup_render *renders;
+
+   /* Maps indices from pvr_render_pass::subpasses to the
+    * pvr_renderpass_hwsetup_render/pvr_renderpass_hwsetup_subpass relative to
+    * that render where the subpass is scheduled.
+    */
+   struct pvr_renderpass_hw_map *subpass_map;
+};
+
+struct pvr_renderpass_hwsetup *
+pvr_create_renderpass_hwsetup(struct pvr_device *device,
+                              struct pvr_render_pass *pass,
+                              bool disable_merge);
+void pvr_destroy_renderpass_hwsetup(struct pvr_device *device,
+                                    struct pvr_renderpass_hwsetup *hw_setup);
+
+#endif /* PVR_HW_PASS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_formats.h"
+#include "pvr_private.h"
+#include "pvr_tex_state.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_format.h"
+#include "vk_image.h"
+#include "vk_log.h"
+#include "vk_util.h"
+#include "wsi_common.h"
+
+/* Pick the memory layout from the image tiling: linear for linear tiling and
+ * legacy WSI scanout images, 3D-twiddled for optimal-tiling 3D images,
+ * twiddled for all other optimal-tiling images.
+ */
+static void pvr_image_init_memlayout(struct pvr_image *image)
+{
+   switch (image->vk.tiling) {
+   default:
+      unreachable("bad VkImageTiling");
+   case VK_IMAGE_TILING_OPTIMAL:
+      if (image->vk.wsi_legacy_scanout)
+         image->memlayout = PVR_MEMLAYOUT_LINEAR;
+      else if (image->vk.image_type == VK_IMAGE_TYPE_3D)
+         image->memlayout = PVR_MEMLAYOUT_3DTWIDDLED;
+      else
+         image->memlayout = PVR_MEMLAYOUT_TWIDDLED;
+      break;
+   case VK_IMAGE_TILING_LINEAR:
+      image->memlayout = PVR_MEMLAYOUT_LINEAR;
+      break;
+   }
+}
+
+/* Compute the allocated (physical) extent. Twiddled layouts and mipmapped
+ * images round every dimension up to the next power of two; single-level
+ * linear images keep the logical extent unchanged. Must run after
+ * pvr_image_init_memlayout().
+ */
+static void pvr_image_init_physical_extent(struct pvr_image *image)
+{
+   assert(image->memlayout != PVR_MEMLAYOUT_UNDEFINED);
+
+   /* clang-format off */
+   if (image->vk.mip_levels > 1 ||
+      image->memlayout == PVR_MEMLAYOUT_TWIDDLED ||
+      image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED) {
+      /* clang-format on */
+      image->physical_extent.width =
+         util_next_power_of_two(image->vk.extent.width);
+      image->physical_extent.height =
+         util_next_power_of_two(image->vk.extent.height);
+      image->physical_extent.depth =
+         util_next_power_of_two(image->vk.extent.depth);
+   } else {
+      assert(image->memlayout == PVR_MEMLAYOUT_LINEAR);
+      image->physical_extent = image->vk.extent;
+   }
+}
+
+/* Compute per-mip pitch/size/offset and the total layer and image sizes.
+ * 3D images pad each dimension to a multiple of 4; mipmapped images pad each
+ * level's size to 4 bytes so levels can be TQd from. Must run after
+ * pvr_image_init_physical_extent().
+ */
+static void pvr_image_setup_mip_levels(struct pvr_image *image)
+{
+   /* 3D images need each extent aligned to 4; other types are unpadded. */
+   const uint32_t extent_alignment =
+      image->vk.image_type == VK_IMAGE_TYPE_3D ? 4 : 1;
+   const unsigned int cpp = vk_format_get_blocksize(image->vk.format);
+
+   /* Mip-mapped textures that are non-dword aligned need dword-aligned levels
+    * so they can be TQd from.
+    */
+   const uint32_t level_alignment = image->vk.mip_levels > 1 ? 4 : 1;
+
+   assert(image->vk.mip_levels <= ARRAY_SIZE(image->mip_levels));
+
+   image->layer_size = 0;
+
+   for (uint32_t i = 0; i < image->vk.mip_levels; i++) {
+      const uint32_t height = u_minify(image->physical_extent.height, i);
+      const uint32_t width = u_minify(image->physical_extent.width, i);
+      const uint32_t depth = u_minify(image->physical_extent.depth, i);
+      struct pvr_mip_level *mip_level = &image->mip_levels[i];
+
+      mip_level->pitch = cpp * ALIGN(width, extent_alignment);
+      mip_level->height_pitch = ALIGN(height, extent_alignment);
+      mip_level->size = image->vk.samples * mip_level->pitch *
+                        mip_level->height_pitch *
+                        ALIGN(depth, extent_alignment);
+      mip_level->size = ALIGN(mip_level->size, level_alignment);
+      /* Levels are laid out consecutively within a layer. */
+      mip_level->offset = image->layer_size;
+
+      image->layer_size += mip_level->size;
+   }
+
+   /* TODO: It might be useful to store the alignment in the image so it can be
+    * checked (via an assert?) when setting
+    * RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN, assuming this is where the
+    * requirement comes from.
+    */
+   if (image->vk.array_layers > 1)
+      image->layer_size = ALIGN(image->layer_size, image->alignment);
+
+   image->size = image->layer_size * image->vk.array_layers;
+}
+
+/* vkCreateImage: allocate the vk_image wrapper and precompute the layout
+ * (memlayout, physical extent, per-mip sizes). Device memory is bound
+ * separately via pvr_BindImageMemory2().
+ */
+VkResult pvr_CreateImage(VkDevice _device,
+                         const VkImageCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkImage *pImage)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_image *image;
+
+   pvr_finishme("Review whether all inputs are handled\n");
+
+   image =
+      vk_image_create(&device->vk, pCreateInfo, pAllocator, sizeof(*image));
+   if (!image)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* All images aligned to 4k, in case of arrays/CEM.
+    * Refer: pvr_GetImageMemoryRequirements for further details.
+    */
+   image->alignment = 4096U;
+
+   /* Initialize the image using the saved information from pCreateInfo */
+   pvr_image_init_memlayout(image);
+   pvr_image_init_physical_extent(image);
+   pvr_image_setup_mip_levels(image);
+
+   *pImage = pvr_image_to_handle(image);
+
+   return VK_SUCCESS;
+}
+
+/* vkDestroyImage: NULL image is a no-op per spec. Releases the image's VMA
+ * mapping before destroying the vk_image.
+ * NOTE(review): pvr_unbind_memory() is called unconditionally — confirm it
+ * tolerates an image that was never bound (image->vma unset).
+ */
+void pvr_DestroyImage(VkDevice _device,
+                      VkImage _image,
+                      const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_image, image, _image);
+
+   if (!image)
+      return;
+
+   pvr_unbind_memory(device, image->vma);
+   vk_image_destroy(&device->vk, pAllocator, &image->vk);
+}
+
+/* clang-format off */
+/* Consider a 4 page buffer object.
+ * _________________________________________
+ * | | | | |
+ * |_________|__________|_________|__________|
+ * |
+ * \__ offset (0.5 page size)
+ *
+ * |___size(2 pages)____|
+ *
+ * |__VMA size required (3 pages)__|
+ *
+ * |
+ * \__ returned dev_addr = vma + offset % page_size
+ *
+ * VMA size = align(size + offset % page_size, page_size);
+ *
+ * Note: the above handling is currently divided between generic
+ * driver code and winsys layer. Given are the details of how this is
+ * being handled.
+ * * As winsys vma allocation interface does not have offset information,
+ * it can not calculate the extra size needed to adjust for the unaligned
+ * offset. So generic code is responsible for allocating a VMA that has
+ * extra space to deal with the above scenario.
+ * * Remaining work of mapping the vma to bo is done by vma_map interface,
+ * as it contains offset information, we don't need to do any adjustments
+ * in the generic code for this part.
+ *
+ * TODO: Look into merging heap_alloc and vma_map into single interface.
+ */
+/* clang-format on */
+
+/**
+ * Binds device memory to one or more images.
+ *
+ * For each entry, maps a VMA for the image (see the diagram above for how
+ * unaligned offsets are handled between generic code and the winsys) and
+ * records the resulting device address in the image. On failure, all binds
+ * performed so far in this call are rolled back before returning the error.
+ */
+VkResult pvr_BindImageMemory2(VkDevice _device,
+                              uint32_t bindInfoCount,
+                              const VkBindImageMemoryInfo *pBindInfos)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   uint32_t i;
+
+   for (i = 0; i < bindInfoCount; i++) {
+      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
+      PVR_FROM_HANDLE(pvr_image, image, pBindInfos[i].image);
+
+      VkResult result = pvr_bind_memory(device,
+                                        mem,
+                                        pBindInfos[i].memoryOffset,
+                                        image->size,
+                                        image->alignment,
+                                        &image->vma,
+                                        &image->dev_addr);
+      if (result != VK_SUCCESS) {
+         /* Unwind: unbind every image successfully bound by this call so the
+          * whole operation is atomic from the caller's point of view.
+          */
+         while (i--) {
+            PVR_FROM_HANDLE(pvr_image, image, pBindInfos[i].image);
+
+            pvr_unbind_memory(device, image->vma);
+         }
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Reports the memory layout of a single image subresource.
+ *
+ * Offsets are composed from the per-layer size and the per-mip offset
+ * computed at image-creation time (pvr_image_setup_mip_levels()).
+ */
+void pvr_GetImageSubresourceLayout(VkDevice device,
+                                   VkImage _image,
+                                   const VkImageSubresource *subresource,
+                                   VkSubresourceLayout *layout)
+{
+   PVR_FROM_HANDLE(pvr_image, image, _image);
+   const struct pvr_mip_level *mip_level;
+
+   /* Validate the indices before using them: forming the address of an
+    * out-of-range mip_levels[] element (as the code previously did) is
+    * undefined behavior even if the pointer is never dereferenced.
+    */
+   pvr_assert(subresource->mipLevel < image->vk.mip_levels);
+   pvr_assert(subresource->arrayLayer < image->vk.array_layers);
+
+   mip_level = &image->mip_levels[subresource->mipLevel];
+
+   layout->offset =
+      subresource->arrayLayer * image->layer_size + mip_level->offset;
+   layout->rowPitch = mip_level->pitch;
+   layout->depthPitch = mip_level->pitch * mip_level->height_pitch;
+   layout->arrayPitch = image->layer_size;
+   layout->size = mip_level->size;
+}
+
+/**
+ * Creates a VkImageView and pre-packs its hardware texture state words.
+ *
+ * Up to three texture states are packed for the view:
+ *  - SAMPLE: always packed, for regular sampling.
+ *  - STORAGE: packed in addition for cube views with storage usage.
+ *  - ATTACHMENT: always packed, with mipmapping flattened (see below).
+ *
+ * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY / the error from
+ * pvr_pack_tex_state() on failure (the view is destroyed in that case).
+ */
+VkResult pvr_CreateImageView(VkDevice _device,
+                             const VkImageViewCreateInfo *pCreateInfo,
+                             const VkAllocationCallbacks *pAllocator,
+                             VkImageView *pView)
+{
+   PVR_FROM_HANDLE(pvr_image, image, pCreateInfo->image);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_texture_state_info info;
+   unsigned char input_swizzle[4];
+   const uint8_t *format_swizzle;
+   struct pvr_image_view *iview;
+   VkResult result;
+
+   iview = vk_image_view_create(&device->vk,
+                                pCreateInfo,
+                                pAllocator,
+                                sizeof(*iview));
+   if (!iview)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   iview->image = image;
+
+   /* Fill the texture-state descriptor from the view/image properties. */
+   info.type = iview->vk.view_type;
+   info.base_level = iview->vk.base_mip_level;
+   info.mip_levels = iview->vk.level_count;
+   info.extent = image->vk.extent;
+   info.is_cube = (info.type == VK_IMAGE_VIEW_TYPE_CUBE ||
+                   info.type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY);
+   info.array_size = iview->vk.layer_count;
+   /* Byte offset of the first layer/level covered by this view. */
+   info.offset = iview->vk.base_array_layer * image->layer_size +
+                 image->mip_levels[info.base_level].offset;
+   info.mipmaps_present = (image->vk.mip_levels > 1) ? true : false;
+   info.stride = image->physical_extent.width - 1;
+   info.tex_state_type = PVR_TEXTURE_STATE_SAMPLE;
+   info.mem_layout = image->memlayout;
+   info.flags = 0;
+   info.sample_count = image->vk.samples;
+   info.addr = image->dev_addr;
+
+   /* TODO: if ERN_46863 is supported, Depth and stencil are sampled separately
+    * from images with combined depth+stencil. Add logic here to handle it.
+    */
+   info.format = iview->vk.format;
+
+   /* Compose the format's channel ordering with the view's component
+    * mapping to get the final swizzle programmed into the texture state.
+    */
+   vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle, input_swizzle);
+   format_swizzle = pvr_get_format_swizzle(info.format);
+   util_format_compose_swizzles(format_swizzle, input_swizzle, info.swizzle);
+
+   result = pvr_pack_tex_state(device,
+                               &info,
+                               iview->texture_state[info.tex_state_type]);
+   if (result != VK_SUCCESS)
+      goto err_vk_image_view_destroy;
+
+   /* Create an additional texture state for cube type if storage
+    * usage flag is set.
+    */
+   if (info.is_cube && image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+      info.tex_state_type = PVR_TEXTURE_STATE_STORAGE;
+      result = pvr_pack_tex_state(device,
+                                  &info,
+                                  iview->texture_state[info.tex_state_type]);
+      if (result != VK_SUCCESS)
+         goto err_vk_image_view_destroy;
+   }
+
+   /* Attachment state is created as if the mipmaps are not supported, so the
+    * baselevel is set to zero and num_mip_levels is set to 1. Which gives an
+    * impression that this is the only level in the image. This also requires
+    * that width, height and depth be adjusted as well. Given iview->vk.extent
+    * is already adjusted for base mip map level we use it here.
+    */
+   /* TODO: Investigate and document the reason for above approach. */
+   info.extent = iview->vk.extent;
+
+   info.mip_levels = 1;
+   info.mipmaps_present = false;
+   info.stride = u_minify(image->physical_extent.width, info.base_level) - 1;
+   info.base_level = 0;
+   info.tex_state_type = PVR_TEXTURE_STATE_ATTACHMENT;
+
+   result = pvr_pack_tex_state(device,
+                               &info,
+                               iview->texture_state[info.tex_state_type]);
+   if (result != VK_SUCCESS)
+      goto err_vk_image_view_destroy;
+
+   *pView = pvr_image_view_to_handle(iview);
+
+   return VK_SUCCESS;
+
+err_vk_image_view_destroy:
+   vk_image_view_destroy(&device->vk, pAllocator, &iview->vk);
+
+   return result;
+}
+
+/**
+ * Destroys a VkImageView.
+ */
+void pvr_DestroyImageView(VkDevice _device,
+                          VkImageView _iview,
+                          const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_image_view, iview, _iview);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* Destroying VK_NULL_HANDLE must be silently ignored. */
+   if (iview)
+      vk_image_view_destroy(&device->vk, pAllocator, &iview->vk);
+}
+
+VkResult pvr_CreateBufferView(VkDevice _device,
+                              const VkBufferViewCreateInfo *pCreateInfo,
+                              const VkAllocationCallbacks *pAllocator,
+                              VkBufferView *pView)
+{
+   /* TODO: Implement buffer view creation. NOTE(review): with NDEBUG the
+    * assert compiles out and this returns VK_SUCCESS without writing *pView,
+    * handing the caller an uninitialized handle — confirm this stub cannot
+    * be reached in release builds.
+    */
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+void pvr_DestroyBufferView(VkDevice _device,
+                           VkBufferView bufferView,
+                           const VkAllocationCallbacks *pAllocator)
+{
+   /* TODO: Implement buffer view destruction; stub only traps in debug
+    * builds.
+    */
+   assert(!"Unimplemented");
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_device_info.h"
+#include "pvr_job_common.h"
+#include "pvr_private.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_alloc.h"
+#include "vk_format.h"
+#include "vk_object.h"
+
+/* clang-format off */
+
+/**
+ * Translates a gallium pipe_swizzle into the PBE hardware swizzle encoding.
+ *
+ * \param comp The Vulkan component this swizzle entry is for; only consulted
+ *             for PIPE_SWIZZLE_NONE, where the alpha channel defaults to one
+ *             and all other channels default to zero.
+ * \param swz  The composed swizzle to translate.
+ */
+static enum PVRX(PBESTATE_SWIZ)
+pvr_get_pbe_hw_swizzle(VkComponentSwizzle comp, enum pipe_swizzle swz)
+/* clang-format on */
+{
+   switch (swz) {
+   case PIPE_SWIZZLE_0:
+      return ROGUE_PBESTATE_SWIZ_ZERO;
+   case PIPE_SWIZZLE_1:
+      return ROGUE_PBESTATE_SWIZ_ONE;
+   case PIPE_SWIZZLE_X:
+      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
+   case PIPE_SWIZZLE_Y:
+      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
+   case PIPE_SWIZZLE_Z:
+      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
+   case PIPE_SWIZZLE_W:
+      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
+   case PIPE_SWIZZLE_NONE:
+      if (comp == VK_COMPONENT_SWIZZLE_A)
+         return ROGUE_PBESTATE_SWIZ_ONE;
+      else
+         return ROGUE_PBESTATE_SWIZ_ZERO;
+   default:
+      unreachable("Unknown enum pipe_swizzle");
+   }
+}
+
+/**
+ * Selects the PBE source format and gamma mode for a Vulkan format.
+ *
+ * \param vk_format               Format of the surface being stored.
+ * \param default_gamma           Gamma mode used unless the format is sRGB.
+ * \param with_packed_usc_channel Whether the USC output channel is packed
+ *                                (U8) rather than F16 for <=8-bit channels.
+ * \param src_format_out          Receives a PVRX(PBESTATE_SOURCE_FORMAT_*).
+ * \param gamma_out               Receives the gamma mode.
+ *
+ * Note: the order of checks below matters — e.g. pure-integer and 32-bit
+ * formats must be classified before the float/sRGB checks.
+ */
+void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
+                                      enum pvr_pbe_gamma default_gamma,
+                                      bool with_packed_usc_channel,
+                                      uint32_t *const src_format_out,
+                                      enum pvr_pbe_gamma *const gamma_out)
+{
+   uint32_t chan_0_width = vk_format_get_channel_width(vk_format, 0);
+
+   *gamma_out = default_gamma;
+
+   if (vk_format_has_32bit_component(vk_format) ||
+       vk_format_is_pure_integer(vk_format)) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   } else if (vk_format_is_float(vk_format)) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+   } else if (vk_format_is_srgb(vk_format)) {
+      *gamma_out = PVR_PBE_GAMMA_ENABLED;
+
+      /* F16 source for gamma'd formats. */
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+   } else if (vk_format_has_depth(vk_format) &&
+              vk_format_get_component_size_in_bits(vk_format,
+                                                   UTIL_FORMAT_COLORSPACE_ZS,
+                                                   0) > 16) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   } else if (vk_format_has_stencil(vk_format) &&
+              vk_format_get_component_size_in_bits(vk_format,
+                                                   UTIL_FORMAT_COLORSPACE_ZS,
+                                                   1) > 0) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   } else if (chan_0_width > 16) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   } else if (chan_0_width > 8) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+   } else if (!with_packed_usc_channel) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+   } else {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   }
+}
+
+/**
+ * Converts a pvr_pbe_source_start_pos into the hardware source-position
+ * encoding plus a 128-bit offset flag.
+ *
+ * Start positions at bit 128 and above only exist on cores with the
+ * eight_output_registers feature; they reuse the BIT0..BIT96 encodings with
+ * src_pos_offset_128 set. Without the feature, unknown values fall back to
+ * BIT0.
+ */
+static void pvr_pbe_get_src_pos(struct pvr_device *device,
+                                enum pvr_pbe_source_start_pos source_start,
+                                uint32_t *const src_pos_out,
+                                bool *const src_pos_offset_128_out)
+{
+   *src_pos_offset_128_out = false;
+
+   switch (source_start) {
+   case PVR_PBE_STARTPOS_BIT32:
+      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32);
+      break;
+
+   case PVR_PBE_STARTPOS_BIT64:
+      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64);
+      break;
+
+   case PVR_PBE_STARTPOS_BIT96:
+      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96);
+      break;
+
+   case PVR_PBE_STARTPOS_BIT0:
+   default:
+      if (PVR_HAS_FEATURE(&device->pdevice->dev_info, eight_output_registers)) {
+         switch (source_start) {
+         case PVR_PBE_STARTPOS_BIT128:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
+            *src_pos_offset_128_out = true;
+            break;
+
+         case PVR_PBE_STARTPOS_BIT160:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32);
+            *src_pos_offset_128_out = true;
+            break;
+
+         case PVR_PBE_STARTPOS_BIT192:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64);
+            *src_pos_offset_128_out = true;
+            break;
+
+         case PVR_PBE_STARTPOS_BIT224:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96);
+            *src_pos_offset_128_out = true;
+            break;
+
+         default:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
+            break;
+         }
+      } else {
+         *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
+      }
+      break;
+   }
+}
+
+/**
+ * Packs the PBE control-stream state words and PBE register words for a
+ * surface emit.
+ *
+ * For z-only renders only an empty-tile marker is emitted; otherwise the
+ * state words carry the surface address/format and the register words carry
+ * layout, clipping and swizzle state.
+ */
+void pvr_pbe_pack_state(
+   struct pvr_device *device,
+   const struct pvr_pbe_surf_params *surface_params,
+   const struct pvr_pbe_render_params *render_params,
+   uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
+   uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
+{
+   /* This function needs updating if the value of
+    * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected
+    * value.
+    */
+   STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2);
+
+   /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS
+    * changes, so check that it's the expected value.
+    */
+   STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3);
+
+   pbe_reg_words[2] = 0;
+
+   /* Z-only renders store no color output: emit the empty-tile marker and
+    * zero everything else.
+    */
+   if (surface_params->z_only_render) {
+      pbe_cs_words[0] = 0;
+
+      pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
+         state.emptytile = true;
+      }
+
+      pbe_reg_words[0] = 0;
+      pbe_reg_words[1] = 0;
+
+      return;
+   }
+
+   pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) {
+      state.address_low = surface_params->addr;
+   }
+
+   pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
+      state.address_high = surface_params->addr;
+
+      state.source_format = surface_params->source_format;
+
+      pvr_pbe_get_src_pos(device,
+                          render_params->source_start,
+                          &state.source_pos,
+                          &state.source_pos_offset_128);
+
+      /* MRT index (Use 0 for a single render target). */
+      state.mrt_index = render_params->mrt_index;
+
+      /* Normalization flag based on output format. */
+      state.norm = surface_params->is_normalized;
+
+      state.packmode = surface_params->pbe_packmode;
+   }
+
+   pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) {
+      reg.tilerelative = true;
+
+      switch (surface_params->mem_layout) {
+      case PVR_MEMLAYOUT_TWIDDLED:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D);
+         break;
+
+      case PVR_MEMLAYOUT_3DTWIDDLED:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D);
+         break;
+
+      case PVR_MEMLAYOUT_LINEAR:
+      default:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR);
+         break;
+      }
+
+      /* FIXME: Remove rotation and y_flip hardcoding if needed. */
+      reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG);
+      reg.y_flip = false;
+
+      /* Note: Due to gamma being overridden above, anything other than
+       * ENABLED/NONE is ignored.
+       */
+      if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) {
+         reg.gamma = true;
+
+         if (surface_params->nr_components == 2)
+            reg.twocomp_gamma =
+               PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS);
+      }
+
+      /* Stride is programmed in hardware units, biased by one. */
+      reg.linestride = (surface_params->stride - 1) /
+                       PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
+      reg.minclip_x = render_params->min_x_clip;
+
+      reg.swiz_chan0 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_R,
+                                              surface_params->swizzle[0]);
+      reg.swiz_chan1 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_G,
+                                              surface_params->swizzle[1]);
+      reg.swiz_chan2 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_B,
+                                              surface_params->swizzle[2]);
+      reg.swiz_chan3 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_A,
+                                              surface_params->swizzle[3]);
+
+      if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
+         reg.size_z = util_logbase2_ceil(surface_params->depth);
+
+      reg.downscale = surface_params->down_scale;
+   }
+
+   pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) {
+      /* Twiddled layouts encode dimensions as log2. */
+      if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
+          surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
+         reg.size_x = util_logbase2_ceil(surface_params->width);
+         reg.size_y = util_logbase2_ceil(surface_params->height);
+      }
+
+      reg.minclip_y = render_params->min_y_clip;
+      reg.maxclip_x = render_params->max_x_clip;
+      reg.zslice = render_params->slice;
+      reg.maxclip_y = render_params->max_y_clip;
+   }
+}
+
+/* TODO: Split this into smaller functions to make it easier to follow. When
+ * doing this, it would be nice to have a function that returns
+ * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
+ * pvr_render_job_ws_fragment_state_init().
+ */
+/**
+ * Computes CR_ISP_CTL and CR_USC_PIXEL_OUTPUT_CTRL register values from the
+ * pixel output width, MSAA mode and the core's partition/tile limits.
+ *
+ * \param dev_info            Core feature/value table.
+ * \param msaa_mode           A PVRX(CR_ISP_AA_MODE_TYPE_*) value.
+ * \param pixel_width         USC output registers per pixel (rounded up
+ *                            below to a power of two).
+ * \param paired_tiles        Whether the paired-tiles mode is in use.
+ * \param max_tiles_in_flight Shader-requested limit; 0 means no limit.
+ * \param isp_ctl_out         Receives the packed CR_ISP_CTL value.
+ * \param pixel_ctl_out       Receives the packed CR_USC_PIXEL_OUTPUT_CTRL.
+ */
+void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
+                               uint32_t msaa_mode,
+                               uint32_t pixel_width,
+                               bool paired_tiles,
+                               uint32_t max_tiles_in_flight,
+                               uint32_t *const isp_ctl_out,
+                               uint32_t *const pixel_ctl_out)
+{
+   uint32_t total_tiles_in_flight = 0;
+   uint32_t usable_partition_size;
+   uint32_t partitions_available;
+   uint32_t usc_min_output_regs;
+   uint32_t max_partitions;
+   uint32_t partition_size;
+   uint32_t max_phantoms;
+   uint32_t tile_size_x;
+   uint32_t tile_size_y;
+   uint32_t isp_samples;
+
+   /* Round up the pixel width to the next allocation granularity. */
+   usc_min_output_regs =
+      PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0);
+   pixel_width = MAX2(pixel_width, usc_min_output_regs);
+   pixel_width = util_next_power_of_two(pixel_width);
+
+   assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info));
+
+   partition_size = pixel_width;
+
+   /* Scale the partition size by the number of ISP samples per pixel for the
+    * selected MSAA mode.
+    */
+   isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1);
+   if (isp_samples == 2) {
+      if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE))
+         partition_size *= 2U;
+   } else if (isp_samples == 4) {
+      if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) ||
+          msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X))
+         partition_size *= 4U;
+      else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X))
+         partition_size *= 2U;
+   }
+
+   /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the
+    * partition size is the same as for a 32x32 tile quadrant (with no MSAA).
+    * When MSAA is enabled, the USC has to process half the tile (16x8 pixels).
+    */
+   tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
+   tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
+
+   /* We only support square tiles. */
+   assert(tile_size_x == tile_size_y);
+
+   if (tile_size_x == 16U) {
+      /* Cores with 16x16 tiles does not use tile quadrants. */
+      partition_size *= tile_size_x * tile_size_y;
+   } else {
+      /* Size of a tile quadrant (in dwords). */
+      partition_size *= (tile_size_x * tile_size_y / 4U);
+   }
+
+   /* Maximum available partition space for partitions of this size. */
+   max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
+   usable_partition_size =
+      MIN2(rogue_get_total_reserved_partition_size(dev_info),
+           partition_size * max_partitions);
+
+   if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
+       (1024 * 4 * 4)) {
+      /* Do not apply the limit for cores with 16x16 tile size (no quadrant
+       * affinity). */
+      if (tile_size_x != 16) {
+         /* This is to counter the extremely limited CS size on some cores.
+          */
+         /* Available partition space is limited to 8 tile quadrants. */
+         usable_partition_size =
+            MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size);
+      }
+   }
+
+   /* Ensure that maximum number of partitions in use is not greater
+    * than the total number of partitions available.
+    */
+   partitions_available =
+      MIN2(max_partitions, usable_partition_size / partition_size);
+
+   if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
+      max_phantoms = rogue_get_num_phantoms(dev_info);
+   else if (PVR_HAS_FEATURE(dev_info, roguexe))
+      max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
+   else
+      max_phantoms = 1;
+
+   /* Accumulate the tiles-in-flight contribution of each phantom. */
+   for (uint32_t i = 0; i < max_phantoms; i++) {
+      uint32_t usc_tiles_in_flight = partitions_available;
+      uint32_t isp_tiles_in_flight;
+
+      /* Cores with tiles size other than 16x16 use tile quadrants. */
+      if (tile_size_x != 16) {
+         uint32_t num_clusters =
+            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U);
+         usc_tiles_in_flight =
+            (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U;
+      }
+
+      assert(usc_tiles_in_flight > 0);
+
+      isp_tiles_in_flight =
+         PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
+      /* Ensure that maximum number of ISP tiles in flight is not greater
+       * than the maximum number of USC tiles in flight.
+       */
+      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
+          PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) !=
+             2) {
+         isp_tiles_in_flight /= rogue_get_num_phantoms(dev_info);
+      }
+
+      isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight);
+
+      /* Limit the number of tiles in flight if the shaders have
+       * requested a large allocation of local memory.
+       */
+      if (max_tiles_in_flight > 0U) {
+         /* NOTE(review): this clamps against usc_tiles_in_flight rather than
+          * the isp_tiles_in_flight computed just above, discarding the
+          * earlier ISP clamp — confirm this is intentional.
+          */
+         isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight);
+
+         if (PVR_HAS_FEATURE(dev_info, roguexe)) {
+            if (tile_size_x == 16) {
+               /* The FW infers the tiles in flight value from the
+                * partitions setting.
+                */
+               /* Partitions per tile. */
+               partitions_available = isp_tiles_in_flight;
+            } else {
+               /* Partitions per tile quadrant. */
+               partitions_available = isp_tiles_in_flight * 4U;
+            }
+         }
+      }
+
+      /* Due to limitations of ISP_CTL_PIPE there can only be a difference of
+       * 1 between Phantoms.
+       */
+      if (total_tiles_in_flight > (isp_tiles_in_flight + 1U))
+         total_tiles_in_flight = isp_tiles_in_flight + 1U;
+
+      total_tiles_in_flight += isp_tiles_in_flight;
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
+       PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) ==
+          2) {
+      /* Limit the ISP tiles in flight to fit into the available USC partition
+       * store.
+       */
+      total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available);
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) {
+      total_tiles_in_flight =
+         MIN2(total_tiles_in_flight, partitions_available / 2);
+   }
+
+   pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) {
+      if (pixel_width == 1 && usc_min_output_regs == 1) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
+      } else if (pixel_width == 2) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
+      } else if (pixel_width == 4) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS);
+      } else if (pixel_width == 8 &&
+                 PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS);
+      } else if (usc_min_output_regs == 1) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
+      } else {
+         reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
+      }
+
+      if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) {
+         /* Setup the partition mask based on the maximum number of
+          * partitions available.
+          */
+         reg.partition_mask = (1 << max_partitions) - 1;
+      } else {
+         reg.enable_4th_partition = true;
+
+         /* Setup the partition mask based on the number of partitions
+          * available.
+          */
+         reg.partition_mask = (1U << partitions_available) - 1U;
+      }
+   }
+
+   pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) {
+      if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
+         reg.pipe_enable = (2 * total_tiles_in_flight) - 1;
+      else
+         reg.pipe_enable = total_tiles_in_flight - 1;
+   }
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_COMMON_H
+#define PVR_JOB_COMMON_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+
+/** Gamma-correction modes that can be requested of the PBE when storing. */
+enum pvr_pbe_gamma {
+   /* No gamma correction applied. */
+   PVR_PBE_GAMMA_NONE,
+   /* For two-channel pack formats. */
+   PVR_PBE_GAMMA_RED,
+   PVR_PBE_GAMMA_REDGREEN,
+   /* For all other pack formats. */
+   PVR_PBE_GAMMA_ENABLED,
+};
+
+/** Bit position within the USC output buffer from which the PBE starts
+ * reading source data (translated to hardware encodings by
+ * pvr_pbe_get_src_pos()).
+ */
+enum pvr_pbe_source_start_pos {
+   PVR_PBE_STARTPOS_BIT0,
+   PVR_PBE_STARTPOS_BIT32,
+   PVR_PBE_STARTPOS_BIT64,
+   PVR_PBE_STARTPOS_BIT96,
+   /* The below ones are available if has_eight_output_registers feature is
+    * enabled.
+    */
+   PVR_PBE_STARTPOS_BIT128,
+   PVR_PBE_STARTPOS_BIT160,
+   PVR_PBE_STARTPOS_BIT192,
+   PVR_PBE_STARTPOS_BIT224,
+};
+
+/**
+ * These are parameters specific to the surface being set up and hence can be
+ * typically set up at surface creation time.
+ */
+struct pvr_pbe_surf_params {
+   /* Swizzle for a format can be retrieved using pvr_get_format_swizzle(). */
+   uint8_t swizzle[4];
+   /* is_normalized can be retrieved using vk_format_is_normalized(). */
+   bool is_normalized;
+   /* pbe_packmode can be retrieved using pvr_get_pbe_packmode(). */
+   uint32_t pbe_packmode;
+   /* source_format and gamma can be retrieved using
+    * pvr_pbe_get_src_format_and_gamma().
+    */
+   uint32_t source_format;
+   enum pvr_pbe_gamma gamma;
+   /* nr_components can be retrieved using vk_format_get_nr_components(). */
+   uint32_t nr_components;
+
+   /* When an RT of MRT is packed using less USC outputs, this flag needs to be
+    * setup to true.
+    *
+    * Currently, this flag is only considered when has_usc_f16_sop is enabled.
+    * And it needs to be true when a render target by default should use F16
+    * USC channel but uses U8 channel instead for squeezing into on-chip MRT.
+    *
+    * It is better to make this member with FF_ACCUMFORMAT type or, at least,
+    * describe USC channel size. But for now, only use this flag for
+    * simplicity.
+    */
+   /* NOTE(review): the comment above describes a flag member (presumably a
+    * bool such as "with_packed_usc_channel") that is not present in this
+    * struct — confirm whether the member is missing or the comment is stale.
+    */
+
+   /* Device-virtual address of the surface. */
+   pvr_dev_addr_t addr;
+   enum pvr_memlayout mem_layout;
+   uint32_t stride;
+
+   /* Depth size for renders */
+   uint32_t depth;
+
+   /* Pre-rotation dimensions of surface */
+   uint32_t width;
+   uint32_t height;
+
+   bool z_only_render;
+   bool down_scale;
+   uint32_t msaa_mode;
+};
+
+/**
+ * These parameters are generally render-specific and need to be set up at the
+ * time #pvr_pbe_pack_state() is called.
+ */
+struct pvr_pbe_render_params {
+   /* Clipping params are in terms of pixels and are inclusive. */
+   uint32_t min_x_clip;
+   uint32_t max_x_clip;
+
+   uint32_t min_y_clip;
+   uint32_t max_y_clip;
+
+   /* Start position of pixels to be read within 128bit USC output buffer. */
+   enum pvr_pbe_source_start_pos source_start;
+
+   /* 9-bit slice number to be used when memlayout is 3D twiddle. */
+   uint32_t slice;
+
+   /* MRT index; use 0 for a single render target. */
+   uint32_t mrt_index;
+};
+
+void pvr_pbe_pack_state(
+ struct pvr_device *device,
+ const struct pvr_pbe_surf_params *surface_params,
+ const struct pvr_pbe_render_params *render_params,
+ uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
+ uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]);
+
+/* Helper to calculate pvr_pbe_surf_params::gamma and
+ * pvr_pbe_surf_params::source_format.
+ */
+void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
+ enum pvr_pbe_gamma default_gamma,
+ bool with_packed_usc_channel,
+ uint32_t *const src_format_out,
+ enum pvr_pbe_gamma *const gamma_out);
+
+void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
+ uint32_t msaa_mode,
+ uint32_t pixel_width,
+ bool paired_tiles,
+ uint32_t max_tiles_in_flight,
+ uint32_t *const isp_ctl_out,
+ uint32_t *const pixel_ctl_out);
+
+#endif /* PVR_JOB_COMMON_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_csb.h"
+#include "pvr_job_common.h"
+#include "pvr_job_context.h"
+#include "pvr_job_compute.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/* Fills in the winsys submit info for a compute sub-command: frame/job
+ * bookkeeping, wait semaphores, and the CDM context-resume PDS1 register
+ * derived from the context-switch stored-register state.
+ */
+static void pvr_compute_job_ws_submit_info_init(
+   struct pvr_compute_ctx *ctx,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_compute_submit_info *submit_info)
+{
+   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
+   uint32_t shared_regs = sub_cmd->compute.num_shared_regs;
+
+   submit_info->frame_num = ctx->device->global_queue_present_count;
+   submit_info->job_num = ctx->device->global_queue_job_count;
+
+   submit_info->semaphores = semaphores;
+   submit_info->semaphore_count = semaphore_count;
+   submit_info->stage_flags = stage_flags;
+
+   /* Other registers are initialized in pvr_sub_cmd_compute_job_init(). */
+   pvr_csb_pack (&submit_info->regs.cdm_resume_pds1,
+                 CR_CDM_CONTEXT_PDS1,
+                 state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t load_program_data_size =
+         ctx_switch->sr[0].pds.load_program.data_size * 4U;
+
+      state.pds_seq_dep = false;
+      state.usc_seq_dep = false;
+      state.target = false;
+      state.unified_size = ctx_switch->sr[0].usc.unified_size;
+      state.common_shared = true;
+      /* shared_regs << 2 converts registers (dwords) to bytes. */
+      state.common_size =
+         DIV_ROUND_UP(shared_regs << 2,
+                      PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE));
+      state.temp_size = 0;
+
+      assert(load_program_data_size %
+                PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.data_size =
+         load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE);
+      state.fence = false;
+   }
+}
+
+/**
+ * Submits a compute sub-command to the winsys.
+ *
+ * Returns the winsys submit result; on success *syncobj_out receives the
+ * completion sync object.
+ */
+VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
+                                struct pvr_sub_cmd *sub_cmd,
+                                const VkSemaphore *semaphores,
+                                uint32_t semaphore_count,
+                                uint32_t *stage_flags,
+                                struct pvr_winsys_syncobj **const syncobj_out)
+{
+   struct pvr_winsys_compute_submit_info *const submit_info =
+      &sub_cmd->compute.submit_info;
+
+   /* Fill in the per-submission fields before handing off to the winsys. */
+   pvr_compute_job_ws_submit_info_init(ctx,
+                                       sub_cmd,
+                                       semaphores,
+                                       semaphore_count,
+                                       stage_flags,
+                                       submit_info);
+
+   return ctx->device->ws->ops->compute_submit(ctx->ws_ctx,
+                                               submit_info,
+                                               syncobj_out);
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_COMPUTE_H
+#define PVR_JOB_COMPUTE_H
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+struct pvr_compute_ctx;
+struct pvr_sub_cmd;
+struct pvr_winsys_syncobj;
+
+/* Submits a compute sub-command on the given compute context, waiting on
+ * semaphore_count semaphores at the stages given in stage_flags. On success
+ * *syncobj_out is set to the syncobj for the submitted job.
+ */
+VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
+                                struct pvr_sub_cmd *sub_cmd,
+                                const VkSemaphore *semaphores,
+                                uint32_t semaphore_count,
+                                uint32_t *stage_flags,
+                                struct pvr_winsys_syncobj **const syncobj_out);
+
+#endif /* PVR_JOB_COMPUTE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_job_context.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_cdm_load_sr.h"
+#include "pvr_vdm_load_sr.h"
+#include "pvr_vdm_store_sr.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+/* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this
+ * value when constructing the control stream?
+ */
+/* The VDM callstack is used by the hardware to implement control stream links
+ * with a return, i.e. sub-control streams/subroutines. This value specifies the
+ * maximum callstack depth.
+ */
+#define PVR_VDM_CALLSTACK_MAX_DEPTH 1U
+
+#define ROGUE_PDS_TASK_PROGRAM_SIZE 256U
+
+/* Initializes per-context reset state. Currently only flags hw quirks whose
+ * reset handling is not yet implemented; always returns VK_SUCCESS.
+ */
+static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device,
+                                       struct pvr_reset_cmd *const reset_cmd)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+
+   /* The reset framework depends on compute support in the hw. */
+   assert(PVR_HAS_FEATURE(dev_info, compute));
+
+   if (PVR_HAS_QUIRK(dev_info, 51764))
+      pvr_finishme("Missing reset support for brn51764");
+
+   if (PVR_HAS_QUIRK(dev_info, 58839))
+      pvr_finishme("Missing reset support for brn58839");
+
+   return VK_SUCCESS;
+}
+
+/* Counterpart to pvr_ctx_reset_cmd_init(); currently a no-op placeholder. */
+static void pvr_ctx_reset_cmd_fini(struct pvr_device *device,
+                                   struct pvr_reset_cmd *reset_cmd)
+
+{
+   /* TODO: reset command cleanup. */
+}
+
+/* Creates and uploads the PDS program that stores the persistent temps into
+ * pt_bo (a stream-out terminate program). pt_bo_size is the buffer size in
+ * bytes and must be dword aligned. On success pds_upload_out describes the
+ * uploaded program.
+ */
+static VkResult pvr_pds_pt_store_program_create_and_upload(
+   struct pvr_device *device,
+   struct pvr_bo *pt_bo,
+   uint32_t pt_bo_size,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_stream_out_terminate_program program = { 0 };
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   size_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   uint32_t *data_buffer;
+   uint32_t *code_buffer;
+   VkResult result;
+
+   /* Check the bo size can be converted to dwords without any rounding. */
+   assert(pt_bo_size % 4 == 0);
+
+   program.pds_persistent_temp_size_to_store = pt_bo_size / 4;
+   program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr;
+
+   /* First pass: only compute the data/code segment sizes. */
+   pvr_pds_generate_stream_out_terminate_program(&program,
+                                                 NULL,
+                                                 PDS_GENERATE_SIZES,
+                                                 dev_info);
+
+   staging_buffer_size = (program.stream_out_terminate_pds_data_size +
+                          program.stream_out_terminate_pds_code_size) *
+                         sizeof(*staging_buffer);
+
+   staging_buffer = vk_zalloc(&device->vk.alloc,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Second pass: emit the data segment, then the code segment right after
+    * it (the generator returns the pointer past the data segment).
+    */
+   data_buffer = staging_buffer;
+   code_buffer =
+      pvr_pds_generate_stream_out_terminate_program(&program,
+                                                    data_buffer,
+                                                    PDS_GENERATE_DATA_SEGMENT,
+                                                    dev_info);
+   pvr_pds_generate_stream_out_terminate_program(&program,
+                                                 code_buffer,
+                                                 PDS_GENERATE_CODE_SEGMENT,
+                                                 dev_info);
+
+   /* This PDS program is passed to the HW via the PPP state words. These only
+    * allow the data segment address to be specified and expect the code
+    * segment to immediately follow. Assume the code alignment is the same as
+    * the data.
+    */
+   result =
+      pvr_gpu_upload_pds(device,
+                         data_buffer,
+                         program.stream_out_terminate_pds_data_size,
+                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
+                         code_buffer,
+                         program.stream_out_terminate_pds_code_size,
+                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
+                         cache_line_size,
+                         pds_upload_out);
+
+   vk_free(&device->vk.alloc, staging_buffer);
+
+   return result;
+}
+
+/* Creates and uploads the PDS program that reloads the persistent temps from
+ * pt_bo (a stream-out init program). Mirror of
+ * pvr_pds_pt_store_program_create_and_upload(); pt_bo_size is in bytes and
+ * must be dword aligned.
+ */
+static VkResult pvr_pds_pt_resume_program_create_and_upload(
+   struct pvr_device *device,
+   struct pvr_bo *pt_bo,
+   uint32_t pt_bo_size,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_stream_out_init_program program = { 0 };
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   size_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   uint32_t *data_buffer;
+   uint32_t *code_buffer;
+   VkResult result;
+
+   /* Check the bo size can be converted to dwords without any rounding. */
+   assert(pt_bo_size % 4 == 0);
+
+   /* Single source buffer: the persistent temps saved by the store program. */
+   program.num_buffers = 1;
+   program.pds_buffer_data_size[0] = pt_bo_size / 4;
+   program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr;
+
+   /* First pass: only compute the data/code segment sizes. */
+   pvr_pds_generate_stream_out_init_program(&program,
+                                            NULL,
+                                            false,
+                                            PDS_GENERATE_SIZES,
+                                            dev_info);
+
+   staging_buffer_size = (program.stream_out_init_pds_data_size +
+                          program.stream_out_init_pds_code_size) *
+                         sizeof(*staging_buffer);
+
+   staging_buffer = vk_zalloc(&device->vk.alloc,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Second pass: emit the data segment, then the code segment right after. */
+   data_buffer = staging_buffer;
+   code_buffer =
+      pvr_pds_generate_stream_out_init_program(&program,
+                                               data_buffer,
+                                               false,
+                                               PDS_GENERATE_DATA_SEGMENT,
+                                               dev_info);
+   pvr_pds_generate_stream_out_init_program(&program,
+                                            code_buffer,
+                                            false,
+                                            PDS_GENERATE_CODE_SEGMENT,
+                                            dev_info);
+
+   /* This PDS program is passed to the HW via the PPP state words. These only
+    * allow the data segment address to be specified and expect the code
+    * segment to immediately follow. Assume the code alignment is the same as
+    * the data.
+    */
+   result =
+      pvr_gpu_upload_pds(device,
+                         data_buffer,
+                         program.stream_out_init_pds_data_size,
+                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
+                         code_buffer,
+                         program.stream_out_init_pds_code_size,
+                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
+                         cache_line_size,
+                         pds_upload_out);
+
+   vk_free(&device->vk.alloc, staging_buffer);
+
+   return result;
+}
+
+/* Allocates the persistent temps save/restore buffer and creates the PDS
+ * store and resume programs pointing at it. On failure everything created so
+ * far is freed via the goto cleanup chain.
+ */
+static VkResult
+pvr_render_job_pt_programs_setup(struct pvr_device *device,
+                                 struct rogue_pt_programs *pt_programs)
+{
+   VkResult result;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.pds_heap,
+                         ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
+                         ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT,
+                         PVR_BO_ALLOC_FLAG_CPU_ACCESS,
+                         &pt_programs->store_resume_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_pds_pt_store_program_create_and_upload(
+      device,
+      pt_programs->store_resume_state_bo,
+      ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
+      &pt_programs->pds_store_program);
+   if (result != VK_SUCCESS)
+      goto err_free_store_resume_state_bo;
+
+   result = pvr_pds_pt_resume_program_create_and_upload(
+      device,
+      pt_programs->store_resume_state_bo,
+      ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
+      &pt_programs->pds_resume_program);
+   if (result != VK_SUCCESS)
+      goto err_free_pds_store_program;
+
+   return VK_SUCCESS;
+
+err_free_pds_store_program:
+   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
+
+err_free_store_resume_state_bo:
+   pvr_bo_free(device, pt_programs->store_resume_state_bo);
+
+   return result;
+}
+
+/* Frees everything created by pvr_render_job_pt_programs_setup(), in reverse
+ * setup order.
+ */
+static void
+pvr_render_job_pt_programs_cleanup(struct pvr_device *device,
+                                   struct rogue_pt_programs *pt_programs)
+{
+   pvr_bo_free(device, pt_programs->pds_resume_program.pvr_bo);
+   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
+   pvr_bo_free(device, pt_programs->store_resume_state_bo);
+}
+
+/* Fills in the common PDS shared-register store/load program description:
+ * two DOUTW constants pointing at the SR buffer (second one offset past the
+ * reserved region) and the DOUTU task control for the given USC program.
+ */
+static void pvr_pds_ctx_sr_program_setup(
+   bool cc_enable,
+   uint64_t usc_program_upload_offset,
+   uint8_t usc_temps,
+   pvr_dev_addr_t sr_addr,
+   struct pvr_pds_shared_storing_program *const program_out)
+{
+   /* The PDS task is the same for stores and loads. */
+   *program_out = (struct pvr_pds_shared_storing_program){
+      .cc_enable = cc_enable,
+      .doutw_control = {
+         .dest_store = PDS_UNIFIED_STORE,
+         .num_const64 = 2,
+         .doutw_data = {
+            [0] = sr_addr.addr,
+            [1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE,
+         },
+         .last_instruction = false,
+      },
+   };
+
+   pvr_pds_setup_doutu(&program_out->usc_task.usc_task_control,
+                       usc_program_upload_offset,
+                       usc_temps,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+}
+
+/* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to
+ * this. If there is a problem here it's likely that the same problem exists
+ * there so don't forget to update the compute function.
+ */
+/* Creates and uploads a render-context shared-register store/load PDS
+ * program for the USC program at usc_program_upload_offset, targeting the SR
+ * buffer at sr_addr.
+ */
+static VkResult pvr_pds_render_ctx_sr_program_create_and_upload(
+   struct pvr_device *device,
+   uint64_t usc_program_upload_offset,
+   uint8_t usc_temps,
+   pvr_dev_addr_t sr_addr,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   const uint32_t pds_data_alignment =
+      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
+
+   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
+    * and code size when using the PDS_GENERATE_SIZES mode.
+    */
+   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
+   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
+   struct pvr_pds_shared_storing_program program;
+   ASSERTED uint32_t *buffer_end;
+   uint32_t code_offset;
+
+   pvr_pds_ctx_sr_program_setup(false,
+                                usc_program_upload_offset,
+                                usc_temps,
+                                sr_addr,
+                                &program);
+
+   /* Emit the data segment at the start of the staging buffer... */
+   pvr_pds_generate_shared_storing_program(&program,
+                                           &staging_buffer[0],
+                                           PDS_GENERATE_DATA_SEGMENT,
+                                           dev_info);
+
+   /* ...and the code segment at the next data-size-unit boundary. */
+   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
+
+   buffer_end =
+      pvr_pds_generate_shared_storing_program(&program,
+                                              &staging_buffer[code_offset],
+                                              PDS_GENERATE_CODE_SEGMENT,
+                                              dev_info);
+
+   /* Make sure the generated program fit in the staging buffer. */
+   assert((uint32_t)(buffer_end - staging_buffer) * 4 <
+          ROGUE_PDS_TASK_PROGRAM_SIZE);
+
+   return pvr_gpu_upload_pds(device,
+                             &staging_buffer[0],
+                             program.data_size,
+                             PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT),
+                             &staging_buffer[code_offset],
+                             program.code_size,
+                             PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT),
+                             cache_line_size,
+                             pds_upload_out);
+}
+
+/* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to
+ * this. If there is a problem here it's likely that the same problem exists
+ * there so don't forget to update the render_ctx function.
+ */
+/* Compute-context variant: additionally emits a conditional barrier before
+ * the code segment, and uses the shared *loading* generator for load
+ * programs on devices that need the SW compute PDS barrier workaround.
+ */
+static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload(
+   struct pvr_device *device,
+   bool is_loading_program,
+   uint64_t usc_program_upload_offset,
+   uint8_t usc_temps,
+   pvr_dev_addr_t sr_addr,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   const uint32_t pds_data_alignment =
+      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
+
+   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
+    * and code size when using the PDS_GENERATE_SIZES mode.
+    */
+   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
+   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
+   struct pvr_pds_shared_storing_program program;
+   uint32_t *buffer_ptr;
+   uint32_t code_offset;
+
+   pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421),
+                                usc_program_upload_offset,
+                                usc_temps,
+                                sr_addr,
+                                &program);
+
+   /* Data segment first; the generator choice must match the code-segment
+    * generator used below.
+    */
+   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
+      pvr_pds_generate_compute_shared_loading_program(&program,
+                                                      &staging_buffer[0],
+                                                      PDS_GENERATE_DATA_SEGMENT,
+                                                      dev_info);
+   } else {
+      pvr_pds_generate_shared_storing_program(&program,
+                                              &staging_buffer[0],
+                                              PDS_GENERATE_DATA_SEGMENT,
+                                              dev_info);
+   }
+
+   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
+
+   /* The barrier code is emitted first, then the store/load code follows. */
+   buffer_ptr =
+      pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset],
+                                                   PDS_GENERATE_CODE_SEGMENT);
+
+   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
+      buffer_ptr = pvr_pds_generate_compute_shared_loading_program(
+         &program,
+         buffer_ptr,
+         PDS_GENERATE_CODE_SEGMENT,
+         dev_info);
+   } else {
+      buffer_ptr =
+         pvr_pds_generate_shared_storing_program(&program,
+                                                 buffer_ptr,
+                                                 PDS_GENERATE_CODE_SEGMENT,
+                                                 dev_info);
+   }
+
+   /* Make sure the generated program fit in the staging buffer. */
+   assert((uint32_t)(buffer_ptr - staging_buffer) * 4 <
+          ROGUE_PDS_TASK_PROGRAM_SIZE);
+
+   /* The same upload is referenced from both the store (CR_CDM_CONTEXT_PDS0)
+    * and load (CR_CDM_CONTEXT_LOAD_PDS0) registers, so their alignment
+    * requirements must agree.
+    */
+   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) ==
+                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT));
+
+   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) ==
+                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT));
+
+   return pvr_gpu_upload_pds(
+      device,
+      &staging_buffer[0],
+      program.data_size,
+      PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT),
+      &staging_buffer[code_offset],
+      (uint32_t)(buffer_ptr - &staging_buffer[code_offset]),
+      PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT),
+      cache_line_size,
+      pds_upload_out);
+}
+
+enum pvr_ctx_sr_program_target {
+   PVR_CTX_SR_RENDER_TARGET,
+   PVR_CTX_SR_COMPUTE_TARGET,
+};
+
+/* Sets up the shared-register (SR) context switch programs for a render or
+ * compute context: the SR save/restore buffer, the USC store/load programs,
+ * and the PDS programs that drive them. On failure, everything created so
+ * far is freed and the failing result is returned.
+ */
+static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device,
+                                          enum pvr_ctx_sr_program_target target,
+                                          struct rogue_sr_programs *sr_programs)
+{
+   const uint64_t store_load_state_bo_size =
+      PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) +
+      ROGUE_LLS_SHARED_REGS_RESERVE_SIZE;
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   uint64_t usc_store_program_upload_offset;
+   uint64_t usc_load_program_upload_offset;
+   const uint8_t *usc_load_sr_code;
+   uint32_t usc_load_sr_code_size;
+   VkResult result;
+
+   /* Note that this is being used for both compute and render ctx. There is no
+    * compute equivalent define for the VDMCTRL unit size.
+    */
+   /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. */
+   sr_programs->usc.unified_size =
+      DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE));
+
+   /* Fix: pvr_bo_alloc() takes PVR_BO_ALLOC_FLAG_* flags (as used by every
+    * other allocation in this file), not PVR_WINSYS_BO_FLAG_*.
+    */
+   result = pvr_bo_alloc(device,
+                         device->heaps.pds_heap,
+                         store_load_state_bo_size,
+                         cache_line_size,
+                         PVR_BO_ALLOC_FLAG_CPU_ACCESS,
+                         &sr_programs->store_load_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* USC state update: SR state store. */
+
+   assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
+
+   result = pvr_gpu_upload_usc(device,
+                               pvr_vdm_store_sr_code,
+                               sizeof(pvr_vdm_store_sr_code),
+                               cache_line_size,
+                               &sr_programs->usc.store_program_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_store_load_state_bo;
+
+   usc_store_program_upload_offset =
+      sr_programs->usc.store_program_bo->vma->dev_addr.addr -
+      device->heaps.usc_heap->base_addr.addr;
+
+   /* USC state update: SR state load. */
+
+   if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) {
+      STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
+
+      usc_load_sr_code = pvr_cdm_load_sr_code;
+      usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code);
+   } else {
+      STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
+
+      usc_load_sr_code = pvr_vdm_load_sr_code;
+      usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code);
+   }
+
+   result = pvr_gpu_upload_usc(device,
+                               usc_load_sr_code,
+                               usc_load_sr_code_size,
+                               cache_line_size,
+                               &sr_programs->usc.load_program_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_usc_store_program_bo;
+
+   usc_load_program_upload_offset =
+      sr_programs->usc.load_program_bo->vma->dev_addr.addr -
+      device->heaps.usc_heap->base_addr.addr;
+
+   /* FIXME: The number of USC temps should be output alongside
+    * pvr_vdm_store_sr_code rather than hard coded.
+    */
+   /* Create and upload the PDS load and store programs. Point them to the
+    * appropriate USC load and store programs.
+    */
+   switch (target) {
+   case PVR_CTX_SR_RENDER_TARGET:
+      /* PDS state update: SR state store. */
+      result = pvr_pds_render_ctx_sr_program_create_and_upload(
+         device,
+         usc_store_program_upload_offset,
+         8,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.store_program);
+      if (result != VK_SUCCESS)
+         goto err_free_usc_load_program_bo;
+
+      /* PDS state update: SR state load. */
+      result = pvr_pds_render_ctx_sr_program_create_and_upload(
+         device,
+         usc_load_program_upload_offset,
+         20,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.load_program);
+      if (result != VK_SUCCESS)
+         goto err_free_pds_store_program_bo;
+
+      break;
+
+   case PVR_CTX_SR_COMPUTE_TARGET:
+      /* PDS state update: SR state store. */
+      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
+         device,
+         false,
+         usc_store_program_upload_offset,
+         8,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.store_program);
+      if (result != VK_SUCCESS)
+         goto err_free_usc_load_program_bo;
+
+      /* PDS state update: SR state load. */
+      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
+         device,
+         true,
+         usc_load_program_upload_offset,
+         20,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.load_program);
+      if (result != VK_SUCCESS)
+         goto err_free_pds_store_program_bo;
+
+      break;
+
+   default:
+      unreachable("Invalid target.");
+      break;
+   }
+
+   return VK_SUCCESS;
+
+err_free_pds_store_program_bo:
+   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
+
+err_free_usc_load_program_bo:
+   pvr_bo_free(device, sr_programs->usc.load_program_bo);
+
+err_free_usc_store_program_bo:
+   pvr_bo_free(device, sr_programs->usc.store_program_bo);
+
+err_free_store_load_state_bo:
+   pvr_bo_free(device, sr_programs->store_load_state_bo);
+
+   /* Fix: the error path must report the failure, not VK_SUCCESS. */
+   return result;
+}
+
+/* Frees everything created by pvr_ctx_sr_programs_setup(), in reverse setup
+ * order.
+ */
+static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device,
+                                        struct rogue_sr_programs *sr_programs)
+{
+   pvr_bo_free(device, sr_programs->pds.load_program.pvr_bo);
+   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
+   pvr_bo_free(device, sr_programs->usc.load_program_bo);
+   pvr_bo_free(device, sr_programs->usc.store_program_bo);
+   pvr_bo_free(device, sr_programs->store_load_state_bo);
+}
+
+/* Sets up one render context-switch program set: the persistent temp (PT)
+ * programs followed by the shared register (SR) programs.
+ */
+static VkResult
+pvr_render_ctx_switch_programs_setup(struct pvr_device *device,
+                                     struct pvr_render_ctx_programs *programs)
+{
+   VkResult result;
+
+   result = pvr_render_job_pt_programs_setup(device, &programs->pt);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_ctx_sr_programs_setup(device,
+                                      PVR_CTX_SR_RENDER_TARGET,
+                                      &programs->sr);
+   if (result != VK_SUCCESS)
+      goto err_pt_programs_cleanup;
+
+   return VK_SUCCESS;
+
+err_pt_programs_cleanup:
+   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
+
+   return result;
+}
+
+/* Tears down one render context-switch program set in reverse setup order. */
+static void
+pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device,
+                                       struct pvr_render_ctx_programs *programs)
+{
+   pvr_ctx_sr_programs_cleanup(device, &programs->sr);
+   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
+}
+
+/* Allocates the VDM and geometry context-switch state buffers and sets up
+ * each context-switch program set. On failure, only resources that were
+ * actually created are freed.
+ */
+static VkResult pvr_render_ctx_switch_init(struct pvr_device *device,
+                                           struct pvr_render_ctx *ctx)
+{
+   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
+   /* Both state buffers are allocated GPU-uncached with CPU access. */
+   const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                                       PVR_BO_ALLOC_FLAG_CPU_ACCESS;
+   const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                                        PVR_BO_ALLOC_FLAG_CPU_ACCESS;
+   uint32_t i;
+   VkResult result;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE,
+                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT,
+                         vdm_state_bo_flags,
+                         &ctx_switch->vdm_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         ROGUE_LLS_TA_STATE_BUFFER_SIZE,
+                         ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT,
+                         geom_state_bo_flags,
+                         &ctx_switch->geom_state_bo);
+   if (result != VK_SUCCESS)
+      goto err_pvr_bo_free_vdm_state_bo;
+
+   for (i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
+      result =
+         pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]);
+      if (result != VK_SUCCESS)
+         goto err_programs_cleanup;
+   }
+
+   return VK_SUCCESS;
+
+err_programs_cleanup:
+   /* Fix: only unwind the program sets that were successfully set up;
+    * entries at index >= i were never initialized (the ctx allocation is not
+    * zeroed) so cleaning them up would free garbage pointers.
+    */
+   while (i--)
+      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
+
+   pvr_bo_free(device, ctx_switch->geom_state_bo);
+
+err_pvr_bo_free_vdm_state_bo:
+   pvr_bo_free(device, ctx_switch->vdm_state_bo);
+
+   return result;
+}
+
+/* Frees everything created by pvr_render_ctx_switch_init(), in reverse setup
+ * order.
+ */
+static void pvr_render_ctx_switch_fini(struct pvr_device *device,
+                                       struct pvr_render_ctx *ctx)
+{
+   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
+      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
+   }
+
+   pvr_bo_free(device, ctx_switch->geom_state_bo);
+   pvr_bo_free(device, ctx_switch->vdm_state_bo);
+}
+
+/* Packs the VDMCTRL_PDS_STATE0/1 words describing the given PDS program for
+ * the VDM context store/resume tasks.
+ */
+static void
+pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program,
+                                      enum PVRX(VDMCTRL_USC_TARGET) usc_target,
+                                      uint8_t usc_unified_size,
+                                      uint32_t *const state0_out,
+                                      uint32_t *const state1_out)
+{
+   pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t pds_data_size = pds_program->data_size * 4;
+
+      state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM);
+      state.usc_target = usc_target;
+      state.usc_common_size = 0;
+      state.usc_unified_size = usc_unified_size;
+      state.pds_temp_size = 0;
+
+      /* The hw field is in data-size units; the byte size must divide
+       * evenly.
+       */
+      assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.pds_data_size =
+         pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
+   };
+
+   pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) {
+      state.pds_data_addr.addr = pds_program->data_offset;
+      state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS);
+      state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS);
+   }
+}
+
+/* Packs the TA_STATE_STREAM_OUT1/2 words describing the given PDS program
+ * for the geometry context store/resume tasks.
+ */
+static void
+pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program,
+                                          uint32_t *const stream_out1_out,
+                                          uint32_t *const stream_out2_out)
+{
+   pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t pds_data_size = pds_program->data_size * 4;
+
+      state.sync = true;
+
+      /* The hw field is in data-size units; the byte size must divide
+       * evenly.
+       */
+      assert(pds_data_size %
+                PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.pds_data_size =
+         pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE);
+
+      state.pds_temp_size = 0;
+   }
+
+   pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) {
+      state.pds_data_addr.addr = pds_program->data_offset;
+   }
+}
+
+/* Fills in the static (set-once) winsys state for a render context: the
+ * VDM/geometry context state base addresses and, per program set, the
+ * context store and resume task words.
+ */
+static void pvr_render_ctx_ws_static_state_init(
+   struct pvr_render_ctx *ctx,
+   struct pvr_winsys_render_ctx_static_state *static_state)
+{
+   void *dst;
+
+   dst = &static_state->vdm_ctx_state_base_addr;
+   pvr_csb_pack (dst, CR_VDM_CONTEXT_STATE_BASE, base) {
+      base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr;
+   }
+
+   dst = &static_state->geom_ctx_state_base_addr;
+   pvr_csb_pack (dst, CR_TA_CONTEXT_STATE_BASE, base) {
+      base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr;
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) {
+      struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt;
+      struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr;
+
+      /* Context store state. */
+      dst = &static_state->geom_state[i].vdm_ctx_store_task0;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK0, task0) {
+         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program,
+                                               PVRX(VDMCTRL_USC_TARGET_ANY),
+                                               sr_prog->usc.unified_size,
+                                               &task0.pds_state0,
+                                               &task0.pds_state1);
+      }
+
+      dst = &static_state->geom_state[i].vdm_ctx_store_task1;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK1, task1) {
+         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
+            state.pds_code_addr.addr = sr_prog->pds.store_program.code_offset;
+         }
+      }
+
+      dst = &static_state->geom_state[i].vdm_ctx_store_task2;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK2, task2) {
+         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program,
+                                                   &task2.stream_out1,
+                                                   &task2.stream_out2);
+      }
+
+      /* Context resume state. Mirrors the store state above but points at
+       * the load/resume programs.
+       */
+      dst = &static_state->geom_state[i].vdm_ctx_resume_task0;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) {
+         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program,
+                                               PVRX(VDMCTRL_USC_TARGET_ALL),
+                                               sr_prog->usc.unified_size,
+                                               &task0.pds_state0,
+                                               &task0.pds_state1);
+      }
+
+      dst = &static_state->geom_state[i].vdm_ctx_resume_task1;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) {
+         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
+            state.pds_code_addr.addr = sr_prog->pds.load_program.code_offset;
+         }
+      }
+
+      dst = &static_state->geom_state[i].vdm_ctx_resume_task2;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) {
+         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program,
+                                                   &task2.stream_out1,
+                                                   &task2.stream_out2);
+      }
+   }
+}
+
+/* Builds the winsys render context create info from a fully-initialized
+ * render context.
+ */
+static void pvr_render_ctx_ws_create_info_init(
+   struct pvr_render_ctx *ctx,
+   enum pvr_winsys_ctx_priority priority,
+   struct pvr_winsys_render_ctx_create_info *create_info)
+{
+   create_info->priority = priority;
+   create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr;
+
+   pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state);
+}
+
+/* Creates a render context: allocates the VDM callstack buffer, sets up the
+ * context-switch state and reset command, then creates the winsys render
+ * context. On success *ctx_out owns the new context (destroy with
+ * pvr_render_ctx_destroy()).
+ */
+VkResult pvr_render_ctx_create(struct pvr_device *device,
+                               enum pvr_winsys_ctx_priority priority,
+                               struct pvr_render_ctx **const ctx_out)
+{
+   /* One 64-bit return address per callstack entry. */
+   const uint64_t vdm_callstack_size =
+      sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH;
+   struct pvr_winsys_render_ctx_create_info create_info;
+   struct pvr_render_ctx *ctx;
+   VkResult result;
+
+   ctx = vk_alloc(&device->vk.alloc,
+                  sizeof(*ctx),
+                  8,
+                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!ctx)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   ctx->device = device;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         vdm_callstack_size,
+                         PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT),
+                         0,
+                         &ctx->vdm_callstack_bo);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_ctx;
+
+   result = pvr_render_ctx_switch_init(device, ctx);
+   if (result != VK_SUCCESS)
+      goto err_free_vdm_callstack_bo;
+
+   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
+   if (result != VK_SUCCESS)
+      goto err_render_ctx_switch_fini;
+
+   /* ctx must be fully initialized by this point since
+    * pvr_render_ctx_ws_create_info_init() depends on this.
+    */
+   pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info);
+
+   result = device->ws->ops->render_ctx_create(device->ws,
+                                               &create_info,
+                                               &ctx->ws_ctx);
+   if (result != VK_SUCCESS)
+      goto err_render_ctx_reset_cmd_fini;
+
+   *ctx_out = ctx;
+
+   return VK_SUCCESS;
+
+err_render_ctx_reset_cmd_fini:
+   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
+
+err_render_ctx_switch_fini:
+   pvr_render_ctx_switch_fini(device, ctx);
+
+err_free_vdm_callstack_bo:
+   pvr_bo_free(device, ctx->vdm_callstack_bo);
+
+err_vk_free_ctx:
+   vk_free(&device->vk.alloc, ctx);
+
+   return result;
+}
+
+/* Destroys a render context created by pvr_render_ctx_create(), tearing down
+ * its state in reverse creation order and freeing the context itself.
+ */
+void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx)
+{
+   struct pvr_device *device = ctx->device;
+
+   device->ws->ops->render_ctx_destroy(ctx->ws_ctx);
+
+   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
+   pvr_render_ctx_switch_fini(device, ctx);
+   pvr_bo_free(device, ctx->vdm_callstack_bo);
+   vk_free(&device->vk.alloc, ctx);
+}
+
+/* Creates and uploads the PDS fence/terminate program referenced by the
+ * CR_CDM_TERMINATE_... registers of a compute context.
+ */
+static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
+   struct pvr_device *device,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const uint32_t pds_data_alignment =
+      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
+   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   /* Fix: size the staging buffer with the same ROGUE_PDS_TASK_PROGRAM_SIZE
+    * limit that the bounds assert below checks against (and that the sibling
+    * ctx SR program functions use), and zero-initialize it so the alignment
+    * padding between the data and code segments isn't uploaded
+    * uninitialized.
+    */
+   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE >> 2U] = { 0 };
+   struct pvr_pds_fence_program program = { 0 };
+   ASSERTED uint32_t *buffer_end;
+   uint32_t code_offset;
+   uint32_t data_size;
+
+   /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
+   assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
+            rogue_get_num_phantoms(dev_info) >= 2));
+
+   pvr_pds_generate_fence_terminate_program(&program,
+                                            staging_buffer,
+                                            PDS_GENERATE_DATA_SEGMENT,
+                                            &device->pdevice->dev_info);
+
+   /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size
+    * when we generate the code segment. Implement
+    * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change?
+    * This behavior doesn't seem consistent with the rest of the api. For now
+    * we store the size in a variable.
+    */
+   data_size = program.data_size;
+   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
+
+   buffer_end =
+      pvr_pds_generate_fence_terminate_program(&program,
+                                               &staging_buffer[code_offset],
+                                               PDS_GENERATE_CODE_SEGMENT,
+                                               &device->pdevice->dev_info);
+
+   /* Make sure the generated program fit in the staging buffer. */
+   assert((uint32_t)(buffer_end - staging_buffer) * 4U <
+          ROGUE_PDS_TASK_PROGRAM_SIZE);
+
+   return pvr_gpu_upload_pds(device,
+                             staging_buffer,
+                             data_size,
+                             PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT),
+                             &staging_buffer[code_offset],
+                             program.code_size,
+                             PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT),
+                             0,
+                             pds_upload_out);
+}
+
+/* Fills in the static (set-once) winsys state for a compute context: the CDM
+ * context state base address plus the store (CR_CDM_CONTEXT_...), terminate
+ * (CR_CDM_TERMINATE_...) and resume (CR_CDM_CONTEXT_LOAD_...) PDS program
+ * words. sr[0]/sr[1] are the two SR program sets; the "_b" registers carry
+ * the second set.
+ */
+static void pvr_compute_ctx_ws_static_state_init(
+   const struct pvr_device_info *const dev_info,
+   const struct pvr_compute_ctx *const ctx,
+   struct pvr_winsys_compute_ctx_static_state *const static_state)
+{
+   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
+
+   pvr_csb_pack (&static_state->cdm_ctx_state_base_addr,
+                 CR_CDM_CONTEXT_STATE_BASE,
+                 state) {
+      state.addr = ctx_switch->compute_state_bo->vma->dev_addr;
+   }
+
+   /* CR_CDM_CONTEXT_... use state store program info. */
+
+   pvr_csb_pack (&static_state->cdm_ctx_store_pds0,
+                 CR_CDM_CONTEXT_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[0].pds.store_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[0].pds.store_program.code_offset;
+   }
+
+   pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b,
+                 CR_CDM_CONTEXT_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[1].pds.store_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[1].pds.store_program.code_offset;
+   }
+
+   pvr_csb_pack (&static_state->cdm_ctx_store_pds1,
+                 CR_CDM_CONTEXT_PDS1,
+                 state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t store_program_data_size =
+         ctx_switch->sr[0].pds.store_program.data_size * 4U;
+
+      state.pds_seq_dep = true;
+      state.usc_seq_dep = false;
+      state.target = true;
+      state.unified_size = ctx_switch->sr[0].usc.unified_size;
+      state.common_shared = false;
+      state.common_size = 0;
+      state.temp_size = 0;
+
+      assert(store_program_data_size %
+                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.data_size = store_program_data_size /
+                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
+
+      state.fence = true;
+   }
+
+   /* CR_CDM_TERMINATE_... use fence terminate info. */
+
+   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds,
+                 CR_CDM_TERMINATE_PDS,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr_fence_terminate_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr_fence_terminate_program.code_offset;
+   }
+
+   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1,
+                 CR_CDM_TERMINATE_PDS1,
+                 state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t fence_terminate_program_data_size =
+         ctx_switch->sr_fence_terminate_program.data_size * 4U;
+
+      state.pds_seq_dep = true;
+      state.usc_seq_dep = false;
+      state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable);
+      state.unified_size = 0;
+      /* Common store is for shareds -- this will free the partitions. */
+      state.common_shared = true;
+      state.common_size = 0;
+      state.temp_size = 0;
+
+      assert(fence_terminate_program_data_size %
+                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.data_size = fence_terminate_program_data_size /
+                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
+      state.fence = true;
+   }
+
+   /* CR_CDM_RESUME_... use state load program info. */
+
+   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
+                 CR_CDM_CONTEXT_LOAD_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[0].pds.load_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[0].pds.load_program.code_offset;
+   }
+
+   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
+                 CR_CDM_CONTEXT_LOAD_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[1].pds.load_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[1].pds.load_program.code_offset;
+   }
+}
+
+/* Fill in the winsys create info for a compute context: the requested
+ * scheduling priority plus the static (context switch) register state.
+ */
+static void pvr_compute_ctx_ws_create_info_init(
+   const struct pvr_compute_ctx *const ctx,
+   enum pvr_winsys_ctx_priority priority,
+   struct pvr_winsys_compute_ctx_create_info *const create_info)
+{
+   const struct pvr_device_info *const dev_info =
+      &ctx->device->pdevice->dev_info;
+
+   pvr_compute_ctx_ws_static_state_init(dev_info,
+                                        ctx,
+                                        &create_info->static_state);
+
+   create_info->priority = priority;
+}
+
+/* Create a compute context.
+ *
+ * Allocates the context object, the CDM context-resume state buffer, the
+ * shared register (SR) store/load programs and the SR fence/terminate PDS
+ * program, then asks the winsys to create the kernel-side context. On any
+ * failure all previously acquired resources are released in reverse order
+ * via the goto chain below.
+ *
+ * On success '*ctx_out' is owned by the caller and must be released with
+ * pvr_compute_ctx_destroy().
+ */
+VkResult pvr_compute_ctx_create(struct pvr_device *const device,
+                                enum pvr_winsys_ctx_priority priority,
+                                struct pvr_compute_ctx **const ctx_out)
+{
+   struct pvr_winsys_compute_ctx_create_info create_info;
+   struct pvr_compute_ctx *ctx;
+   VkResult result;
+
+   ctx = vk_alloc(&device->vk.alloc,
+                  sizeof(*ctx),
+                  8,
+                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!ctx)
+      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   ctx->device = device;
+
+   /* Buffer used to hold CDM context state for resume (sized/aligned per the
+    * hwdef helpers); mapped CPU-accessible and GPU-uncached.
+    */
+   result = pvr_bo_alloc(
+      device,
+      device->heaps.general_heap,
+      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
+      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
+      PVR_WINSYS_BO_FLAG_CPU_ACCESS | PVR_WINSYS_BO_FLAG_GPU_UNCACHED,
+      &ctx->ctx_switch.compute_state_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_ctx;
+
+   /* TODO: Change this so that enabling storage to B doesn't change the array
+    * size. Instead of looping we could unroll this and have the second
+    * programs setup depending on the B enable. Doing it that way would make
+    * things more obvious.
+    */
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) {
+      result = pvr_ctx_sr_programs_setup(device,
+                                         PVR_CTX_SR_COMPUTE_TARGET,
+                                         &ctx->ctx_switch.sr[i]);
+      if (result != VK_SUCCESS) {
+         /* Only unwind the programs set up so far, then fall through to the
+          * common cleanup for the state buffer and context allocation.
+          */
+         for (uint32_t j = 0; j < i; j++)
+            pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]);
+
+         goto err_free_state_buffer;
+      }
+   }
+
+   result = pvr_pds_sr_fence_terminate_program_create_and_upload(
+      device,
+      &ctx->ctx_switch.sr_fence_terminate_program);
+   if (result != VK_SUCCESS)
+      goto err_free_sr_programs;
+
+   /* Snapshots the static register state; must happen after all the context
+    * switch programs above have been set up.
+    */
+   pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info);
+
+   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
+   if (result != VK_SUCCESS)
+      goto err_free_pds_fence_terminate_program;
+
+   result = device->ws->ops->compute_ctx_create(device->ws,
+                                                &create_info,
+                                                &ctx->ws_ctx);
+   if (result != VK_SUCCESS)
+      goto err_fini_reset_cmd;
+
+   *ctx_out = ctx;
+
+   return VK_SUCCESS;
+
+err_fini_reset_cmd:
+   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
+
+err_free_pds_fence_terminate_program:
+   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);
+
+err_free_sr_programs:
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
+      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);
+
+err_free_state_buffer:
+   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);
+
+err_free_ctx:
+   vk_free(&device->vk.alloc, ctx);
+
+   return result;
+}
+
+/* Tear down a compute context, releasing resources in the reverse order of
+ * pvr_compute_ctx_create(). The winsys context goes first so nothing on the
+ * kernel side can still reference the buffers freed afterwards.
+ */
+void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx)
+{
+   struct pvr_device *const device = ctx->device;
+   struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
+
+   device->ws->ops->compute_ctx_destroy(ctx->ws_ctx);
+
+   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
+
+   pvr_bo_free(device, ctx_switch->sr_fence_terminate_program.pvr_bo);
+
+   for (uint32_t u = 0; u < ARRAY_SIZE(ctx_switch->sr); u++)
+      pvr_ctx_sr_programs_cleanup(device, &ctx_switch->sr[u]);
+
+   pvr_bo_free(device, ctx_switch->compute_state_bo);
+
+   vk_free(&device->vk.alloc, ctx);
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_CONTEXT_H
+#define PVR_JOB_CONTEXT_H
+
+#include "pvr_winsys.h"
+#include "pvr_private.h"
+
+/* Support PDS code/data loading/storing to the 'B' shared register state
+ * buffers.
+ */
+#define ROGUE_NUM_SHADER_STATE_BUFFERS 2U
+
+/* TODO: Add reset framework support. */
+struct pvr_reset_cmd {
+   /* Intentionally empty for now; placeholder so contexts can already embed
+    * per-context reset state.
+    */
+};
+
+struct pvr_compute_ctx;
+
+/* Shared register (SR) context switch programs and the buffer they operate
+ * on: state is stored out on a context switch and loaded back on resume.
+ */
+struct rogue_sr_programs {
+   /* Buffer holding the stored/reloaded shared register state. */
+   struct pvr_bo *store_load_state_bo;
+
+   struct {
+      /* Unified store allocation size for the store/load programs; consumed
+       * when packing CR_CDM_CONTEXT_PDS1 (see
+       * pvr_compute_ctx_ws_static_state_init()).
+       */
+      uint8_t unified_size;
+
+      /* USC program that stores out the shared register state. */
+      struct pvr_bo *store_program_bo;
+
+      /* USC program that loads back the shared register state. */
+      struct pvr_bo *load_program_bo;
+   } usc;
+
+   struct {
+      /* PDS programs that kick off the USC store/load programs above. */
+      struct pvr_pds_upload store_program;
+      struct pvr_pds_upload load_program;
+   } pds;
+};
+
+/* Render context: winsys context plus the buffers and programs needed for
+ * render (VDM) context switching.
+ */
+struct pvr_render_ctx {
+   struct pvr_device *device;
+
+   /* Kernel-side context handle. */
+   struct pvr_winsys_render_ctx *ws_ctx;
+
+   /* Buffer to hold the VDM call stack */
+   struct pvr_bo *vdm_callstack_bo;
+
+   struct pvr_render_ctx_switch {
+      /* Buffer to hold the VDM context resume control stream. */
+      struct pvr_bo *vdm_state_bo;
+
+      /* Presumably holds geometry state across a context switch — confirm
+       * against the setup code.
+       */
+      struct pvr_bo *geom_state_bo;
+
+      struct pvr_render_ctx_programs {
+         /* Context switch persistent state programs. */
+         struct rogue_pt_programs {
+            /* Buffer used to hold the persistent state. */
+            struct pvr_bo *store_resume_state_bo;
+
+            /* PDS program to store out the persistent state in
+             * 'store_resume_state_bo'.
+             */
+            struct pvr_pds_upload pds_store_program;
+
+            /* PDS program to load in the persistent state in
+             * 'store_resume_state_bo'.
+             */
+            struct pvr_pds_upload pds_resume_program;
+         } pt;
+
+         /* Context switch shared register programs. */
+         struct rogue_sr_programs sr;
+
+      } programs[ROGUE_NUM_SHADER_STATE_BUFFERS];
+   } ctx_switch;
+
+   /* Reset framework. */
+   struct pvr_reset_cmd reset_cmd;
+};
+
+/* Compute context: winsys context plus the buffers and programs needed for
+ * compute (CDM) context switching.
+ */
+struct pvr_compute_ctx {
+   struct pvr_device *device;
+
+   /* Kernel-side context handle. */
+   struct pvr_winsys_compute_ctx *ws_ctx;
+
+   struct pvr_compute_ctx_switch {
+      /* CDM context state storage, used as the store/resume target. */
+      struct pvr_bo *compute_state_bo;
+
+      /* Shared register store/load programs, one per ('A'/'B') shared
+       * register state buffer.
+       */
+      struct rogue_sr_programs sr[ROGUE_NUM_SHADER_STATE_BUFFERS];
+
+      /* PDS program used to fence/terminate the shared register store. */
+      struct pvr_pds_upload sr_fence_terminate_program;
+   } ctx_switch;
+
+   /* Reset framework. */
+   struct pvr_reset_cmd reset_cmd;
+};
+
+/* Create a render context at the given priority. On success the caller owns
+ * '*ctx_out' and must release it with pvr_render_ctx_destroy().
+ */
+VkResult pvr_render_ctx_create(struct pvr_device *device,
+                               enum pvr_winsys_ctx_priority priority,
+                               struct pvr_render_ctx **const ctx_out);
+void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx);
+
+/* Create a compute context at the given priority. On success the caller owns
+ * '*ctx_out' and must release it with pvr_compute_ctx_destroy().
+ */
+VkResult pvr_compute_ctx_create(struct pvr_device *const device,
+                                enum pvr_winsys_ctx_priority priority,
+                                struct pvr_compute_ctx **const ctx_out);
+void pvr_compute_ctx_destroy(struct pvr_compute_ctx *ctx);
+
+#endif /* PVR_JOB_CONTEXT_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_job_common.h"
+#include "pvr_job_context.h"
+#include "pvr_job_render.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_rogue_fw.h"
+#include "pvr_winsys.h"
+#include "util/compiler.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+#define ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE 16U
+
+/* FIXME: Is there a hardware define we can use instead? */
+/* 1 DWord per PM physical page stored in the free list */
+#define ROGUE_FREE_LIST_ENTRY_SIZE ((uint32_t)sizeof(uint32_t))
+
+/* FIXME: The three defines below, for the number of PC, PD and PT entries in a
+ * 4KB page, come from rgxmmudefs_km.h (meaning they're part of the
+ * auto-generated hwdefs). Should these be defined in rogue_mmu.xml? Keeping in
+ * mind that we probably only need these three values. */
+#define ROGUE_NUM_PC_ENTRIES_PER_PAGE 0x400U
+
+#define ROGUE_NUM_PD_ENTRIES_PER_PAGE 0x200U
+
+#define ROGUE_NUM_PT_ENTRIES_PER_PAGE 0x200U
+
+/* PM free list: buffer of physical-page entries handed to the parameter
+ * manager (see pvr_free_list_create() for the sizing rules).
+ */
+struct pvr_free_list {
+   struct pvr_device *device;
+
+   /* Size in bytes of the buffer backing the free list entries. */
+   uint64_t size;
+
+   /* Buffer holding the free list entries themselves. */
+   struct pvr_bo *bo;
+
+   /* Kernel-side free list handle. */
+   struct pvr_winsys_free_list *ws_free_list;
+};
+
+/* Macrotile information, filled in by pvr_rt_mtile_info_init(). */
+struct pvr_rt_mtile_info {
+   /* Tile dimensions from the device feature values. */
+   uint32_t tile_size_x;
+   uint32_t tile_size_y;
+
+   /* Render target extent in tiles (rounded up). */
+   uint32_t num_tiles_x;
+   uint32_t num_tiles_y;
+
+   uint32_t tiles_per_mtile_x;
+   uint32_t tiles_per_mtile_y;
+
+   /* Highest valid tile indices (inclusive). */
+   uint32_t x_tile_max;
+   uint32_t y_tile_max;
+
+   /* Macrotile grid dimensions. */
+   uint32_t mtiles_x;
+   uint32_t mtiles_y;
+
+   /* Macrotile boundary positions, in tiles. */
+   uint32_t mtile_x1;
+   uint32_t mtile_y1;
+   uint32_t mtile_x2;
+   uint32_t mtile_y2;
+   uint32_t mtile_x3;
+   uint32_t mtile_y3;
+
+   /* Tiles covered by one macrotile (mtile_x1 * mtile_y1). */
+   uint32_t mtile_stride;
+};
+
+/* GPU-side state backing a render target: free lists, vheap/RTC, tail
+ * pointer cache and the per-RT-data MTA/MList and region header allocations.
+ */
+struct pvr_rt_dataset {
+   struct pvr_device *device;
+
+   /* RT dataset information */
+   uint32_t width;
+   uint32_t height;
+   uint32_t samples;
+   uint32_t layers;
+
+   struct pvr_free_list *global_free_list;
+   struct pvr_free_list *local_free_list;
+
+   /* Combined buffer: PM vheap table followed (for layered rendering) by the
+    * render target cache; see pvr_rt_vheap_rtc_data_init().
+    */
+   struct pvr_bo *vheap_rtc_bo;
+   pvr_dev_addr_t vheap_dev_addr;
+   /* PVR_DEV_ADDR_INVALID unless layers > 1. */
+   pvr_dev_addr_t rtc_dev_addr;
+
+   /* Tail pointer cache; see pvr_rt_get_tail_ptr_stride_size() for the units
+    * of stride and size.
+    */
+   struct pvr_bo *tpc_bo;
+   uint64_t tpc_stride;
+   uint64_t tpc_size;
+
+   struct pvr_winsys_rt_dataset *ws_rt_dataset;
+
+   /* RT data information */
+   /* Macrotile array + MList storage shared by all rt_datas. */
+   struct pvr_bo *mta_mlist_bo;
+
+   /* ISP region headers for all rt_datas. */
+   struct pvr_bo *rgn_headers_bo;
+   uint64_t rgn_headers_stride;
+
+   /* NOTE(review): presumably tracks whether a fragment phase is still
+    * required/outstanding — confirm against the job submission code.
+    */
+   bool need_frag;
+
+   /* Index of the rt_datas entry currently in use; cycles within
+    * ROGUE_NUM_RTDATAS — confirm the rotation policy at the use site.
+    */
+   uint8_t rt_data_idx;
+
+   struct {
+      pvr_dev_addr_t mta_dev_addr;
+      pvr_dev_addr_t mlist_dev_addr;
+      pvr_dev_addr_t rgn_headers_dev_addr;
+   } rt_datas[ROGUE_NUM_RTDATAS];
+};
+
+/* Create a PM free list.
+ *
+ * 'initial_size', 'max_size' and 'grow_size' are in bytes and are aligned up
+ * internally; an initial_size of 0 is bumped to a single PM physical page.
+ * 'grow_threshold' is a percentage (0-100) — presumably the fill level at
+ * which the list is grown; confirm against the winsys interface.
+ * 'parent_free_list' may be NULL; otherwise the new list is created as its
+ * child. On success the caller owns '*free_list_out' and must release it
+ * with pvr_free_list_destroy().
+ */
+VkResult pvr_free_list_create(struct pvr_device *device,
+                              uint32_t initial_size,
+                              uint32_t max_size,
+                              uint32_t grow_size,
+                              uint32_t grow_threshold,
+                              struct pvr_free_list *parent_free_list,
+                              struct pvr_free_list **const free_list_out)
+{
+   struct pvr_winsys_free_list *parent_ws_free_list =
+      parent_free_list ? parent_free_list->ws_free_list : NULL;
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                             PVR_BO_ALLOC_FLAG_PM_FW_PROTECT;
+   struct pvr_free_list *free_list;
+   uint32_t cache_line_size;
+   uint32_t initial_num_pages;
+   uint32_t grow_num_pages;
+   uint32_t max_num_pages;
+   uint64_t addr_alignment;
+   uint64_t size_alignment;
+   uint64_t size;
+   VkResult result;
+
+   assert((initial_size + grow_size) <= max_size);
+   assert(max_size != 0);
+   assert(grow_threshold <= 100);
+
+   /* Make sure the free list is created with at least a single page. */
+   if (initial_size == 0)
+      initial_size = ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
+
+   /* The freelists sizes must respect the PM freelist base address alignment
+    * requirement. As the freelist entries are cached by the SLC, it's also
+    * necessary to ensure the sizes respect the SLC cache line size to avoid
+    * invalid entries appearing in the cache, which would be problematic after
+    * a grow operation, as the SLC entries aren't invalidated. We do this by
+    * making sure the freelist values are appropriately aligned.
+    *
+    * To calculate the alignment, we first take the largest of the freelist
+    * base address alignment and the SLC cache line size. We then divide this
+    * by the freelist entry size to determine the number of freelist entries
+    * required by the PM. Finally, as each entry holds a single PM physical
+    * page, we multiple the number of entries by the page size.
+    *
+    * As an example, if the base address alignment is 16 bytes, the SLC cache
+    * line size is 64 bytes and the freelist entry size is 4 bytes then 16
+    * entries are required, as we take the SLC cacheline size (being the larger
+    * of the two values) and divide this by 4. If the PM page size is 4096
+    * bytes then we end up with an alignment of 65536 bytes.
+    */
+   cache_line_size = rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+
+   addr_alignment =
+      MAX2(ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE, cache_line_size);
+   size_alignment = (addr_alignment / ROGUE_FREE_LIST_ENTRY_SIZE) *
+                    ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
+
+   assert(util_is_power_of_two_nonzero(size_alignment));
+
+   initial_size = align64(initial_size, size_alignment);
+   max_size = align64(max_size, size_alignment);
+   grow_size = align64(grow_size, size_alignment);
+
+   /* Make sure the 'max' size doesn't exceed what the firmware supports and
+    * adjust the other sizes accordingly.
+    */
+   if (max_size > ROGUE_FREE_LIST_MAX_SIZE) {
+      max_size = ROGUE_FREE_LIST_MAX_SIZE;
+      assert(align64(max_size, size_alignment) == max_size);
+   }
+
+   if (initial_size > max_size)
+      initial_size = max_size;
+
+   /* No room left to grow; disable growing entirely. */
+   if (initial_size == max_size)
+      grow_size = 0;
+
+   initial_num_pages = initial_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+   max_num_pages = max_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+   grow_num_pages = grow_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+
+   /* Calculate the size of the buffer needed to store the free list entries
+    * based on the maximum number of pages we can have.
+    */
+   size = max_num_pages * ROGUE_FREE_LIST_ENTRY_SIZE;
+   assert(align64(size, addr_alignment) == size);
+
+   free_list = vk_alloc(&device->vk.alloc,
+                        sizeof(*free_list),
+                        8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!free_list)
+      /* NOTE(review): pvr_compute_ctx_create() passes device->instance to
+       * vk_error() — confirm which object vk_error should receive and make
+       * these call sites consistent.
+       */
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: The memory is mapped GPU uncached, but this seems to contradict
+    * the comment above about aligning to the SLC cache line size.
+    */
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         size,
+                         addr_alignment,
+                         bo_flags,
+                         &free_list->bo);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_free_list;
+
+   result = device->ws->ops->free_list_create(device->ws,
+                                              free_list->bo->vma,
+                                              initial_num_pages,
+                                              max_num_pages,
+                                              grow_num_pages,
+                                              grow_threshold,
+                                              parent_ws_free_list,
+                                              &free_list->ws_free_list);
+   if (result != VK_SUCCESS)
+      goto err_pvr_bo_free_bo;
+
+   free_list->device = device;
+   free_list->size = size;
+
+   *free_list_out = free_list;
+
+   return VK_SUCCESS;
+
+err_pvr_bo_free_bo:
+   pvr_bo_free(device, free_list->bo);
+
+err_vk_free_free_list:
+   vk_free(&device->vk.alloc, free_list);
+
+   return result;
+}
+
+/* Destroy a free list created with pvr_free_list_create().
+ *
+ * The winsys object is destroyed before the buffer it references is freed;
+ * keep this order.
+ */
+void pvr_free_list_destroy(struct pvr_free_list *free_list)
+{
+   struct pvr_device *device = free_list->device;
+
+   device->ws->ops->free_list_destroy(free_list->ws_free_list);
+   pvr_bo_free(device, free_list->bo);
+   vk_free(&device->vk.alloc, free_list);
+}
+
+/* Decompose a sample count (1/2/4/8) into its X/Y arrangement within a
+ * pixel; any other count is a programming error.
+ */
+static inline void pvr_get_samples_in_xy(uint32_t samples,
+                                         uint32_t *const x_out,
+                                         uint32_t *const y_out)
+{
+   if (samples == 1U) {
+      *x_out = 1;
+      *y_out = 1;
+   } else if (samples == 2U) {
+      *x_out = 1;
+      *y_out = 2;
+   } else if (samples == 4U) {
+      *x_out = 2;
+      *y_out = 2;
+   } else if (samples == 8U) {
+      *x_out = 2;
+      *y_out = 4;
+   } else {
+      unreachable("Unsupported number of samples");
+   }
+}
+
+/* Fill in macrotile geometry for a render target of the given dimensions and
+ * sample count. The layout (16 macrotiles) and the per-macrotile boundary
+ * positions depend on whether the core uses the simple internal parameter
+ * format.
+ */
+static void pvr_rt_mtile_info_init(struct pvr_device *device,
+                                   struct pvr_rt_mtile_info *info,
+                                   uint32_t width,
+                                   uint32_t height,
+                                   uint32_t samples)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint32_t samples_in_x;
+   uint32_t samples_in_y;
+
+   pvr_get_samples_in_xy(samples, &samples_in_x, &samples_in_y);
+
+   info->tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 1);
+   info->tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 1);
+
+   info->num_tiles_x = DIV_ROUND_UP(width, info->tile_size_x);
+   info->num_tiles_y = DIV_ROUND_UP(height, info->tile_size_y);
+
+   rogue_get_num_macrotiles_xy(dev_info, &info->mtiles_x, &info->mtiles_y);
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      /* Only version 2 of the simple parameter format is supported here. */
+      assert(PVR_GET_FEATURE_VALUE(dev_info,
+                                   simple_parameter_format_version,
+                                   0) == 2);
+      /* Set up 16 macrotiles with a multiple of 2x2 tiles per macrotile,
+       * which is aligned to a tile group.
+       */
+      info->mtile_x1 = DIV_ROUND_UP(info->num_tiles_x, 8) * 2;
+      info->mtile_y1 = DIV_ROUND_UP(info->num_tiles_y, 8) * 2;
+      info->mtile_x2 = 0;
+      info->mtile_y2 = 0;
+      info->mtile_x3 = 0;
+      info->mtile_y3 = 0;
+      info->x_tile_max = ALIGN_POT(info->num_tiles_x, 2) - 1;
+      info->y_tile_max = ALIGN_POT(info->num_tiles_y, 2) - 1;
+   } else {
+      /* Set up 16 macrotiles with a multiple of 4x4 tiles per macrotile. */
+      info->mtile_x1 = ALIGN_POT(DIV_ROUND_UP(info->num_tiles_x, 4), 4);
+      info->mtile_y1 = ALIGN_POT(DIV_ROUND_UP(info->num_tiles_y, 4), 4);
+      info->mtile_x2 = info->mtile_x1 * 2;
+      info->mtile_y2 = info->mtile_y1 * 2;
+      info->mtile_x3 = info->mtile_x1 * 3;
+      info->mtile_y3 = info->mtile_y1 * 3;
+      info->x_tile_max = info->num_tiles_x - 1;
+      info->y_tile_max = info->num_tiles_y - 1;
+   }
+
+   /* Scale by the per-pixel sample arrangement. */
+   info->tiles_per_mtile_x = info->mtile_x1 * samples_in_x;
+   info->tiles_per_mtile_y = info->mtile_y1 * samples_in_y;
+
+   info->mtile_stride = info->mtile_x1 * info->mtile_y1;
+}
+
+/* Note that the unit of the return value depends on the GPU. For cores with the
+ * simple_internal_parameter_format feature the returned size is interpreted as
+ * the number of region headers. For cores without this feature its interpreted
+ * as the size in dwords.
+ */
+static uint64_t
+pvr_rt_get_isp_region_size(struct pvr_device *device,
+                           const struct pvr_rt_mtile_info *mtile_info)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   /* Start with the number of tiles in a single macrotile. */
+   uint64_t rgn_size =
+      mtile_info->tiles_per_mtile_x * mtile_info->tiles_per_mtile_y;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      uint32_t version;
+
+      /* Scale up to cover the whole macrotile grid. */
+      rgn_size *= mtile_info->mtiles_x * mtile_info->mtiles_y;
+
+      if (PVR_FEATURE_VALUE(dev_info,
+                            simple_parameter_format_version,
+                            &version)) {
+         /* Feature value unavailable; treat as version 0. */
+         version = 0;
+      }
+
+      if (version == 2) {
+         /* One region header per 2x2 tile group. */
+         rgn_size /= (2U * 2U);
+      }
+   } else {
+      const uint64_t rgn_header_size = rogue_get_region_header_size(dev_info);
+
+      /* Round up to next dword to prevent IPF overrun and convert to bytes.
+       */
+      rgn_size = DIV_ROUND_UP(rgn_size * rgn_header_size, 4);
+   }
+
+   return rgn_size;
+}
+
+/* Allocate the combined PM vheap table + render target cache (RTC) buffer.
+ * The RTC portion only exists for layered rendering (layers > 1); otherwise
+ * 'rtc_dev_addr' is left invalid.
+ */
+static VkResult pvr_rt_vheap_rtc_data_init(struct pvr_device *device,
+                                           struct pvr_rt_dataset *rt_dataset,
+                                           uint32_t layers)
+{
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
+   uint64_t vheap_size;
+   uint32_t alignment;
+   uint64_t rtc_size;
+   VkResult result;
+
+   vheap_size = ROGUE_CR_PM_VHEAP_TABLE_SIZE * ROGUE_PM_VHEAP_ENTRY_SIZE;
+
+   if (layers > 1) {
+      uint64_t rtc_entries;
+
+      /* The RTC follows the vheap in the same buffer, so the vheap size must
+       * respect the RTC base address alignment.
+       */
+      vheap_size = ALIGN_POT(vheap_size, PVRX(CR_TA_RTC_ADDR_BASE_ALIGNMENT));
+
+      rtc_entries = ROGUE_NUM_TEAC + ROGUE_NUM_TE + ROGUE_NUM_VCE;
+      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 48545))
+         rtc_entries += ROGUE_NUM_TE;
+
+      rtc_size = rtc_entries * ROGUE_RTC_SIZE_IN_BYTES;
+   } else {
+      rtc_size = 0;
+   }
+
+   /* Satisfy both base address alignment requirements with one allocation. */
+   alignment = MAX2(PVRX(CR_PM_VHEAP_TABLE_BASE_ADDR_ALIGNMENT),
+                    PVRX(CR_TA_RTC_ADDR_BASE_ALIGNMENT));
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         vheap_size + rtc_size,
+                         alignment,
+                         bo_flags,
+                         &rt_dataset->vheap_rtc_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   rt_dataset->vheap_dev_addr = rt_dataset->vheap_rtc_bo->vma->dev_addr;
+
+   if (rtc_size > 0) {
+      rt_dataset->rtc_dev_addr.addr =
+         rt_dataset->vheap_dev_addr.addr + vheap_size;
+   } else {
+      rt_dataset->rtc_dev_addr = PVR_DEV_ADDR_INVALID;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Release the combined vheap/RTC buffer and invalidate the cached RTC
+ * address.
+ */
+static void pvr_rt_vheap_rtc_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   struct pvr_device *const device = rt_dataset->device;
+
+   rt_dataset->rtc_dev_addr = PVR_DEV_ADDR_INVALID;
+
+   pvr_bo_free(device, rt_dataset->vheap_rtc_bo);
+   rt_dataset->vheap_rtc_bo = NULL;
+}
+
+/* Compute the tail pointer cache stride and total size.
+ *
+ * 'size_out' is in bytes. For layered rendering (layers > 1) the per-layer
+ * size is rounded up to a PM physical page and 'stride_out' is the per-layer
+ * stride in units of PM physical pages; otherwise the stride is 0.
+ */
+static void
+pvr_rt_get_tail_ptr_stride_size(const struct pvr_device *device,
+                                const struct pvr_rt_mtile_info *mtile_info,
+                                uint32_t layers,
+                                uint64_t *const stride_out,
+                                uint64_t *const size_out)
+{
+   uint32_t max_num_mtiles;
+   uint32_t num_mtiles_x;
+   uint32_t num_mtiles_y;
+   uint32_t version;
+   uint64_t size;
+
+   num_mtiles_x = mtile_info->mtiles_x * mtile_info->tiles_per_mtile_x;
+   num_mtiles_y = mtile_info->mtiles_y * mtile_info->tiles_per_mtile_y;
+
+   /* Size for a square power-of-two grid covering both dimensions. */
+   max_num_mtiles = MAX2(util_next_power_of_two64(num_mtiles_x),
+                         util_next_power_of_two64(num_mtiles_y));
+
+   size = max_num_mtiles * max_num_mtiles;
+
+   if (PVR_FEATURE_VALUE(&device->pdevice->dev_info,
+                         simple_parameter_format_version,
+                         &version)) {
+      /* Feature value unavailable; treat as version 0. */
+      version = 0;
+   }
+
+   if (version == 2) {
+      /* One tail pointer cache entry per 2x2 tile group. */
+      size /= (2U * 2U);
+   }
+
+   size *= ROGUE_TAIL_POINTER_SIZE;
+
+   if (layers > 1) {
+      size = ALIGN_POT(size, ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE);
+
+      *stride_out = size / ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
+      *size_out = size * layers;
+   } else {
+      *stride_out = 0;
+      *size_out = size;
+   }
+}
+
+/* Allocate the tail pointer cache (TPC) buffer for the RT dataset, recording
+ * its stride and unaligned size in the dataset.
+ */
+static VkResult pvr_rt_tpc_data_init(struct pvr_device *device,
+                                     struct pvr_rt_dataset *rt_dataset,
+                                     const struct pvr_rt_mtile_info *mtile_info,
+                                     uint32_t layers)
+{
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
+   uint64_t aligned_size;
+
+   pvr_rt_get_tail_ptr_stride_size(device,
+                                   mtile_info,
+                                   layers,
+                                   &rt_dataset->tpc_stride,
+                                   &rt_dataset->tpc_size);
+
+   /* The allocation itself is padded to a whole TPC cache line. */
+   aligned_size = ALIGN_POT(rt_dataset->tpc_size, ROGUE_TE_TPC_CACHE_LINE_SIZE);
+
+   return pvr_bo_alloc(device,
+                       device->heaps.general_heap,
+                       aligned_size,
+                       PVRX(CR_TE_TPC_ADDR_BASE_ALIGNMENT),
+                       bo_flags,
+                       &rt_dataset->tpc_bo);
+}
+
+/* Free the tail pointer cache buffer and clear the dangling pointer. */
+static void pvr_rt_tpc_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   struct pvr_device *const device = rt_dataset->device;
+
+   pvr_bo_free(device, rt_dataset->tpc_bo);
+   rt_dataset->tpc_bo = NULL;
+}
+
+/* Size in bytes of a single MList: enough pages to hold the PM page table,
+ * directory and catalog entries covering the whole (global + local) free
+ * list, for every PM address space, rounded up to a PM physical page.
+ */
+static uint32_t
+pvr_rt_get_mlist_size(const struct pvr_free_list *global_free_list,
+                      const struct pvr_free_list *local_free_list)
+{
+   uint32_t num_pte_pages;
+   uint32_t num_pde_pages;
+   uint32_t num_pce_pages;
+   uint64_t total_pages;
+   uint32_t mlist_size;
+
+   assert(global_free_list->size + local_free_list->size <=
+          ROGUE_PM_MAX_PB_VIRT_ADDR_SPACE);
+
+   total_pages = (global_free_list->size + local_free_list->size) >>
+                 ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+
+   /* Calculate the total number of physical pages required to hold the page
+    * table, directory and catalog entries for the freelist pages.
+    */
+   num_pte_pages = DIV_ROUND_UP(total_pages, ROGUE_NUM_PT_ENTRIES_PER_PAGE);
+   num_pde_pages = DIV_ROUND_UP(num_pte_pages, ROGUE_NUM_PD_ENTRIES_PER_PAGE);
+   num_pce_pages = DIV_ROUND_UP(num_pde_pages, ROGUE_NUM_PC_ENTRIES_PER_PAGE);
+
+   /* Calculate the MList size considering the total number of pages in the PB
+    * are shared among all the PM address spaces.
+    */
+   mlist_size = (num_pce_pages + num_pde_pages + num_pte_pages) *
+                ROGUE_NUM_PM_ADDRESS_SPACES * ROGUE_MLIST_ENTRY_STRIDE;
+
+   return ALIGN_POT(mlist_size, ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE);
+}
+
+/* Compute the region header stride and the total size in bytes required for
+ * 'layers' layers of region headers.
+ *
+ * NOTE(review): 'stride_out' is set to the size of a single region header
+ * rather than a per-layer block size — confirm this matches what consumers
+ * of 'rgn_headers_stride' expect.
+ */
+static void pvr_rt_get_region_headers_stride_size(
+   const struct pvr_device *device,
+   const struct pvr_rt_mtile_info *mtile_info,
+   uint32_t layers,
+   uint64_t *const stride_out,
+   uint64_t *const size_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t rgn_header_size = rogue_get_region_header_size(dev_info);
+   uint32_t rgn_headers_size;
+   uint32_t num_tiles_x;
+   uint32_t num_tiles_y;
+   uint32_t group_size;
+   uint32_t version;
+
+   if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
+      version = 0;
+
+   /* Version 2 shares one region header between each 2x2 tile group. */
+   group_size = version == 2 ? 2 : 1;
+
+   num_tiles_x = mtile_info->mtiles_x * mtile_info->tiles_per_mtile_x;
+   num_tiles_y = mtile_info->mtiles_y * mtile_info->tiles_per_mtile_y;
+
+   rgn_headers_size =
+      (num_tiles_x / group_size) * (num_tiles_y / group_size) * rgn_header_size;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      rgn_headers_size =
+         ALIGN_POT(rgn_headers_size, PVRX(CR_TE_PSGREGION_ADDR_BASE_ALIGNMENT));
+   }
+
+   if (layers > 1) {
+      rgn_headers_size =
+         ALIGN_POT(rgn_headers_size, PVRX(CR_TE_PSG_REGION_STRIDE_UNIT_SIZE));
+   }
+
+   *stride_out = rgn_header_size;
+   *size_out = rgn_headers_size * layers;
+}
+
+/* Allocate and carve up the single buffer holding the macrotile array (MTA)
+ * and MList for every RT data, recording the per-RT-data device addresses.
+ */
+static VkResult
+pvr_rt_mta_mlist_data_init(struct pvr_device *device,
+                           struct pvr_rt_dataset *rt_dataset,
+                           const struct pvr_free_list *global_free_list,
+                           const struct pvr_free_list *local_free_list,
+                           const struct pvr_rt_mtile_info *mtile_info)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t mlist_size =
+      pvr_rt_get_mlist_size(global_free_list, local_free_list);
+   uint32_t mta_size = rogue_get_macrotile_array_size(dev_info);
+   const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas);
+   uint32_t rt_datas_mlist_size;
+   uint32_t rt_datas_mta_size;
+   pvr_dev_addr_t dev_addr;
+   VkResult result;
+
+   /* Allocate memory for macrotile array and Mlist for all RT datas.
+    *
+    * Allocation layout: MTA[0..N] + Mlist alignment padding + Mlist[0..N].
+    *
+    * N is number of RT datas.
+    */
+   rt_datas_mta_size = ALIGN_POT(mta_size * num_rt_datas,
+                                 PVRX(CR_PM_MLIST0_BASE_ADDR_ALIGNMENT));
+   rt_datas_mlist_size = mlist_size * num_rt_datas;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         rt_datas_mta_size + rt_datas_mlist_size,
+                         PVRX(CR_PM_MTILE_ARRAY_BASE_ADDR_ALIGNMENT),
+                         PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
+                         &rt_dataset->mta_mlist_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   dev_addr = rt_dataset->mta_mlist_bo->vma->dev_addr;
+
+   /* Hand out consecutive MTA slices; a zero-sized MTA means the core has
+    * none, so the address is left invalid.
+    */
+   for (uint32_t i = 0; i < num_rt_datas; i++) {
+      if (mta_size != 0) {
+         rt_dataset->rt_datas[i].mta_dev_addr = dev_addr;
+         dev_addr.addr += mta_size;
+      } else {
+         rt_dataset->rt_datas[i].mta_dev_addr = PVR_DEV_ADDR_INVALID;
+      }
+   }
+
+   /* MLists start after the (aligned) MTA region. */
+   dev_addr.addr =
+      rt_dataset->mta_mlist_bo->vma->dev_addr.addr + rt_datas_mta_size;
+
+   for (uint32_t i = 0; i < num_rt_datas; i++) {
+      if (mlist_size != 0) {
+         rt_dataset->rt_datas[i].mlist_dev_addr = dev_addr;
+         dev_addr.addr += mlist_size;
+      } else {
+         rt_dataset->rt_datas[i].mlist_dev_addr = PVR_DEV_ADDR_INVALID;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Invalidate the cached per-RT-data MTA/MList addresses, then release the
+ * buffer that backed them.
+ */
+static void pvr_rt_mta_mlist_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   for (uint32_t u = 0; u < ARRAY_SIZE(rt_dataset->rt_datas); ++u) {
+      rt_dataset->rt_datas[u].mta_dev_addr = PVR_DEV_ADDR_INVALID;
+      rt_dataset->rt_datas[u].mlist_dev_addr = PVR_DEV_ADDR_INVALID;
+   }
+
+   pvr_bo_free(rt_dataset->device, rt_dataset->mta_mlist_bo);
+   rt_dataset->mta_mlist_bo = NULL;
+}
+
+/* Allocate ISP region headers for every RT data from the region header heap
+ * and record the per-RT-data device addresses.
+ */
+static VkResult
+pvr_rt_rgn_headers_data_init(struct pvr_device *device,
+                             struct pvr_rt_dataset *rt_dataset,
+                             const struct pvr_rt_mtile_info *mtile_info,
+                             uint32_t layers)
+{
+   const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas);
+   uint64_t rgn_headers_size;
+   pvr_dev_addr_t dev_addr;
+   VkResult result;
+
+   pvr_rt_get_region_headers_stride_size(device,
+                                         mtile_info,
+                                         layers,
+                                         &rt_dataset->rgn_headers_stride,
+                                         &rgn_headers_size);
+
+   /* One contiguous slice of 'rgn_headers_size' bytes per RT data. */
+   result = pvr_bo_alloc(device,
+                         device->heaps.rgn_hdr_heap,
+                         rgn_headers_size * num_rt_datas,
+                         PVRX(CR_TE_PSGREGION_ADDR_BASE_ALIGNMENT),
+                         PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
+                         &rt_dataset->rgn_headers_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   dev_addr = rt_dataset->rgn_headers_bo->vma->dev_addr;
+
+   for (uint32_t i = 0; i < num_rt_datas; i++) {
+      rt_dataset->rt_datas[i].rgn_headers_dev_addr = dev_addr;
+      dev_addr.addr += rgn_headers_size;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Invalidate the cached region header addresses, then release the buffer. */
+static void pvr_rt_rgn_headers_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   for (uint32_t u = 0; u < ARRAY_SIZE(rt_dataset->rt_datas); u++)
+      rt_dataset->rt_datas[u].rgn_headers_dev_addr = PVR_DEV_ADDR_INVALID;
+
+   pvr_bo_free(rt_dataset->device, rt_dataset->rgn_headers_bo);
+   rt_dataset->rgn_headers_bo = NULL;
+}
+
+/* Initialize the per-RT-data GPU resources: the MTA/MList allocation and the
+ * ISP region headers for every RT data in the dataset.
+ *
+ * Returns VK_SUCCESS, or the error from the failing sub-allocation after any
+ * partially initialized state has been cleaned up.
+ */
+static VkResult pvr_rt_datas_init(struct pvr_device *device,
+                                  struct pvr_rt_dataset *rt_dataset,
+                                  const struct pvr_free_list *global_free_list,
+                                  const struct pvr_free_list *local_free_list,
+                                  const struct pvr_rt_mtile_info *mtile_info,
+                                  uint32_t layers)
+{
+   VkResult result;
+
+   result = pvr_rt_mta_mlist_data_init(device,
+                                       rt_dataset,
+                                       global_free_list,
+                                       local_free_list,
+                                       mtile_info);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result =
+      pvr_rt_rgn_headers_data_init(device, rt_dataset, mtile_info, layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_mta_mlist_data_fini;
+
+   return VK_SUCCESS;
+
+err_pvr_rt_mta_mlist_data_fini:
+   pvr_rt_mta_mlist_data_fini(rt_dataset);
+
+   /* Bug fix: propagate the failure. Previously this path returned
+    * VK_SUCCESS, making callers treat a failed, already-torn-down dataset
+    * (with a freed mta_mlist_bo and no rgn_headers_bo) as valid.
+    */
+   return result;
+}
+
+/* Tear down per-RT-data resources in reverse order of pvr_rt_datas_init(). */
+static void pvr_rt_datas_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   pvr_rt_rgn_headers_data_fini(rt_dataset);
+   pvr_rt_mta_mlist_data_fini(rt_dataset);
+}
+
+/* Pack CR_ISP_MTILE_SIZE from the macrotile dimensions, doubling X/Y as
+ * needed to account for sample counts beyond the ISP's native
+ * samples-per-pixel capability.
+ */
+static uint32_t
+pvr_rogue_get_cr_isp_mtile_size_val(const struct pvr_device_info *dev_info,
+                                    uint32_t samples,
+                                    const struct pvr_rt_mtile_info *mtile_info)
+{
+   uint32_t samples_per_pixel =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0);
+   uint32_t isp_mtile_size;
+
+   pvr_csb_pack (&isp_mtile_size, CR_ISP_MTILE_SIZE, value) {
+      value.x = mtile_info->mtile_x1;
+      value.y = mtile_info->mtile_y1;
+
+      if (samples_per_pixel == 1) {
+         if (samples >= 4)
+            value.x <<= 1;
+
+         if (samples >= 2)
+            value.y <<= 1;
+      } else if (samples_per_pixel == 2) {
+         if (samples >= 8)
+            value.x <<= 1;
+
+         if (samples >= 4)
+            value.y <<= 1;
+      } else if (samples_per_pixel == 4) {
+         if (samples >= 8)
+            value.y <<= 1;
+      } else {
+         assert(!"Unsupported ISP samples per pixel value");
+      }
+   }
+
+   return isp_mtile_size;
+}
+
+/* Pack CR_PPP_MULTISAMPLECTL: per-sample X/Y positions in 1/16th-of-a-pixel
+ * units (8 being the pixel centre). When 'y_flip' is set the Y positions are
+ * mirrored about the pixel centre (16 - y). The switch deliberately falls
+ * through so each sample count also programs the lower-numbered sample
+ * slots from its own position table.
+ */
+static uint64_t pvr_rogue_get_cr_multisamplectl_val(uint32_t samples,
+                                                    bool y_flip)
+{
+   static const struct {
+      uint8_t x[8];
+      uint8_t y[8];
+   } sample_positions[4] = {
+      /* 1 sample */
+      {
+         .x = { 8 },
+         .y = { 8 },
+      },
+      /* 2 samples */
+      {
+         .x = { 12, 4 },
+         .y = { 12, 4 },
+      },
+      /* 4 samples */
+      {
+         .x = { 6, 14, 2, 10 },
+         .y = { 2, 6, 10, 14 },
+      },
+      /* 8 samples */
+      {
+         .x = { 9, 7, 13, 5, 3, 1, 11, 15 },
+         .y = { 5, 11, 9, 3, 13, 7, 15, 1 },
+      },
+   };
+   uint64_t multisamplectl;
+   uint8_t idx;
+
+   /* Table index: log2 of the sample count (1/2/4/8 -> 0..3). */
+   idx = util_fast_log2(samples);
+   assert(idx < ARRAY_SIZE(sample_positions));
+
+   pvr_csb_pack (&multisamplectl, CR_PPP_MULTISAMPLECTL, value) {
+      switch (samples) {
+      case 8:
+         value.msaa_x7 = sample_positions[idx].x[7];
+         value.msaa_x6 = sample_positions[idx].x[6];
+         value.msaa_x5 = sample_positions[idx].x[5];
+         value.msaa_x4 = sample_positions[idx].x[4];
+
+         if (y_flip) {
+            value.msaa_y7 = 16U - sample_positions[idx].y[7];
+            value.msaa_y6 = 16U - sample_positions[idx].y[6];
+            value.msaa_y5 = 16U - sample_positions[idx].y[5];
+            value.msaa_y4 = 16U - sample_positions[idx].y[4];
+         } else {
+            value.msaa_y7 = sample_positions[idx].y[7];
+            value.msaa_y6 = sample_positions[idx].y[6];
+            value.msaa_y5 = sample_positions[idx].y[5];
+            value.msaa_y4 = sample_positions[idx].y[4];
+         }
+
+         FALLTHROUGH;
+      case 4:
+         value.msaa_x3 = sample_positions[idx].x[3];
+         value.msaa_x2 = sample_positions[idx].x[2];
+
+         if (y_flip) {
+            value.msaa_y3 = 16U - sample_positions[idx].y[3];
+            value.msaa_y2 = 16U - sample_positions[idx].y[2];
+         } else {
+            value.msaa_y3 = sample_positions[idx].y[3];
+            value.msaa_y2 = sample_positions[idx].y[2];
+         }
+
+         FALLTHROUGH;
+      case 2:
+         value.msaa_x1 = sample_positions[idx].x[1];
+
+         if (y_flip) {
+            value.msaa_y1 = 16U - sample_positions[idx].y[1];
+         } else {
+            value.msaa_y1 = sample_positions[idx].y[1];
+         }
+
+         FALLTHROUGH;
+      case 1:
+         value.msaa_x0 = sample_positions[idx].x[0];
+
+         if (y_flip) {
+            value.msaa_y0 = 16U - sample_positions[idx].y[0];
+         } else {
+            value.msaa_y0 = sample_positions[idx].y[0];
+         }
+
+         break;
+      default:
+         unreachable("Unsupported number of samples");
+      }
+   }
+
+   return multisamplectl;
+}
+
+/* Pack CR_TE_AA, which configures the tiling engine's anti-aliasing for the
+ * requested sample count. Which enable bits (x/y/x2/y2) get set for a given
+ * sample count depends on how many samples per pixel the ISP processes
+ * natively (isp_samples_per_pixel feature).
+ */
+static uint32_t
+pvr_rogue_get_cr_te_aa_val(const struct pvr_device_info *dev_info,
+                           uint32_t samples)
+{
+   uint32_t samples_per_pixel =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0);
+   uint32_t te_aa;
+
+   pvr_csb_pack (&te_aa, CR_TE_AA, value) {
+      if (samples_per_pixel == 1) {
+         if (samples >= 2)
+            value.y = true;
+         if (samples >= 4)
+            value.x = true;
+      } else if (samples_per_pixel == 2) {
+         if (samples >= 2)
+            value.x2 = true;
+         if (samples >= 4)
+            value.y = true;
+         if (samples >= 8)
+            value.x = true;
+      } else if (samples_per_pixel == 4) {
+         if (samples >= 2)
+            value.x2 = true;
+         if (samples >= 4)
+            value.y2 = true;
+         if (samples >= 8)
+            value.y = true;
+      } else {
+         assert(!"Unsupported ISP samples per pixel value");
+      }
+   }
+
+   return te_aa;
+}
+
+/* Fill out the winsys create info for a render target dataset.
+ *
+ * rt_dataset must already be fully initialized (freelists, vheap/RTC, TPC
+ * and per-rt_data buffers), since register values and device addresses are
+ * all derived from it; see the comment in pvr_render_target_dataset_create().
+ */
+static void pvr_rt_dataset_ws_create_info_init(
+   struct pvr_rt_dataset *rt_dataset,
+   const struct pvr_rt_mtile_info *mtile_info,
+   struct pvr_winsys_rt_dataset_create_info *create_info)
+{
+   struct pvr_device *device = rt_dataset->device;
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+
+   memset(create_info, 0, sizeof(*create_info));
+
+   /* Local freelist. */
+   create_info->local_free_list = rt_dataset->local_free_list->ws_free_list;
+
+   /* ISP register values. */
+   /* ISP merge constants are only programmed when ERN 42307 applies, and not
+    * on roguexe cores with a 16-wide tile -- NOTE(review): exact hardware
+    * rationale for the roguexe exclusion not visible here; confirm against
+    * the ERN documentation.
+    */
+   if (PVR_HAS_ERN(dev_info, 42307) &&
+       !(PVR_HAS_FEATURE(dev_info, roguexe) && mtile_info->tile_size_x == 16)) {
+      float value;
+
+      /* Guard against division by zero for degenerate dimensions. */
+      if (rt_dataset->width != 0) {
+         value =
+            ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR / (float)rt_dataset->width;
+         create_info->isp_merge_lower_x = fui(value);
+
+         value =
+            ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR / (float)rt_dataset->width;
+         create_info->isp_merge_upper_x = fui(value);
+      }
+
+      if (rt_dataset->height != 0) {
+         value =
+            ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR / (float)rt_dataset->height;
+         create_info->isp_merge_lower_y = fui(value);
+
+         value =
+            ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR / (float)rt_dataset->height;
+         create_info->isp_merge_upper_y = fui(value);
+      }
+
+      value = ((float)rt_dataset->width * ROGUE_ISP_MERGE_SCALE_FACTOR) /
+              (ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR -
+               ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR);
+      create_info->isp_merge_scale_x = fui(value);
+
+      value = ((float)rt_dataset->height * ROGUE_ISP_MERGE_SCALE_FACTOR) /
+              (ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR -
+               ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR);
+      create_info->isp_merge_scale_y = fui(value);
+   }
+
+   create_info->isp_mtile_size =
+      pvr_rogue_get_cr_isp_mtile_size_val(dev_info,
+                                          rt_dataset->samples,
+                                          mtile_info);
+
+   /* PPP register values. */
+   /* Both orientations are precomputed; the winsys/firmware picks the
+    * flipped variant as needed.
+    */
+   create_info->ppp_multi_sample_ctl =
+      pvr_rogue_get_cr_multisamplectl_val(rt_dataset->samples, false);
+   create_info->ppp_multi_sample_ctl_y_flipped =
+      pvr_rogue_get_cr_multisamplectl_val(rt_dataset->samples, true);
+
+   pvr_csb_pack (&create_info->ppp_screen, CR_PPP_SCREEN, value) {
+      value.pixxmax = rt_dataset->width - 1;
+      value.pixymax = rt_dataset->height - 1;
+   }
+
+   /* TE register values. */
+   create_info->te_aa =
+      pvr_rogue_get_cr_te_aa_val(dev_info, rt_dataset->samples);
+
+   pvr_csb_pack (&create_info->te_mtile1, CR_TE_MTILE1, value) {
+      value.x1 = mtile_info->mtile_x1;
+      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+         value.x2 = mtile_info->mtile_x2;
+         value.x3 = mtile_info->mtile_x3;
+      }
+   }
+
+   pvr_csb_pack (&create_info->te_mtile2, CR_TE_MTILE2, value) {
+      value.y1 = mtile_info->mtile_y1;
+      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+         value.y2 = mtile_info->mtile_y2;
+         value.y3 = mtile_info->mtile_y3;
+      }
+   }
+
+   pvr_csb_pack (&create_info->te_screen, CR_TE_SCREEN, value) {
+      value.xmax = mtile_info->x_tile_max;
+      value.ymax = mtile_info->y_tile_max;
+   }
+
+   /* Allocations and associated information. */
+   create_info->vheap_table_dev_addr = rt_dataset->vheap_dev_addr;
+   create_info->rtc_dev_addr = rt_dataset->rtc_dev_addr;
+
+   create_info->tpc_dev_addr = rt_dataset->tpc_bo->vma->dev_addr;
+   create_info->tpc_stride = rt_dataset->tpc_stride;
+   create_info->tpc_size = rt_dataset->tpc_size;
+
+   STATIC_ASSERT(ARRAY_SIZE(create_info->rt_datas) ==
+                 ARRAY_SIZE(rt_dataset->rt_datas));
+   for (uint32_t i = 0; i < ARRAY_SIZE(create_info->rt_datas); i++) {
+      create_info->rt_datas[i].pm_mlist_dev_addr =
+         rt_dataset->rt_datas[i].mlist_dev_addr;
+      create_info->rt_datas[i].macrotile_array_dev_addr =
+         rt_dataset->rt_datas[i].mta_dev_addr;
+      create_info->rt_datas[i].rgn_header_dev_addr =
+         rt_dataset->rt_datas[i].rgn_headers_dev_addr;
+   }
+
+   create_info->rgn_header_size =
+      pvr_rt_get_isp_region_size(device, mtile_info);
+
+   /* Miscellaneous. */
+   create_info->mtile_stride = mtile_info->mtile_stride;
+   create_info->max_rts = rt_dataset->layers;
+}
+
+/**
+ * Create a render target dataset.
+ *
+ * Allocates the dataset, a minimum-sized local freelist, the vheap/RTC, TPC
+ * and per-rt_data buffers, then hands the assembled description to the
+ * winsys. On any failure the already-initialized pieces are unwound in
+ * reverse order via the goto chain below.
+ *
+ * \param[in]  device         Device to create the dataset on.
+ * \param[in]  width          Render width in pixels.
+ * \param[in]  height         Render height in pixels.
+ * \param[in]  samples        Sample count for the renders using this dataset.
+ * \param[in]  layers         Layer count (1..PVR_MAX_FRAMEBUFFER_LAYERS).
+ * \param[out] rt_dataset_out Created dataset, valid on VK_SUCCESS only.
+ */
+VkResult
+pvr_render_target_dataset_create(struct pvr_device *device,
+                                 uint32_t width,
+                                 uint32_t height,
+                                 uint32_t samples,
+                                 uint32_t layers,
+                                 struct pvr_rt_dataset **const rt_dataset_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   struct pvr_winsys_rt_dataset_create_info rt_dataset_create_info;
+   struct pvr_rt_mtile_info mtile_info;
+   struct pvr_rt_dataset *rt_dataset;
+   VkResult result;
+
+   assert(device->global_free_list);
+   assert(width <= rogue_get_render_size_max_x(dev_info));
+   assert(height <= rogue_get_render_size_max_y(dev_info));
+   assert(layers > 0 && layers <= PVR_MAX_FRAMEBUFFER_LAYERS);
+
+   pvr_rt_mtile_info_init(device, &mtile_info, width, height, samples);
+
+   rt_dataset = vk_zalloc(&device->vk.alloc,
+                          sizeof(*rt_dataset),
+                          8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!rt_dataset)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   rt_dataset->device = device;
+   rt_dataset->width = width;
+   rt_dataset->height = height;
+   rt_dataset->samples = samples;
+   rt_dataset->layers = layers;
+   rt_dataset->global_free_list = device->global_free_list;
+
+   /* The maximum supported free list size is based on the assumption that this
+    * freelist (the "local" freelist) is always the minimum size required by
+    * the hardware. See the documentation of ROGUE_FREE_LIST_MAX_SIZE for more
+    * details.
+    */
+   result = pvr_free_list_create(device,
+                                 rogue_get_min_free_list_size(dev_info),
+                                 rogue_get_min_free_list_size(dev_info),
+                                 0 /* grow_size */,
+                                 0 /* grow_threshold */,
+                                 rt_dataset->global_free_list,
+                                 &rt_dataset->local_free_list);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_rt_dataset;
+
+   result = pvr_rt_vheap_rtc_data_init(device, rt_dataset, layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_free_list_destroy;
+
+   result = pvr_rt_tpc_data_init(device, rt_dataset, &mtile_info, layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_vheap_rtc_data_fini;
+
+   result = pvr_rt_datas_init(device,
+                              rt_dataset,
+                              rt_dataset->global_free_list,
+                              rt_dataset->local_free_list,
+                              &mtile_info,
+                              layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_tpc_data_fini;
+
+   /* rt_dataset must be fully initialized by this point since
+    * pvr_rt_dataset_ws_create_info_init() depends on this.
+    */
+   pvr_rt_dataset_ws_create_info_init(rt_dataset,
+                                      &mtile_info,
+                                      &rt_dataset_create_info);
+
+   result =
+      device->ws->ops->render_target_dataset_create(device->ws,
+                                                    &rt_dataset_create_info,
+                                                    &rt_dataset->ws_rt_dataset);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_datas_fini;
+
+   *rt_dataset_out = rt_dataset;
+
+   return VK_SUCCESS;
+
+err_pvr_rt_datas_fini:
+   pvr_rt_datas_fini(rt_dataset);
+
+err_pvr_rt_tpc_data_fini:
+   pvr_rt_tpc_data_fini(rt_dataset);
+
+err_pvr_rt_vheap_rtc_data_fini:
+   pvr_rt_vheap_rtc_data_fini(rt_dataset);
+
+err_pvr_free_list_destroy:
+   pvr_free_list_destroy(rt_dataset->local_free_list);
+
+err_vk_free_rt_dataset:
+   vk_free(&device->vk.alloc, rt_dataset);
+
+   return result;
+}
+
+/* Destroy a render target dataset: release the winsys object first, then
+ * unwind the driver-side allocations in exact reverse order of
+ * pvr_render_target_dataset_create().
+ */
+void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *rt_dataset)
+{
+   struct pvr_device *device = rt_dataset->device;
+
+   device->ws->ops->render_target_dataset_destroy(rt_dataset->ws_rt_dataset);
+
+   pvr_rt_datas_fini(rt_dataset);
+   pvr_rt_tpc_data_fini(rt_dataset);
+   pvr_rt_vheap_rtc_data_fini(rt_dataset);
+
+   pvr_free_list_destroy(rt_dataset->local_free_list);
+
+   vk_free(&device->vk.alloc, rt_dataset);
+}
+
+/* Initialize the per-submit geometry (TA) phase state: PDS/PPP/TE/TPU/VDM
+ * register values and the geometry submit flags.
+ */
+static void
+pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
+                                      struct pvr_render_job *job,
+                                      struct pvr_winsys_geometry_state *state)
+{
+   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+
+   /* FIXME: Should this just be done unconditionally? The firmware will just
+    * ignore the value anyway.
+    */
+   if (PVR_HAS_QUIRK(dev_info, 56279)) {
+      pvr_csb_pack (&state->regs.pds_ctrl, CR_PDS_CTRL, value) {
+         value.max_num_vdm_tasks = rogue_get_max_num_vdm_pds_tasks(dev_info);
+      }
+   } else {
+      state->regs.pds_ctrl = 0;
+   }
+
+   pvr_csb_pack (&state->regs.ppp_ctrl, CR_PPP_CTRL, value) {
+      value.wclampen = true;
+      value.fixed_point_format = 1;
+   }
+
+   pvr_csb_pack (&state->regs.te_psg, CR_TE_PSG, value) {
+      value.completeonterminate = job->geometry_terminate;
+
+      /* Region header stride is expressed in hardware stride units. */
+      value.region_stride = job->rt_dataset->rgn_headers_stride /
+                            PVRX(CR_TE_PSG_REGION_STRIDE_UNIT_SIZE);
+
+      value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942);
+   }
+
+   /* The set up of CR_TPU must be identical to
+    * pvr_render_job_ws_fragment_state_init().
+    */
+   pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
+      value.tag_cem_4k_face_packing = true;
+   }
+
+   pvr_csb_pack (&state->regs.tpu_border_colour_table,
+                 CR_TPU_BORDER_COLOUR_TABLE_VDM,
+                 value) {
+      value.border_colour_table_address = job->border_colour_table_addr;
+   }
+
+   pvr_csb_pack (&state->regs.vdm_ctrl_stream_base,
+                 CR_VDM_CTRL_STREAM_BASE,
+                 value) {
+      value.addr = job->ctrl_stream_addr;
+   }
+
+   /* Set up the USC common size for the context switch resume/load program
+    * (ctx->ctx_switch.programs[i].sr->pds_load_program), which was created
+    * as part of the render context.
+    */
+   pvr_csb_pack (&state->regs.vdm_ctx_resume_task0_size,
+                 VDMCTRL_PDS_STATE0,
+                 value) {
+      /* Calculate the size in bytes. */
+      const uint16_t shared_registers_size = job->max_shared_registers * 4;
+
+      value.usc_common_size =
+         DIV_ROUND_UP(shared_registers_size,
+                      PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
+   };
+
+   state->flags = 0;
+
+   /* No fragment job outstanding means this geometry starts a fresh render;
+    * see pvr_render_job_submit() where need_frag is maintained.
+    */
+   if (!job->rt_dataset->need_frag)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY;
+
+   if (job->geometry_terminate)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY;
+
+   if (job->frag_uses_atomic_ops)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_SINGLE_CORE;
+}
+
+/* Compute how many ISP tiles are needed in X and Y to cover a
+ * width x height render at the given sample count.
+ */
+static inline void
+pvr_get_isp_num_tiles_xy(const struct pvr_device_info *dev_info,
+                         uint32_t samples,
+                         uint32_t width,
+                         uint32_t height,
+                         uint32_t *const x_out,
+                         uint32_t *const y_out)
+{
+   uint32_t samples_per_tile_x;
+   uint32_t samples_per_tile_y;
+   uint32_t x_scale;
+   uint32_t y_scale;
+
+   rogue_get_isp_samples_per_tile_xy(dev_info,
+                                     samples,
+                                     &samples_per_tile_x,
+                                     &samples_per_tile_y);
+
+   /* Per-sample-count scaling applied to the pixel dimensions before
+    * dividing by the per-tile sample capacity.
+    */
+   switch (samples) {
+   case 1:
+      x_scale = y_scale = 1;
+      break;
+   case 2:
+      x_scale = 1;
+      y_scale = 2;
+      break;
+   case 4:
+      x_scale = y_scale = 2;
+      break;
+   case 8:
+      x_scale = 2;
+      y_scale = 4;
+      break;
+   default:
+      unreachable("Unsupported number of samples");
+   }
+
+   *x_out = DIV_ROUND_UP(width * x_scale, samples_per_tile_x);
+   *y_out = DIV_ROUND_UP(height * y_scale, samples_per_tile_y);
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      assert(PVR_GET_FEATURE_VALUE(dev_info,
+                                   simple_parameter_format_version,
+                                   0U) == 2U);
+      /* Align to a 2x2 tile block. */
+      *x_out = ALIGN_POT(*x_out, 2);
+      *y_out = ALIGN_POT(*y_out, 2);
+   }
+}
+
+/* Initialize the per-submit fragment (3D) phase state: ISP/TPU/PBE/event
+ * register values and the fragment submit flags, derived from the job and
+ * the device's capabilities.
+ */
+static void
+pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
+                                      struct pvr_render_job *job,
+                                      struct pvr_winsys_fragment_state *state)
+{
+   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+   enum PVRX(CR_ISP_AA_MODE_TYPE) isp_aa_mode;
+   uint32_t isp_ctl;
+
+   /* FIXME: what to do when job->run_frag is false? */
+
+   switch (job->samples) {
+   case 1:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE);
+      break;
+   case 2:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_2X);
+      break;
+   /* Sample counts are powers of two; 4x AA corresponds to samples == 4.
+    * This previously matched on 3, which made every 4-sample job fall into
+    * the unreachable() default.
+    */
+   case 4:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_4X);
+      break;
+   case 8:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_8X);
+      break;
+   default:
+      unreachable("Unsupported number of samples");
+   }
+
+   /* FIXME: pass in the number of samples rather than isp_aa_mode? */
+   pvr_setup_tiles_in_flight(dev_info,
+                             isp_aa_mode,
+                             job->pixel_output_width,
+                             false,
+                             job->max_tiles_in_flight,
+                             &isp_ctl,
+                             &state->regs.usc_pixel_output_ctrl);
+
+   pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, value) {
+      value.sample_pos = true;
+
+      /* FIXME: There are a number of things that cause this to be set, this
+       * is just one of them.
+       */
+      value.process_empty_tiles = job->process_empty_tiles;
+   }
+
+   /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
+    * possible to fully pack CR_ISP_CTL above rather than having to OR in part
+    * of the value.
+    */
+   state->regs.isp_ctl |= isp_ctl;
+
+   pvr_csb_pack (&state->regs.isp_aa, CR_ISP_AA, value) {
+      value.mode = isp_aa_mode;
+   }
+
+   /* The set up of CR_TPU must be identical to
+    * pvr_render_job_ws_geometry_state_init().
+    */
+   pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
+      value.tag_cem_4k_face_packing = true;
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
+       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
+       rogue_get_num_phantoms(dev_info) > 1 && job->frag_uses_atomic_ops) {
+      /* Each phantom has its own MCU, so atomicity can only be guaranteed
+       * when all work items are processed on the same phantom. This means we
+       * need to disable all USCs other than those of the first phantom, which
+       * has 4 clusters. Note that we only need to do this for atomic
+       * operations in fragment shaders, since hardware prevents the TA to run
+       * on more than one phantom anyway.
+       */
+      state->regs.pixel_phantom = 0xF;
+   } else {
+      state->regs.pixel_phantom = 0;
+   }
+
+   pvr_csb_pack (&state->regs.isp_bgobjvals, CR_ISP_BGOBJVALS, value) {
+      value.enablebgtag = job->enable_bg_tag;
+
+      value.mask = true;
+
+      /* FIXME: Hard code this for now as we don't currently support any
+       * stencil image formats.
+       */
+      value.stencil = 0xFF;
+   }
+
+   pvr_csb_pack (&state->regs.isp_bgobjdepth, CR_ISP_BGOBJDEPTH, value) {
+      /* FIXME: This is suitable for the single depth format the driver
+       * currently supports, but may need updating to handle other depth
+       * formats.
+       */
+      value.value = fui(job->depth_clear_value);
+   }
+
+   /* FIXME: Some additional set up needed to support depth and stencil
+    * load/store operations.
+    */
+   pvr_csb_pack (&state->regs.isp_zlsctl, CR_ISP_ZLSCTL, value) {
+      uint32_t aligned_width =
+         ALIGN_POT(job->depth_physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
+      uint32_t aligned_height =
+         ALIGN_POT(job->depth_physical_height, ROGUE_IPF_TILE_SIZE_PIXELS);
+
+      pvr_get_isp_num_tiles_xy(dev_info,
+                               job->samples,
+                               aligned_width,
+                               aligned_height,
+                               &value.zlsextent_x_z,
+                               &value.zlsextent_y_z);
+      value.zlsextent_x_z -= 1;
+      value.zlsextent_y_z -= 1;
+
+      if (job->depth_memlayout == PVR_MEMLAYOUT_TWIDDLED) {
+         value.loadtwiddled = true;
+         value.storetwiddled = true;
+      }
+
+      /* FIXME: This is suitable for the single depth format the driver
+       * currently supports, but may need updating to handle other depth
+       * formats.
+       */
+      assert(job->depth_vk_format == VK_FORMAT_D32_SFLOAT);
+      value.zloadformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
+      value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
+      pvr_csb_pack (&state->regs.isp_zls_pixels, CR_ISP_ZLS_PIXELS, value) {
+         value.x = job->depth_stride - 1;
+         value.y = job->depth_height - 1;
+      }
+   } else {
+      state->regs.isp_zls_pixels = 0;
+   }
+
+   pvr_csb_pack (&state->regs.isp_zload_store_base, CR_ISP_ZLOAD_BASE, value) {
+      value.addr = job->depth_addr;
+   }
+
+   pvr_csb_pack (&state->regs.isp_stencil_load_store_base,
+                 CR_ISP_STENCIL_LOAD_BASE,
+                 value) {
+      value.addr = job->stencil_addr;
+
+      /* FIXME: May need to set value.enable to true. */
+   }
+
+   pvr_csb_pack (&state->regs.tpu_border_colour_table,
+                 CR_TPU_BORDER_COLOUR_TABLE_PDM,
+                 value) {
+      value.border_colour_table_address = job->border_colour_table_addr;
+   }
+
+   state->regs.isp_oclqry_base = 0;
+
+   pvr_csb_pack (&state->regs.isp_dbias_base, CR_ISP_DBIAS_BASE, value) {
+      value.addr = job->depth_bias_table_addr;
+   }
+
+   pvr_csb_pack (&state->regs.isp_scissor_base, CR_ISP_SCISSOR_BASE, value) {
+      value.addr = job->scissor_table_addr;
+   }
+
+   pvr_csb_pack (&state->regs.event_pixel_pds_info,
+                 CR_EVENT_PIXEL_PDS_INFO,
+                 value) {
+      value.const_size =
+         DIV_ROUND_UP(ctx->device->pixel_event_data_size_in_dwords,
+                      PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE));
+      value.temp_stride = 0;
+      value.usc_sr_size =
+         DIV_ROUND_UP(PVR_STATE_PBE_DWORDS,
+                      PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE));
+   }
+
+   pvr_csb_pack (&state->regs.event_pixel_pds_data,
+                 CR_EVENT_PIXEL_PDS_DATA,
+                 value) {
+      value.addr.addr = job->pds_pixel_event_data_offset;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word) ==
+                 ARRAY_SIZE(job->pbe_reg_words));
+   STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word[0]) ==
+                 ARRAY_SIZE(job->pbe_reg_words[0]));
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(job->pbe_reg_words); i++) {
+      state->regs.pbe_word[i][0] = job->pbe_reg_words[i][0];
+      state->regs.pbe_word[i][1] = job->pbe_reg_words[i][1];
+      state->regs.pbe_word[i][2] = job->pbe_reg_words[i][2];
+   }
+
+   STATIC_ASSERT(__same_type(state->regs.pds_bgnd, job->pds_bgnd_reg_values));
+   typed_memcpy(state->regs.pds_bgnd,
+                job->pds_bgnd_reg_values,
+                ARRAY_SIZE(state->regs.pds_bgnd));
+
+   memset(state->regs.pds_pr_bgnd, 0, sizeof(state->regs.pds_pr_bgnd));
+
+   /* FIXME: Merge geometry and fragment flags into a single flags member? */
+   /* FIXME: move to its own function? */
+   state->flags = 0;
+
+   if (job->depth_addr.addr)
+      state->flags |= PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT;
+
+   if (job->stencil_addr.addr)
+      state->flags |= PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT;
+
+   if (job->disable_compute_overlap)
+      state->flags |= PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP;
+
+   if (job->frag_uses_atomic_ops)
+      state->flags |= PVR_WINSYS_FRAG_FLAG_SINGLE_CORE;
+
+   state->zls_stride = job->depth_layer_size;
+   state->sls_stride = job->depth_layer_size;
+}
+
+/* Assemble the winsys render submit info for a job: bookkeeping fields, the
+ * caller-provided BO and semaphore lists, and the geometry/fragment register
+ * state.
+ */
+static void pvr_render_job_ws_submit_info_init(
+   struct pvr_render_ctx *ctx,
+   struct pvr_render_job *job,
+   const struct pvr_winsys_job_bo *bos,
+   uint32_t bo_count,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_render_submit_info *submit_info)
+{
+   memset(submit_info, 0, sizeof(*submit_info));
+
+   submit_info->rt_dataset = job->rt_dataset->ws_rt_dataset;
+   submit_info->rt_data_idx = job->rt_dataset->rt_data_idx;
+
+   submit_info->frame_num = ctx->device->global_queue_present_count;
+   submit_info->job_num = ctx->device->global_queue_job_count;
+
+   submit_info->run_frag = job->run_frag;
+
+   submit_info->bos = bos;
+   submit_info->bo_count = bo_count;
+
+   submit_info->semaphores = semaphores;
+   submit_info->semaphore_count = semaphore_count;
+   submit_info->stage_flags = stage_flags;
+
+   /* FIXME: add WSI image bos. */
+
+   pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry);
+   pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment);
+
+   /* These values are expected to match. */
+   assert(submit_info->geometry.regs.tpu == submit_info->fragment.regs.tpu);
+}
+
+/**
+ * Submit a render job (geometry phase, and optionally the fragment phase) to
+ * the winsys.
+ *
+ * On success, when a fragment job was submitted the dataset advances to the
+ * next rt_data so the following geometry job can run in parallel with this
+ * fragment job; otherwise the dataset is marked as still owing a fragment
+ * job (need_frag).
+ *
+ * \param[out] syncobj_geom_out Sync object for the geometry phase.
+ * \param[out] syncobj_frag_out Sync object for the fragment phase.
+ */
+VkResult
+pvr_render_job_submit(struct pvr_render_ctx *ctx,
+                      struct pvr_render_job *job,
+                      const struct pvr_winsys_job_bo *bos,
+                      uint32_t bo_count,
+                      const VkSemaphore *semaphores,
+                      uint32_t semaphore_count,
+                      uint32_t *stage_flags,
+                      struct pvr_winsys_syncobj **const syncobj_geom_out,
+                      struct pvr_winsys_syncobj **const syncobj_frag_out)
+{
+   struct pvr_rt_dataset *rt_dataset = job->rt_dataset;
+   struct pvr_winsys_render_submit_info submit_info;
+   struct pvr_device *device = ctx->device;
+   VkResult result;
+
+   pvr_render_job_ws_submit_info_init(ctx,
+                                      job,
+                                      bos,
+                                      bo_count,
+                                      semaphores,
+                                      semaphore_count,
+                                      stage_flags,
+                                      &submit_info);
+
+   result = device->ws->ops->render_submit(ctx->ws_ctx,
+                                           &submit_info,
+                                           syncobj_geom_out,
+                                           syncobj_frag_out);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (job->run_frag) {
+      /* Move to the next render target data now that a fragment job has been
+       * successfully submitted. This will allow the next geometry job to be
+       * submitted to been run in parallel with it.
+       */
+      rt_dataset->rt_data_idx =
+         (rt_dataset->rt_data_idx + 1) % ARRAY_SIZE(rt_dataset->rt_datas);
+
+      rt_dataset->need_frag = false;
+   } else {
+      rt_dataset->need_frag = true;
+   }
+
+   return VK_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_RENDER_H
+#define PVR_JOB_RENDER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_limits.h"
+#include "pvr_winsys.h"
+
+struct pvr_device;
+struct pvr_free_list;
+struct pvr_render_ctx;
+struct pvr_rt_dataset;
+
+/* FIXME: Turn 'struct pvr_sub_cmd' into 'struct pvr_job' and change 'struct
+ * pvr_render_job' to subclass it? This is approximately what v3dv does
+ * (although it doesn't subclass).
+ */
+struct pvr_render_job {
+   /* Render target dataset this job renders into. */
+   struct pvr_rt_dataset *rt_dataset;
+
+   /* Whether the fragment phase runs for this submission; see
+    * pvr_render_job_submit().
+    */
+   bool run_frag;
+   /* Sets CR_TE_PSG completeonterminate and the LAST_GEOMETRY flag. */
+   bool geometry_terminate;
+   /* Restricts work to a single phantom/core for fragment atomics. */
+   bool frag_uses_atomic_ops;
+   bool disable_compute_overlap;
+   /* Enables background object tagging (CR_ISP_BGOBJVALS.enablebgtag). */
+   bool enable_bg_tag;
+   bool process_empty_tiles;
+
+   uint32_t pds_pixel_event_data_offset;
+
+   /* VDM control stream base address. */
+   pvr_dev_addr_t ctrl_stream_addr;
+
+   pvr_dev_addr_t border_colour_table_addr;
+   pvr_dev_addr_t depth_bias_table_addr;
+   pvr_dev_addr_t scissor_table_addr;
+
+   /* Depth attachment description; a zero depth_addr means no depth buffer
+    * is present.
+    */
+   pvr_dev_addr_t depth_addr;
+   uint32_t depth_stride;
+   uint32_t depth_height;
+   uint32_t depth_physical_width;
+   uint32_t depth_physical_height;
+   uint32_t depth_layer_size;
+   float depth_clear_value;
+   VkFormat depth_vk_format;
+   /* FIXME: This should be of type 'enum pvr_memlayout', but this is defined
+    * in pvr_private.h, which causes a circular include dependency. For now,
+    * treat it has a uint32_t. A couple of ways to possibly fix this:
+    *
+    * 1. Merge the contents of this header file into pvr_private.h.
+    * 2. Move 'enum pvr_memlayout' into it a new header that can be included
+    *    by both this header and pvr_private.h.
+    */
+   uint32_t depth_memlayout;
+
+   pvr_dev_addr_t stencil_addr;
+
+   /* MSAA sample count; must be a power of two (1, 2, 4 or 8). */
+   uint32_t samples;
+
+   uint32_t pixel_output_width;
+
+   uint8_t max_shared_registers;
+
+   /* Upper limit for tiles in flight, '0' means use default limit based
+    * on partition store.
+    */
+   uint32_t max_tiles_in_flight;
+
+   /* Pre-packed PBE register words, one set per color attachment. */
+   uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
+                         [ROGUE_NUM_PBESTATE_REG_WORDS];
+
+   uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+};
+
+/* Create a parameter memory freelist. grow_size/grow_threshold control
+ * on-demand growth -- NOTE(review): exact growth semantics are defined by
+ * the implementation in pvr_job_render.c; confirm there.
+ */
+VkResult pvr_free_list_create(struct pvr_device *device,
+                              uint32_t initial_size,
+                              uint32_t max_size,
+                              uint32_t grow_size,
+                              uint32_t grow_threshold,
+                              struct pvr_free_list *parent_free_list,
+                              struct pvr_free_list **const free_list_out);
+void pvr_free_list_destroy(struct pvr_free_list *free_list);
+
+/* Create/destroy a render target dataset for renders of the given
+ * dimensions, sample count and layer count.
+ */
+VkResult
+pvr_render_target_dataset_create(struct pvr_device *device,
+                                 uint32_t width,
+                                 uint32_t height,
+                                 uint32_t samples,
+                                 uint32_t layers,
+                                 struct pvr_rt_dataset **const rt_dataset_out);
+void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *dataset);
+
+/* Submit a render job; returns sync objects for the geometry and fragment
+ * phases on success.
+ */
+VkResult
+pvr_render_job_submit(struct pvr_render_ctx *ctx,
+                      struct pvr_render_job *job,
+                      const struct pvr_winsys_job_bo *bos,
+                      uint32_t bo_count,
+                      const VkSemaphore *semaphores,
+                      uint32_t semaphore_count,
+                      uint32_t *stage_flags,
+                      struct pvr_winsys_syncobj **const syncobj_geom_out,
+                      struct pvr_winsys_syncobj **const syncobj_frag_out);
+
+#endif /* PVR_JOB_RENDER_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * Constants for VkPhysicalDeviceLimits.
+ */
+
+#ifndef PVR_LIMITS_H
+#define PVR_LIMITS_H
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_device_info.h"
+#include "util/u_math.h"
+
+/* Driver-wide limits reported through VkPhysicalDeviceLimits. */
+#define PVR_MAX_COLOR_ATTACHMENTS 8U
+#define PVR_MAX_QUEUES 2U
+#define PVR_MAX_VIEWPORTS 1U
+#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
+
+#define PVR_MAX_PUSH_CONSTANTS_SIZE 128U
+
+#define PVR_MAX_DESCRIPTOR_SETS 4U
+#define PVR_MAX_FRAMEBUFFER_LAYERS ROGUE_MAX_RENDER_TARGETS
+
+/* The limit is somewhat arbitrary, it just translates into more pds code
+ * and larger arrays, 32 appears to be the popular (and highest choice) across
+ * other implementations.
+ */
+#define PVR_MAX_VERTEX_INPUT_BINDINGS 16U
+
+/* We need one RenderTarget per supported MSAA mode as each render target
+ * contains state that is dependent on the sample count of the render that is
+ * rendering to it.
+ *
+ * As we do not know the sample count until we know the renderpass framebuffer
+ * combination being used, we create one per supported sample mode.
+ *
+ * Evaluates to log2(max_multisample) + 1, i.e. one per power-of-two sample
+ * count up to the device's maximum.
+ */
+#define PVR_RENDER_TARGETS_PER_FRAMEBUFFER(dev_info) \
+   ({ \
+      uint32_t __ret = PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4U); \
+      util_logbase2(__ret) + 1; \
+   })
+
+#endif
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_device_info.h"
+#include "pvr_formats.h"
+#include "pvr_hw_pass.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_usc_fragment_shader.h"
+#include "rogue/rogue.h"
+#include "vk_alloc.h"
+#include "vk_format.h"
+#include "vk_log.h"
+
+/*****************************************************************************
+ PDS pre-baked program generation parameters and variables.
+*****************************************************************************/
+/* These would normally be produced by the compiler or other code. We're using
+ * them for now just to speed up things. All of these should eventually be
+ * removed.
+ */
+
+/* Hard-coded stand-in for compiler-produced fragment program metadata; see
+ * the note above -- all of this should eventually come from the compiler.
+ */
+static const struct {
+   /* Indicates the amount of temporaries for the shader. */
+   uint32_t temp_count;
+   enum rogue_msaa_mode msaa_mode;
+   /* Indicates the presence of PHAS instruction. */
+   bool has_phase_rate_change;
+} pvr_pds_fragment_program_params = {
+   .temp_count = 0,
+   .msaa_mode = ROGUE_MSAA_MODE_PIXEL,
+   .has_phase_rate_change = false,
+};
+
+/* Return true if any input attachment of the subpass is multisampled. */
+static inline bool pvr_subpass_has_msaa_input_attachment(
+   struct pvr_render_subpass *subpass,
+   const VkRenderPassCreateInfo2KHR *pCreateInfo)
+{
+   for (uint32_t idx = 0; idx < subpass->input_count; idx++) {
+      const uint32_t attach_idx = subpass->input_attachments[idx];
+
+      if (pCreateInfo->pAttachments[attach_idx].samples > 1)
+         return true;
+   }
+
+   return false;
+}
+
+/* Total number of attachment references in a subpass description: inputs,
+ * colors, resolves (one per color when present) and depth/stencil.
+ */
+static inline size_t
+pvr_num_subpass_attachments(const VkSubpassDescription2 *desc)
+{
+   size_t count = desc->inputAttachmentCount + desc->colorAttachmentCount;
+
+   if (desc->pResolveAttachments)
+      count += desc->colorAttachmentCount;
+
+   if (desc->pDepthStencilAttachment != NULL)
+      count++;
+
+   return count;
+}
+
+/* Decide whether the load-op init operations of a HW render must be flushed
+ * before the first subpass runs, so HSR cannot obscure and eliminate them.
+ */
+static bool pvr_is_subpass_initops_flush_needed(
+   const struct pvr_render_pass *pass,
+   const struct pvr_renderpass_hwsetup_render *hw_render)
+{
+   struct pvr_render_subpass *first_subpass = &pass->subpasses[0];
+   uint32_t load_op_mask = 0;
+   uint32_t written_mask = 0;
+
+   /* One bit per attachment whose load op does real work. */
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      if (hw_render->color_init[i].op != RENDERPASS_SURFACE_INITOP_NOP)
+         load_op_mask |= (1 << hw_render->color_init[i].driver_id);
+   }
+
+   /* No load ops at all: nothing that could need flushing. */
+   if (load_op_mask == 0)
+      return false;
+
+   /* Input attachments in the first subpass must observe the load op
+    * results. They may be read by fragments with an opaque pass type, so the
+    * load ops have to be flushed or HSR would obscure and eliminate them.
+    */
+   if (first_subpass->input_count != 0)
+      return true;
+
+   /* One bit per color attachment the first subpass writes. */
+   for (uint32_t i = 0; i < first_subpass->color_count; i++) {
+      const int32_t attach_idx = first_subpass->color_attachments[i];
+
+      if (attach_idx != -1)
+         written_mask |= (1 << pass->attachments[attach_idx].index);
+   }
+
+   /* Any load-op attachment not written by the first subpass would get
+    * obscured and removed by HSR unless the load ops are flushed first.
+    */
+   return (load_op_mask & written_mask) != load_op_mask;
+}
+
+/* Assigns each subpass its userpass_spawn value: subpasses are numbered
+ * sequentially within each hw_render, starting from 1 instead of 0 when the
+ * render's init ops have to be flushed first (the flush occupies slot 0).
+ */
+static void
+pvr_init_subpass_userpass_spawn(struct pvr_renderpass_hwsetup *hw_setup,
+                                struct pvr_render_pass *pass,
+                                struct pvr_render_subpass *subpasses)
+{
+   struct pvr_render_subpass *cur_subpass = subpasses;
+
+   for (uint32_t render = 0; render < hw_setup->render_count; render++) {
+      const struct pvr_renderpass_hwsetup_render *hw_render =
+         &hw_setup->renders[render];
+      const uint32_t base =
+         pvr_is_subpass_initops_flush_needed(pass, hw_render) ? 1U : 0U;
+
+      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
+         cur_subpass->userpass_spawn = base + j;
+         cur_subpass++;
+      }
+   }
+
+   /* Every subpass of the pass must have been visited exactly once. */
+   assert(cur_subpass == subpasses + pass->subpass_count);
+}
+
+/* Scans the render's MRT init setup for at least one render target that is
+ * backed by USC output registers.
+ */
+static inline bool pvr_has_output_register_writes(
+   const struct pvr_renderpass_hwsetup_render *hw_render)
+{
+   const uint32_t target_count = hw_render->init_setup.render_targets_count;
+
+   for (uint32_t i = 0; i < target_count; i++) {
+      const struct usc_mrt_resource *resource =
+         &hw_render->init_setup.mrt_resources[i];
+
+      if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER)
+         return true;
+   }
+
+   return false;
+}
+
+/* Creates and uploads the PDS texture-state ("pixel shader SA") program used
+ * by the load op.
+ *
+ * If allocator == NULL, the internal one will be used.
+ *
+ * On success the uploaded program is returned in pds_upload_out; the caller
+ * owns the upload and frees it via pvr_bo_free() on its pvr_bo.
+ */
+static VkResult pvr_pds_texture_state_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_pixel_shader_sa_program program = {
+      .num_texture_dma_kicks = 1,
+   };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   /* First query the code size; the staging buffer only needs to hold the
+    * code segment since no data segment is generated here.
+    */
+   pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program);
+
+   staging_buffer_size = program.code_size * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* The code segment is generated at the start of the staging buffer. */
+   pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer);
+
+   /* Upload the code from offset 0, not &staging_buffer[program.data_size]:
+    * no data segment was generated into this buffer and it was only
+    * allocated large enough for the code, so a data_size offset would read
+    * past the generated code (and potentially past the allocation).
+    */
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               NULL,
+                               0,
+                               0,
+                               staging_buffer,
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+
+   /* The staging buffer is no longer needed whether the upload succeeded or
+    * not.
+    */
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return result;
+}
+
+/* Creates the load op object for hw_render: records which color attachments
+ * are cleared, uploads the (currently hard-coded) USC fragment shader, and
+ * creates the PDS fragment and texture-state programs that drive it.
+ *
+ * If allocator == NULL, the internal one will be used.
+ *
+ * On success the caller owns *load_op_out and must release it with
+ * pvr_load_op_destroy(). On failure all partial uploads are cleaned up in
+ * reverse order of creation via the goto chain below.
+ */
+static VkResult
+pvr_load_op_create(struct pvr_device *device,
+                   const VkAllocationCallbacks *allocator,
+                   struct pvr_renderpass_hwsetup_render *hw_render,
+                   struct pvr_load_op **const load_op_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   struct pvr_load_op *load_op;
+   VkResult result;
+
+   load_op = vk_zalloc2(&device->vk.alloc,
+                        allocator,
+                        sizeof(*load_op),
+                        8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!load_op)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Bit i of clear_mask marks color init i as a clear. Only 'clear' is
+    * implemented so far; 'load' is still pvr_finishme'd.
+    */
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];
+
+      if (color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR)
+         load_op->clear_mask |= 1U << i;
+      else if (color_init->op == RENDERPASS_SURFACE_INITOP_LOAD)
+         pvr_finishme("Missing 'load' load op");
+   }
+
+   result = pvr_gpu_upload_usc(device,
+                               pvr_usc_fragment_shader,
+                               sizeof(pvr_usc_fragment_shader),
+                               cache_line_size,
+                               &load_op->usc_frag_prog_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_load_op;
+
+   /* Parameters come from pvr_pds_fragment_program_params until the compiler
+    * provides them.
+    */
+   result = pvr_pds_fragment_program_create_and_upload(
+      device,
+      allocator,
+      load_op->usc_frag_prog_bo,
+      pvr_pds_fragment_program_params.temp_count,
+      pvr_pds_fragment_program_params.msaa_mode,
+      pvr_pds_fragment_program_params.has_phase_rate_change,
+      &load_op->pds_frag_prog);
+   if (result != VK_SUCCESS)
+      goto err_free_usc_frag_prog_bo;
+
+   result = pvr_pds_texture_state_program_create_and_upload(
+      device,
+      allocator,
+      &load_op->pds_tex_state_prog);
+   if (result != VK_SUCCESS)
+      goto err_free_pds_frag_prog;
+
+   load_op->is_hw_object = true;
+   /* FIXME: These should be based on the USC and PDS programs, but are hard
+    * coded for now.
+    */
+   load_op->const_shareds_count = 1;
+   load_op->shareds_dest_offset = 0;
+   load_op->shareds_count = 1;
+   load_op->temps_count = 1;
+
+   *load_op_out = load_op;
+
+   return VK_SUCCESS;
+
+err_free_pds_frag_prog:
+   pvr_bo_free(device, load_op->pds_frag_prog.pvr_bo);
+
+err_free_usc_frag_prog_bo:
+   pvr_bo_free(device, load_op->usc_frag_prog_bo);
+
+err_free_load_op:
+   vk_free2(&device->vk.alloc, allocator, load_op);
+
+   return result;
+}
+
+/* Frees a load op created by pvr_load_op_create(), including its uploaded
+ * USC and PDS programs.
+ *
+ * A NULL load_op is a no-op: pvr_DestroyRenderPass() iterates hw_renders
+ * whose client_data stays NULL when they have no color init ops, so the
+ * destroy path must tolerate that (the create error path already checks).
+ */
+static void pvr_load_op_destroy(struct pvr_device *device,
+                                const VkAllocationCallbacks *allocator,
+                                struct pvr_load_op *load_op)
+{
+   if (!load_op)
+      return;
+
+   pvr_bo_free(device, load_op->pds_tex_state_prog.pvr_bo);
+   pvr_bo_free(device, load_op->pds_frag_prog.pvr_bo);
+   pvr_bo_free(device, load_op->usc_frag_prog_bo);
+   vk_free2(&device->vk.alloc, allocator, load_op);
+}
+
+/* Number of SPM "load in" tile buffers provisioned per render pass.
+ * NOTE(review): cores with the eight_output_registers feature get 3 buffers,
+ * all others 7 — confirm these counts against the SPM setup code, as only
+ * the feature check itself is visible here.
+ */
+#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info) \
+   ({ \
+      int __ret = 7U; \
+      if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) \
+         __ret = 3U; \
+      __ret; \
+   })
+
+/* Implements vkCreateRenderPass2: builds the driver render pass object, the
+ * per-subpass attachment/dependency lists (all carved out of one zeroed
+ * multialloc), the hardware render setup, and a load op per hw_render that
+ * has color init ops.
+ */
+VkResult pvr_CreateRenderPass2(VkDevice _device,
+                               const VkRenderPassCreateInfo2KHR *pCreateInfo,
+                               const VkAllocationCallbacks *pAllocator,
+                               VkRenderPass *pRenderPass)
+{
+   struct pvr_render_pass_attachment *attachments;
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_render_subpass *subpasses;
+   size_t subpass_attachment_count;
+   uint32_t *subpass_attachments;
+   struct pvr_render_pass *pass;
+   uint32_t *dep_list;
+   bool *flush_on_dep;
+   VkResult result;
+
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &pass, __typeof__(*pass), 1);
+   vk_multialloc_add(&ma,
+                     &attachments,
+                     __typeof__(*attachments),
+                     pCreateInfo->attachmentCount);
+   vk_multialloc_add(&ma,
+                     &subpasses,
+                     __typeof__(*subpasses),
+                     pCreateInfo->subpassCount);
+
+   /* Pre-compute the total number of attachment references so all the
+    * per-subpass lists can share one allocation.
+    */
+   subpass_attachment_count = 0;
+   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+      subpass_attachment_count +=
+         pvr_num_subpass_attachments(&pCreateInfo->pSubpasses[i]);
+   }
+
+   vk_multialloc_add(&ma,
+                     &subpass_attachments,
+                     __typeof__(*subpass_attachments),
+                     subpass_attachment_count);
+   vk_multialloc_add(&ma,
+                     &dep_list,
+                     __typeof__(*dep_list),
+                     pCreateInfo->dependencyCount);
+   vk_multialloc_add(&ma,
+                     &flush_on_dep,
+                     __typeof__(*flush_on_dep),
+                     pCreateInfo->dependencyCount);
+
+   /* Zero-allocated: all counts start at 0 and all pointers at NULL. */
+   if (!vk_multialloc_zalloc2(&ma,
+                              &device->vk.alloc,
+                              pAllocator,
+                              VK_OBJECT_TYPE_RENDER_PASS)) {
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
+   pass->attachment_count = pCreateInfo->attachmentCount;
+   pass->attachments = attachments;
+   pass->subpass_count = pCreateInfo->subpassCount;
+   pass->subpasses = subpasses;
+   pass->max_sample_count = 1;
+
+   /* Copy attachment descriptions. */
+   for (uint32_t i = 0; i < pass->attachment_count; i++) {
+      const VkAttachmentDescription2 *desc = &pCreateInfo->pAttachments[i];
+      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];
+
+      pvr_assert(!(desc->flags & ~VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT));
+
+      attachment->load_op = desc->loadOp;
+      attachment->store_op = desc->storeOp;
+
+      /* vk_format must be assigned before the has_stencil check below:
+       * previously it was still zero-initialized (VK_FORMAT_UNDEFINED) at
+       * that point, so has_stencil was always false and the stencil
+       * load/store ops were never recorded.
+       */
+      attachment->vk_format = desc->format;
+
+      attachment->has_stencil = vk_format_has_stencil(attachment->vk_format);
+      if (attachment->has_stencil) {
+         attachment->stencil_load_op = desc->stencilLoadOp;
+         attachment->stencil_store_op = desc->stencilStoreOp;
+      }
+
+      attachment->sample_count = desc->samples;
+      attachment->initial_layout = desc->initialLayout;
+      attachment->is_pbe_downscalable =
+         pvr_format_is_pbe_downscalable(attachment->vk_format);
+      attachment->index = i;
+
+      if (attachment->sample_count > pass->max_sample_count)
+         pass->max_sample_count = attachment->sample_count;
+   }
+
+   /* Count how many dependencies each subpass has. */
+   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
+      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
+
+      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->srcSubpass != dep->dstSubpass) {
+         pass->subpasses[dep->dstSubpass].dep_count++;
+      }
+   }
+
+   /* Assign reference pointers to lists, and fill in the attachments list, we
+    * need to re-walk the dependencies array later to fill the per-subpass
+    * dependencies lists in.
+    */
+   for (uint32_t i = 0; i < pass->subpass_count; i++) {
+      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
+      struct pvr_render_subpass *subpass = &pass->subpasses[i];
+
+      subpass->pipeline_bind_point = desc->pipelineBindPoint;
+      subpass->sample_count = 1;
+
+      subpass->color_count = desc->colorAttachmentCount;
+      if (subpass->color_count > 0) {
+         bool has_used_color_attachment = false;
+         uint32_t index;
+
+         subpass->color_attachments = subpass_attachments;
+         subpass_attachments += subpass->color_count;
+
+         for (uint32_t j = 0; j < subpass->color_count; j++) {
+            subpass->color_attachments[j] =
+               desc->pColorAttachments[j].attachment;
+
+            if (subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
+               continue;
+
+            index = subpass->color_attachments[j];
+            subpass->sample_count = pass->attachments[index].sample_count;
+            has_used_color_attachment = true;
+         }
+
+         /* With no used color attachment, fall back to the depth/stencil
+          * attachment's sample count.
+          */
+         if (!has_used_color_attachment && desc->pDepthStencilAttachment &&
+             desc->pDepthStencilAttachment->attachment !=
+                VK_ATTACHMENT_UNUSED) {
+            index = desc->pDepthStencilAttachment->attachment;
+            subpass->sample_count = pass->attachments[index].sample_count;
+         }
+      }
+
+      if (desc->pResolveAttachments) {
+         subpass->resolve_attachments = subpass_attachments;
+         subpass_attachments += subpass->color_count;
+
+         for (uint32_t j = 0; j < subpass->color_count; j++) {
+            subpass->resolve_attachments[j] =
+               desc->pResolveAttachments[j].attachment;
+         }
+      }
+
+      subpass->input_count = desc->inputAttachmentCount;
+      if (subpass->input_count > 0) {
+         subpass->input_attachments = subpass_attachments;
+         subpass_attachments += subpass->input_count;
+
+         for (uint32_t j = 0; j < subpass->input_count; j++) {
+            subpass->input_attachments[j] =
+               desc->pInputAttachments[j].attachment;
+         }
+      }
+
+      if (desc->pDepthStencilAttachment) {
+         subpass->depth_stencil_attachment = subpass_attachments++;
+         *subpass->depth_stencil_attachment =
+            desc->pDepthStencilAttachment->attachment;
+      }
+
+      /* Give the dependencies a slice of the subpass_attachments array. */
+      subpass->dep_list = dep_list;
+      dep_list += subpass->dep_count;
+      subpass->flush_on_dep = flush_on_dep;
+      flush_on_dep += subpass->dep_count;
+
+      /* Reset the dependencies count so we can start from 0 and index into
+       * the dependencies array.
+       */
+      subpass->dep_count = 0;
+      subpass->index = i;
+   }
+
+   /* Compute dependencies and populate dep_list and flush_on_dep. */
+   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
+      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
+
+      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->srcSubpass != dep->dstSubpass) {
+         struct pvr_render_subpass *subpass = &pass->subpasses[dep->dstSubpass];
+
+         subpass->dep_list[subpass->dep_count] = dep->srcSubpass;
+         if (pvr_subpass_has_msaa_input_attachment(subpass, pCreateInfo))
+            subpass->flush_on_dep[subpass->dep_count] = true;
+
+         subpass->dep_count++;
+      }
+   }
+
+   pass->max_tilebuffer_count =
+      PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);
+
+   pass->hw_setup = pvr_create_renderpass_hwsetup(device, pass, false);
+   if (!pass->hw_setup) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_pass;
+   }
+
+   pvr_init_subpass_userpass_spawn(pass->hw_setup, pass, pass->subpasses);
+
+   /* Create a load op for every hw_render that has color init ops; renders
+    * without any keep client_data == NULL.
+    */
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_renderpass_hwsetup_render *hw_render =
+         &pass->hw_setup->renders[i];
+      struct pvr_load_op *load_op = NULL;
+
+      if (hw_render->tile_buffers_count)
+         pvr_finishme("Set up tile buffer table");
+
+      if (!hw_render->color_init_count) {
+         assert(!hw_render->client_data);
+         continue;
+      }
+
+      if (!pvr_has_output_register_writes(hw_render))
+         pvr_finishme("Add output register write");
+
+      result = pvr_load_op_create(device, pAllocator, hw_render, &load_op);
+      if (result != VK_SUCCESS)
+         goto err_load_op_destroy;
+
+      hw_render->client_data = load_op;
+   }
+
+   *pRenderPass = pvr_render_pass_to_handle(pass);
+
+   return VK_SUCCESS;
+
+err_load_op_destroy:
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_renderpass_hwsetup_render *hw_render =
+         &pass->hw_setup->renders[i];
+
+      if (hw_render->client_data)
+         pvr_load_op_destroy(device, pAllocator, hw_render->client_data);
+   }
+
+   pvr_destroy_renderpass_hwsetup(device, pass->hw_setup);
+
+err_free_pass:
+   vk_object_base_finish(&pass->base);
+   vk_free2(&device->vk.alloc, pAllocator, pass);
+
+   return result;
+}
+
+/* Implements vkDestroyRenderPass: tears down the load ops, the hardware
+ * render setup, and the pass object itself. A NULL render pass handle is a
+ * no-op, per the Vulkan spec.
+ */
+void pvr_DestroyRenderPass(VkDevice _device,
+                           VkRenderPass _pass,
+                           const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_render_pass, pass, _pass);
+
+   if (!pass)
+      return;
+
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_renderpass_hwsetup_render *hw_render =
+         &pass->hw_setup->renders[i];
+
+      /* hw_renders without color init ops never get a load op attached in
+       * pvr_CreateRenderPass2(), so client_data can legitimately be NULL
+       * here; calling pvr_load_op_destroy() on it would dereference NULL.
+       */
+      if (hw_render->client_data)
+         pvr_load_op_destroy(device, pAllocator, hw_render->client_data);
+   }
+
+   pvr_destroy_renderpass_hwsetup(device, pass->hw_setup);
+   vk_object_base_finish(&pass->base);
+   vk_free2(&device->vk.alloc, pAllocator, pass);
+}
+
+/* Implements vkGetRenderAreaGranularity. Granularity does not depend on any
+ * settings in the render pass, so the tile size is reported directly; the
+ * fallback of 16 is the minimum value found across all existing cores.
+ */
+void pvr_GetRenderAreaGranularity(VkDevice _device,
+                                  VkRenderPass renderPass,
+                                  VkExtent2D *pGranularity)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+
+   *pGranularity = (VkExtent2D){
+      .width = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16),
+      .height = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 16),
+   };
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on v3dv driver which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "compiler/shader_enums.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "nir/nir.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_shader.h"
+#include "pvr_usc_compute_shader.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue.h"
+#include "rogue/rogue_build_data.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "util/ralloc.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
+#define WORKGROUP_DIMENSIONS 3U
+
+/* FIXME: Remove this when the compiler is hooked up. */
+/******************************************************************************
+ Hard coding
+ ******************************************************************************/
+/* This section contains hard coding related structs. */
+
+/* Describes where explicit (pre-assigned) shader constants land in the
+ * hardware register file. Part of the temporary hard-coded data that the
+ * compiler will eventually provide.
+ */
+struct pvr_explicit_constant_usage {
+   /* Hardware register number assigned to the explicit constant with the lower
+    * pre_assigned offset.
+    */
+   uint32_t start_offset;
+};
+
+/* Hard-coded register assignments consumed when building the compute PDS
+ * program. These stand in for compiler output (see the "Hard coding" note
+ * above) and should eventually be removed.
+ */
+static const struct {
+   /* Registers receiving the local invocation IDs (x, y packed elsewhere). */
+   uint32_t local_invocation_regs[2];
+
+   /* Registers receiving the workgroup IDs, one per dimension. */
+   uint32_t work_group_regs[WORKGROUP_DIMENSIONS];
+
+   /* Register used for barriers; ROGUE_REG_UNUSED when no barrier. */
+   uint32_t barrier_reg;
+
+   /* Number of USC temporaries used by the kernel. */
+   uint32_t usc_temps;
+} pvr_pds_compute_program_params = {
+   .local_invocation_regs = { 0, 1 },
+
+   .work_group_regs = { 0, 1, 2 },
+
+   .barrier_reg = ROGUE_REG_UNUSED,
+
+   .usc_temps = 0,
+};
+
+/*****************************************************************************
+ PDS functions
+*****************************************************************************/
+
+/* Builds and uploads the PDS coefficient (iterator) loading program.
+ *
+ * If allocator == NULL, the internal one will be used.
+ */
+static VkResult pvr_pds_coeff_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   const uint32_t *fpu_iterators,
+   uint32_t fpu_iterators_count,
+   const uint32_t *destinations,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_coeff_loading_program program = {
+      .num_fpu_iterators = fpu_iterators_count,
+   };
+   uint32_t buffer_size;
+   uint32_t *buffer;
+   VkResult result;
+
+   assert(fpu_iterators_count < PVR_MAXIMUM_ITERATIONS);
+
+   /* First pass only computes program.data_size and program.code_size. */
+   pvr_pds_coefficient_loading(&program, NULL, PDS_GENERATE_SIZES);
+
+   buffer_size = (program.code_size + program.data_size) * sizeof(*buffer);
+
+   buffer = vk_alloc2(&device->vk.alloc,
+                      allocator,
+                      buffer_size,
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: Should we save pointers when we redesign the pds gen api ? */
+   typed_memcpy(program.FPU_iterators, fpu_iterators, program.num_fpu_iterators);
+   typed_memcpy(program.destination, destinations, program.num_fpu_iterators);
+
+   /* Second pass emits the data segment at buffer[0] with the code segment
+    * immediately after it.
+    */
+   pvr_pds_coefficient_loading(&program, buffer, PDS_GENERATE_CODEDATA_SEGMENTS);
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               buffer,
+                               program.data_size,
+                               16,
+                               buffer + program.data_size,
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+
+   /* The staging memory is no longer needed either way. */
+   vk_free2(&device->vk.alloc, allocator, buffer);
+
+   return result;
+}
+
+/* Builds and uploads the PDS program that kicks a USC fragment shader.
+ *
+ * fragment_shader_bo is the already-uploaded USC code; its device address is
+ * baked into the DOUTU task control word along with the temp count, sample
+ * rate (derived from msaa_mode) and PHAS flag.
+ */
+/* FIXME: move this elsewhere since it's also called in pvr_pass.c? */
+/* If allocator == NULL, the internal one will be used. */
+VkResult pvr_pds_fragment_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   const struct pvr_bo *fragment_shader_bo,
+   uint32_t fragment_temp_count,
+   enum rogue_msaa_mode msaa_mode,
+   bool has_phase_rate_change,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const enum PVRX(PDSINST_DOUTU_SAMPLE_RATE)
+      sample_rate = pvr_sample_rate_from_usc_msaa_mode(msaa_mode);
+   struct pvr_pds_kickusc_program program = { 0 };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   /* FIXME: Should it be passing in the USC offset rather than address here?
+    */
+   /* Note this is not strictly required to be done before calculating the
+    * staging_buffer_size in this particular case. It can also be done after
+    * allocating the buffer. The size from pvr_pds_kick_usc() is constant.
+    */
+   pvr_pds_setup_doutu(&program.usc_task_control,
+                       fragment_shader_bo->vma->dev_addr.addr,
+                       fragment_temp_count,
+                       sample_rate,
+                       has_phase_rate_change);
+
+   /* Size-only pass; fills program.data_size and program.code_size. */
+   pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES);
+
+   staging_buffer_size =
+      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Generate data at staging_buffer[0] and code right after it. */
+   pvr_pds_kick_usc(&program,
+                    staging_buffer,
+                    0,
+                    false,
+                    PDS_GENERATE_CODEDATA_SEGMENTS);
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               &staging_buffer[0],
+                               program.data_size,
+                               16,
+                               &staging_buffer[program.data_size],
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return result;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+static inline size_t pvr_pds_get_max_vertex_program_const_map_size_in_bytes(
+   const struct pvr_device_info *dev_info,
+   bool robust_buffer_access)
+{
+   /* Maximum memory allocation needed for const map entries in
+    * pvr_pds_generate_vertex_primary_program().
+    * When robustBufferAccess is disabled, it must be >= 410.
+    * When robustBufferAccess is enabled, it must be >= 570.
+    *
+    * 1. Size of entry for base instance
+    *        (pvr_const_map_entry_base_instance)
+    *
+    * 2. Max. number of vertex inputs (PVR_MAX_VERTEX_INPUT_BINDINGS) * (
+    *     if (!robustBufferAccess)
+    *         size of vertex attribute entry
+    *             (pvr_const_map_entry_vertex_attribute_address) +
+    *     else
+    *         size of robust vertex attribute entry
+    *             (pvr_const_map_entry_robust_vertex_attribute_address) +
+    *         size of entry for max attribute index
+    *             (pvr_const_map_entry_vertex_attribute_max_index) +
+    *     fi
+    *     size of Unified Store burst entry
+    *         (pvr_const_map_entry_literal32) +
+    *     size of entry for vertex stride
+    *         (pvr_const_map_entry_literal32) +
+    *     size of entries for DDMAD control word
+    *         (num_ddmad_literals * pvr_const_map_entry_literal32))
+    *
+    * 3. Size of entry for DOUTW vertex/instance control word
+    *     (pvr_const_map_entry_literal32)
+    *
+    * 4. Size of DOUTU entry (pvr_const_map_entry_doutu_address)
+    */
+
+   const size_t base_instance_size =
+      sizeof(struct pvr_const_map_entry_base_instance);
+   const size_t literal32_size = sizeof(struct pvr_const_map_entry_literal32);
+   const size_t doutu_size = sizeof(struct pvr_const_map_entry_doutu_address);
+
+   const size_t attribute_size =
+      (!robust_buffer_access)
+         ? sizeof(struct pvr_const_map_entry_vertex_attribute_address)
+         : sizeof(struct pvr_const_map_entry_robust_vertex_attribute_address) +
+              sizeof(struct pvr_const_map_entry_vertex_attribute_max_index);
+
+   /* If has_pds_ddmadt the DDMAD control word is now a DDMADT control word
+    * and is increased by one DWORD to contain the data for the DDMADT's
+    * out-of-bounds check.
+    */
+   const size_t num_ddmad_literals =
+      1U + (size_t)PVR_HAS_FEATURE(dev_info, pds_ddmadt);
+
+   /* Worst-case const map footprint of a single vertex input binding:
+    * attribute entry/entries + US burst + stride + DDMAD literals.
+    */
+   const size_t per_binding_size =
+      attribute_size + (2 + num_ddmad_literals) * literal32_size;
+
+   return base_instance_size +
+          PVR_MAX_VERTEX_INPUT_BINDINGS * per_binding_size +
+          literal32_size + doutu_size;
+}
+
+/* This is a const pointer to an array of pvr_pds_vertex_dma structs.
+ * The array being pointed to is of PVR_MAX_VERTEX_ATTRIB_DMAS size.
+ * Passing a pointer-to-array (rather than a decayed element pointer) keeps
+ * the fixed bound visible in callee signatures.
+ */
+typedef struct pvr_pds_vertex_dma (
+   *const
+      pvr_pds_attrib_dma_descriptions_array_ptr)[PVR_MAX_VERTEX_ATTRIB_DMAS];
+
+/* Translates the Vulkan vertex input state into PDS vertex DMA descriptors.
+ *
+ * dma_descriptions_out_ptr is a pointer to the array used as output.
+ * The whole array might not be filled so dma_count_out indicates how many
+ * elements were used. A NULL vertex_input_state yields zero DMAs.
+ */
+static void pvr_pds_vertex_attrib_init_dma_descriptions(
+   const VkPipelineVertexInputStateCreateInfo *const vertex_input_state,
+   const struct rogue_vs_build_data *vs_data,
+   pvr_pds_attrib_dma_descriptions_array_ptr dma_descriptions_out_ptr,
+   uint32_t *const dma_count_out)
+{
+   struct pvr_pds_vertex_dma *const dma_descriptions =
+      *dma_descriptions_out_ptr;
+   uint32_t dma_count = 0;
+
+   if (!vertex_input_state) {
+      *dma_count_out = 0;
+      return;
+   }
+
+   for (uint32_t i = 0; i < vertex_input_state->vertexAttributeDescriptionCount;
+        i++) {
+      const VkVertexInputAttributeDescription *const attrib_desc =
+         &vertex_input_state->pVertexAttributeDescriptions[i];
+      const VkVertexInputBindingDescription *binding_desc = NULL;
+
+      /* Finding the matching binding description. */
+      for (uint32_t j = 0;
+           j < vertex_input_state->vertexBindingDescriptionCount;
+           j++) {
+         const VkVertexInputBindingDescription *const current_binding_desc =
+            &vertex_input_state->pVertexBindingDescriptions[j];
+
+         if (current_binding_desc->binding == attrib_desc->binding) {
+            binding_desc = current_binding_desc;
+            break;
+         }
+      }
+
+      /* From the Vulkan 1.2.195 spec for
+       * VkPipelineVertexInputStateCreateInfo:
+       *
+       *    "For every binding specified by each element of
+       *    pVertexAttributeDescriptions, a
+       *    VkVertexInputBindingDescription must exist in
+       *    pVertexBindingDescriptions with the same value of binding"
+       *
+       * So we don't check if we found the matching binding description
+       * or not.
+       */
+
+      struct pvr_pds_vertex_dma *const dma_desc = &dma_descriptions[dma_count];
+
+      size_t location = attrib_desc->location;
+      assert(location < vs_data->inputs.num_input_vars);
+
+      dma_desc->offset = attrib_desc->offset;
+      dma_desc->stride = binding_desc->stride;
+
+      dma_desc->flags = 0;
+
+      if (binding_desc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
+         dma_desc->flags |= PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE;
+
+      /* NOTE(review): size/destination come from the VS build data's
+       * per-location component counts and base registers — confirm these
+       * stay in sync with the compiler's input layout.
+       */
+      dma_desc->size_in_dwords = vs_data->inputs.components[location];
+      /* TODO: This will be different when other types are supported.
+       * Store in vs_data with base and components?
+       */
+      /* TODO: Use attrib_desc->format. */
+      dma_desc->component_size_in_bytes = ROGUE_REG_SIZE_BYTES;
+      dma_desc->destination = vs_data->inputs.base[location];
+      dma_desc->binding_index = attrib_desc->binding;
+      dma_desc->divisor = 1;
+      dma_desc->robustness_buffer_offset = 0;
+
+      ++dma_count;
+   }
+
+   *dma_count_out = dma_count;
+}
+
+/* Generates and uploads one vertex-attribute PDS program variant.
+ *
+ * The const map entries describing the program's data segment are written to
+ * program_out->info.entries (heap-allocated, shrunk to the written size);
+ * the caller releases them via pvr_pds_vertex_attrib_program_destroy().
+ */
+static VkResult pvr_pds_vertex_attrib_program_create_and_upload(
+   struct pvr_device *const device,
+   const VkAllocationCallbacks *const allocator,
+   struct pvr_pds_vertex_primary_program_input *const input,
+   struct pvr_pds_attrib_program *const program_out)
+{
+   const size_t const_entries_size_in_bytes =
+      pvr_pds_get_max_vertex_program_const_map_size_in_bytes(
+         &device->pdevice->dev_info,
+         device->features.robustBufferAccess);
+   struct pvr_pds_upload *const program = &program_out->program;
+   struct pvr_pds_info *const info = &program_out->info;
+   struct pvr_const_map_entry *entries_buffer;
+   struct pvr_const_map_entry *shrunk_entries;
+   ASSERTED uint32_t code_size_in_dwords;
+   size_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   memset(info, 0, sizeof(*info));
+
+   entries_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              const_entries_size_in_bytes,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!entries_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   info->entries = entries_buffer;
+   info->entries_size_in_bytes = const_entries_size_in_bytes;
+
+   /* First pass computes sizes only (no staging buffer yet). */
+   pvr_pds_generate_vertex_primary_program(input,
+                                           NULL,
+                                           info,
+                                           device->features.robustBufferAccess,
+                                           &device->pdevice->dev_info);
+
+   code_size_in_dwords = info->code_size_in_dwords;
+   staging_buffer_size = info->code_size_in_dwords * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer) {
+      vk_free2(&device->vk.alloc, allocator, entries_buffer);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   /* This also fills in info->entries. */
+   pvr_pds_generate_vertex_primary_program(input,
+                                           staging_buffer,
+                                           info,
+                                           device->features.robustBufferAccess,
+                                           &device->pdevice->dev_info);
+
+   assert(info->code_size_in_dwords <= code_size_in_dwords);
+
+   /* Shrink the entries buffer to what was actually written. Assign through
+    * a temporary so the original buffer is not leaked if vk_realloc() fails.
+    */
+   /* FIXME: Add a vk_realloc2() ? */
+   shrunk_entries = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
+                               entries_buffer,
+                               info->entries_written_size_in_bytes,
+                               8,
+                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!shrunk_entries) {
+      vk_free2(&device->vk.alloc, allocator, entries_buffer);
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+   entries_buffer = shrunk_entries;
+
+   info->entries = entries_buffer;
+   info->entries_size_in_bytes = info->entries_written_size_in_bytes;
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               NULL,
+                               0,
+                               0,
+                               staging_buffer,
+                               info->code_size_in_dwords,
+                               16,
+                               16,
+                               program);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, entries_buffer);
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+      /* Propagate the actual upload error instead of masking it as host
+       * OOM, consistent with the other *_create_and_upload() helpers.
+       */
+      return result;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+/* Releases one vertex-attribute PDS program: the uploaded program memory and
+ * the heap-allocated const map entries created by
+ * pvr_pds_vertex_attrib_program_create_and_upload().
+ */
+static inline void pvr_pds_vertex_attrib_program_destroy(
+   struct pvr_device *const device,
+   const struct VkAllocationCallbacks *const allocator,
+   struct pvr_pds_attrib_program *const program)
+{
+   pvr_bo_free(device, program->program.pvr_bo);
+   vk_free2(&device->vk.alloc, allocator, program->info.entries);
+}
+
+/* This is a const pointer to an array of pvr_pds_attrib_program structs.
+ * The array being pointed to is of PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT size,
+ * i.e. one slot per vertex-attribute program variant.
+ */
+typedef struct pvr_pds_attrib_program (*const pvr_pds_attrib_programs_array_ptr)
+   [PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];
+
+/* Generate and uploads a PDS program for DMAing vertex attribs into USC vertex
+ * inputs. This will bake the code segment and create a template of the data
+ * segment for the command buffer to fill in.
+ */
+/* If allocator == NULL, the internal one will be used.
+ *
+ * programs_out_ptr is a pointer to the array where the outputs will be placed.
+ */
+static VkResult pvr_pds_vertex_attrib_programs_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *const allocator,
+   const VkPipelineVertexInputStateCreateInfo *const vertex_input_state,
+   uint32_t usc_temp_count,
+   const struct rogue_vs_build_data *vs_data,
+   pvr_pds_attrib_programs_array_ptr programs_out_ptr)
+{
+   struct pvr_pds_vertex_dma dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS];
+   struct pvr_pds_attrib_program *const programs_out = *programs_out_ptr;
+   struct pvr_pds_vertex_primary_program_input input = {
+      .dma_list = dma_descriptions,
+   };
+   VkResult result;
+
+   pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state,
+                                               vs_data,
+                                               &dma_descriptions,
+                                               &input.dma_count);
+
+   pvr_pds_setup_doutu(&input.usc_task_control,
+                       0,
+                       usc_temp_count,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   /* TODO: If statements for all the "bRequired"s + ui32ExtraFlags. */
+
+   /* One pass per program variant; programs_out_ptr points at an array, so
+    * ARRAY_SIZE() yields the variant count (see the typedef above).
+    */
+   for (uint32_t i = 0; i < ARRAY_SIZE(*programs_out_ptr); i++) {
+      /* Each variant fully reassigns input.flags, so the DRAW_INDIRECT case
+       * implicitly drops the BASE_INSTANCE flag set in the previous pass.
+       */
+      if (i == PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC)
+         input.flags = 0;
+      else if (i == PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE)
+         input.flags = PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT;
+      else if (i == PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT)
+         input.flags = PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT;
+      else
+         unreachable("Invalid vertex attrib program type.");
+
+      result =
+         pvr_pds_vertex_attrib_program_create_and_upload(device,
+                                                         allocator,
+                                                         &input,
+                                                         &programs_out[i]);
+      if (result != VK_SUCCESS) {
+         /* Unwind the variants uploaded by earlier iterations. */
+         while (i-- > 0) {
+            pvr_pds_vertex_attrib_program_destroy(device,
+                                                  allocator,
+                                                  &programs_out[i]);
+         }
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Returns the worst-case size, in bytes, of the const map entries emitted by
+ * pvr_pds_generate_descriptor_upload_program(), used to size the allocation
+ * before the program is generated.
+ */
+static size_t pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(void)
+{
+   /* Maximum memory allocation needed for const map entries in
+    * pvr_pds_generate_descriptor_upload_program().
+    * It must be >= 688 bytes. This size is calculated as the sum of:
+    *
+    *  1. Max. number of descriptor sets (8) * (
+    *         size of descriptor entry
+    *             (pvr_const_map_entry_descriptor_set) +
+    *         size of Common Store burst entry
+    *             (pvr_const_map_entry_literal32))
+    *
+    *  2. Max. number of PDS program buffers (24) * (
+    *         size of the largest buffer structure
+    *             (pvr_const_map_entry_constant_buffer) +
+    *         size of Common Store burst entry
+    *             (pvr_const_map_entry_literal32)
+    *
+    *  3. Size of DOUTU entry (pvr_const_map_entry_doutu_address)
+    */
+
+   /* FIXME: PVR_MAX_DESCRIPTOR_SETS is 4 and not 8. The comment above seems to
+    * say that it should be 8.
+    * Figure out a define for this or is the comment wrong?
+    */
+   return (8 * (sizeof(struct pvr_const_map_entry_descriptor_set) +
+                sizeof(struct pvr_const_map_entry_literal32)) +
+           PVR_PDS_MAX_BUFFERS *
+              (sizeof(struct pvr_const_map_entry_constant_buffer) +
+               sizeof(struct pvr_const_map_entry_literal32)) +
+           sizeof(struct pvr_const_map_entry_doutu_address));
+}
+
+/* This is a const pointer to an array of PVR_PDS_MAX_BUFFERS pvr_pds_buffer
+ * structs.
+ */
+typedef struct pvr_pds_buffer (
+   *const pvr_pds_uniform_program_buffer_array_ptr)[PVR_PDS_MAX_BUFFERS];
+
+/* Fills in one pvr_pds_buffer per UBO entry from the compiler's ubo_data and
+ * returns the number of buffers written via buffer_count_out.
+ *
+ * NOTE(review): robust_buffer_access is currently unreferenced in this body —
+ * presumably reserved for robustBufferAccess handling; confirm before relying
+ * on it.
+ */
+static void pvr_pds_uniform_program_setup_buffers(
+   bool robust_buffer_access,
+   const struct rogue_ubo_data *ubo_data,
+   pvr_pds_uniform_program_buffer_array_ptr buffers_out_ptr,
+   uint32_t *const buffer_count_out)
+{
+   struct pvr_pds_buffer *const buffers = *buffers_out_ptr;
+   uint32_t buffer_count = 0;
+
+   for (size_t u = 0; u < ubo_data->num_ubo_entries; ++u) {
+      struct pvr_pds_buffer *current_buffer = &buffers[buffer_count++];
+
+      /* This is fine since buffers_out_ptr is a pointer to an array. */
+      assert(buffer_count <= ARRAY_SIZE(*buffers_out_ptr));
+
+      current_buffer->type = PVR_BUFFER_TYPE_UBO;
+      current_buffer->size_in_dwords = ubo_data->size[u];
+      current_buffer->destination = ubo_data->dest[u];
+
+      /* NOTE(review): buffer_count was already incremented above, so
+       * buffer_id is 1-based — confirm this is intended.
+       */
+      current_buffer->buffer_id = buffer_count;
+      current_buffer->desc_set = ubo_data->desc_set[u];
+      current_buffer->binding = ubo_data->binding[u];
+      /* TODO: Is this always the case?
+       * E.g. can multiple UBOs have the same base buffer?
+       */
+      current_buffer->source_offset = 0;
+   }
+
+   *buffer_count_out = buffer_count;
+}
+
+/* Generates and uploads the PDS descriptor/uniform data program for `stage`.
+ *
+ * If allocator == NULL the device's internal allocator is used.
+ * On success, pds_info_out->entries is a heap allocation owned by the caller
+ * and released via pvr_pds_uniform_program_destroy(). A program that
+ * generates no code is valid: both outputs are zeroed and VK_SUCCESS is
+ * returned. On failure, pds_info_out->entries is NULL.
+ */
+static VkResult pvr_pds_uniform_program_create_and_upload(
+   struct pvr_device *const device,
+   const VkAllocationCallbacks *const allocator,
+   const struct rogue_ubo_data *const ubo_data,
+   const struct pvr_explicit_constant_usage *const explicit_const_usage,
+   const struct pvr_pipeline_layout *const layout,
+   enum pvr_stage_allocation stage,
+   struct pvr_pds_upload *const pds_code_upload_out,
+   struct pvr_pds_info *const pds_info_out)
+{
+   const size_t const_entries_size_in_bytes =
+      pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes();
+   struct pvr_descriptor_program_input program = { 0 };
+   struct pvr_const_map_entry *entries_buffer;
+   ASSERTED uint32_t code_size_in_dwords;
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   assert(stage != PVR_STAGE_ALLOCATION_COUNT);
+
+   memset(pds_info_out, 0, sizeof(*pds_info_out));
+
+   pvr_pds_uniform_program_setup_buffers(device->features.robustBufferAccess,
+                                         ubo_data,
+                                         &program.buffers,
+                                         &program.buffer_count);
+
+   for (uint32_t dma = 0; dma < program.buffer_count; dma++) {
+      if (program.buffers[dma].type != PVR_BUFFER_TYPES_COMPILE_TIME)
+         continue;
+
+      assert(!"Unimplemented");
+   }
+
+   if (layout->per_stage_reg_info[stage].primary_dynamic_size_in_dwords)
+      assert(!"Unimplemented");
+
+   for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+      const struct pvr_descriptor_set_layout_mem_layout *const reg_layout =
+         &layout->register_layout_in_dwords_per_stage[stage][set_num];
+      const uint32_t start_offset = explicit_const_usage->start_offset;
+
+      /* TODO: Use compiler usage info to optimize this? */
+
+      /* Only dma primaries if they are actually required. */
+      if (reg_layout->primary_size) {
+         program.descriptor_sets[program.descriptor_set_count++] =
+            (struct pvr_pds_descriptor_set){
+               .descriptor_set = set_num,
+               .size_in_dwords = reg_layout->primary_size,
+               .destination = reg_layout->primary_offset + start_offset,
+               .primary = true,
+            };
+      }
+
+      /* Only dma secondaries if they are actually required. */
+      if (!reg_layout->secondary_size)
+         continue;
+
+      program.descriptor_sets[program.descriptor_set_count++] =
+         (struct pvr_pds_descriptor_set){
+            .descriptor_set = set_num,
+            .size_in_dwords = reg_layout->secondary_size,
+            .destination = reg_layout->secondary_offset + start_offset,
+         };
+   }
+
+   entries_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              const_entries_size_in_bytes,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!entries_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pds_info_out->entries = entries_buffer;
+   pds_info_out->entries_size_in_bytes = const_entries_size_in_bytes;
+
+   /* First pass: sizes only (NULL staging buffer). */
+   pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info_out);
+
+   code_size_in_dwords = pds_info_out->code_size_in_dwords;
+   staging_buffer_size =
+      pds_info_out->code_size_in_dwords * sizeof(*staging_buffer);
+
+   if (!staging_buffer_size) {
+      /* Empty program: zeroed outputs signal "nothing to execute". */
+      vk_free2(&device->vk.alloc, allocator, entries_buffer);
+
+      memset(pds_info_out, 0, sizeof(*pds_info_out));
+      memset(pds_code_upload_out, 0, sizeof(*pds_code_upload_out));
+      return VK_SUCCESS;
+   }
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer) {
+      vk_free2(&device->vk.alloc, allocator, entries_buffer);
+      pds_info_out->entries = NULL;
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   /* Second pass: actually generate the code into the staging buffer. */
+   pvr_pds_generate_descriptor_upload_program(&program,
+                                              staging_buffer,
+                                              pds_info_out);
+
+   assert(pds_info_out->code_size_in_dwords <= code_size_in_dwords);
+
+   /* Shrink the entries allocation to the size actually written. */
+   /* FIXME: use vk_realloc2() ? */
+   entries_buffer = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
+                               entries_buffer,
+                               pds_info_out->entries_written_size_in_bytes,
+                               8,
+                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!entries_buffer) {
+      /* On realloc failure the original block is still allocated; free it
+       * through the pointer stashed in pds_info_out to avoid leaking it.
+       */
+      vk_free2(&device->vk.alloc, allocator, pds_info_out->entries);
+      pds_info_out->entries = NULL;
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   pds_info_out->entries = entries_buffer;
+   pds_info_out->entries_size_in_bytes =
+      pds_info_out->entries_written_size_in_bytes;
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               NULL,
+                               0,
+                               0,
+                               staging_buffer,
+                               pds_info_out->code_size_in_dwords,
+                               16,
+                               16,
+                               pds_code_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, entries_buffer);
+      pds_info_out->entries = NULL;
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+      /* Propagate the real cause rather than assuming host-memory
+       * exhaustion (upload failures can be device errors too).
+       */
+      return result;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+/* Releases everything pvr_pds_uniform_program_create_and_upload() allocated:
+ * the uploaded code BO and the const map entries buffer.
+ * Safe on the zeroed outputs of an empty program (pvr_bo_free/vk_free2 of
+ * NULL — TODO confirm pvr_bo_free accepts NULL).
+ *
+ * NOTE(review): "const struct VkAllocationCallbacks" differs in spelling from
+ * the "const VkAllocationCallbacks" used by the create functions — same type,
+ * but worth unifying.
+ */
+static void pvr_pds_uniform_program_destroy(
+   struct pvr_device *const device,
+   const struct VkAllocationCallbacks *const allocator,
+   struct pvr_pds_upload *const pds_code,
+   struct pvr_pds_info *const pds_info)
+{
+   pvr_bo_free(device, pds_code->pvr_bo);
+   vk_free2(&device->vk.alloc, allocator, pds_info->entries);
+}
+
+/* FIXME: See if pvr_device_init_compute_pds_program() and this could be merged.
+ */
+/* Generates and uploads the primary PDS compute program.
+ *
+ * If add_base_workgroup is set, the program reserves space for a base
+ * workgroup ID and base_workgroup_data_patching_offset_out receives the dword
+ * offset (of workgroup id x; y and z follow contiguously) at which the
+ * command buffer must patch the data segment before each dispatch.
+ */
+static VkResult pvr_pds_compute_program_create_and_upload(
+   struct pvr_device *const device,
+   const VkAllocationCallbacks *const allocator,
+   const uint32_t local_input_regs[static const WORKGROUP_DIMENSIONS],
+   const uint32_t work_group_input_regs[static const WORKGROUP_DIMENSIONS],
+   uint32_t barrier_coefficient,
+   bool add_base_workgroup,
+   uint32_t usc_temps,
+   pvr_dev_addr_t usc_shader_dev_addr,
+   struct pvr_pds_upload *const pds_upload_out,
+   struct pvr_pds_info *const pds_info_out,
+   uint32_t *const base_workgroup_data_patching_offset_out)
+{
+   struct pvr_pds_compute_shader_program program = {
+      /* clang-format off */
+      .local_input_regs = {
+         local_input_regs[0],
+         local_input_regs[1],
+         local_input_regs[2]
+      },
+      .work_group_input_regs = {
+         work_group_input_regs[0],
+         work_group_input_regs[1],
+         work_group_input_regs[2]
+      },
+      .global_input_regs = {
+         [0 ... (WORKGROUP_DIMENSIONS - 1)] =
+            PVR_PDS_COMPUTE_INPUT_REG_UNUSED
+      },
+      /* clang-format on */
+      .barrier_coefficient = barrier_coefficient,
+      .flattened_work_groups = true,
+      .clear_pds_barrier = false,
+      .add_base_workgroup = add_base_workgroup,
+      .kick_usc = true,
+   };
+   struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) == WORKGROUP_DIMENSIONS);
+   STATIC_ASSERT(ARRAY_SIZE(program.work_group_input_regs) ==
+                 WORKGROUP_DIMENSIONS);
+   STATIC_ASSERT(ARRAY_SIZE(program.global_input_regs) == WORKGROUP_DIMENSIONS);
+
+   assert(!add_base_workgroup || base_workgroup_data_patching_offset_out);
+
+   pvr_pds_setup_doutu(&program.usc_task_control,
+                       usc_shader_dev_addr.addr,
+                       usc_temps,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   /* First pass: query code/data segment sizes only. */
+   pvr_pds_compute_shader(&program, NULL, PDS_GENERATE_SIZES, dev_info);
+
+   /* FIXME: According to pvr_device_init_compute_pds_program() the code size
+    * is in bytes. Investigate this.
+    */
+   staging_buffer_size =
+      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: pvr_pds_compute_shader doesn't implement
+    * PDS_GENERATE_CODEDATA_SEGMENTS.
+    */
+   /* Code segment at the start of the staging buffer, data after it. */
+   pvr_pds_compute_shader(&program,
+                          &staging_buffer[0],
+                          PDS_GENERATE_CODE_SEGMENT,
+                          dev_info);
+
+   pvr_pds_compute_shader(&program,
+                          &staging_buffer[program.code_size],
+                          PDS_GENERATE_DATA_SEGMENT,
+                          dev_info);
+
+   /* We'll need to patch the base workgroup in the PDS data section before
+    * dispatch so we give back the offsets at which to patch. We only need to
+    * save the offset for the first workgroup id since the workgroup ids are
+    * stored contiguously in the data segment.
+    */
+   if (add_base_workgroup) {
+      *base_workgroup_data_patching_offset_out =
+         program.base_workgroup_constant_offset_in_dwords[0];
+   }
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               &staging_buffer[program.code_size],
+                               program.data_size,
+                               16,
+                               &staging_buffer[0],
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return result;
+   }
+
+   *pds_info_out = (struct pvr_pds_info){
+      .temps_required = program.highest_temp,
+      .code_size_in_dwords = program.code_size,
+      .data_size_in_dwords = program.data_size,
+   };
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+/* Counterpart to pvr_pds_compute_program_create_and_upload(). Only the
+ * uploaded BO needs freeing; allocator and pds_info are accepted for
+ * signature symmetry with the other *_destroy helpers.
+ */
+static void pvr_pds_compute_program_destroy(
+   struct pvr_device *const device,
+   const struct VkAllocationCallbacks *const allocator,
+   struct pvr_pds_upload *const pds_program,
+   struct pvr_pds_info *const pds_info)
+{
+   /* We don't allocate an entries buffer so we don't need to free it */
+   pvr_bo_free(device, pds_program->pvr_bo);
+}
+
+/******************************************************************************
+ Generic pipeline functions
+ ******************************************************************************/
+
+/* Base-object initialization shared by compute and graphics pipelines.
+ * Expects a zero-initialized pipeline (the assert relies on layout == NULL).
+ */
+static void pvr_pipeline_init(struct pvr_device *device,
+                              enum pvr_pipeline_type type,
+                              struct pvr_pipeline *const pipeline)
+{
+   assert(!pipeline->layout);
+
+   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+
+   pipeline->type = type;
+}
+
+/* Tears down what pvr_pipeline_init() set up. */
+static void pvr_pipeline_finish(struct pvr_pipeline *pipeline)
+{
+   vk_object_base_finish(&pipeline->base);
+}
+
+/******************************************************************************
+ Compute pipeline functions
+ ******************************************************************************/
+
+/* Compiles and uploads shaders and PDS programs. */
+static VkResult pvr_compute_pipeline_compile(
+   struct pvr_device *const device,
+   struct pvr_pipeline_cache *pipeline_cache,
+   const VkComputePipelineCreateInfo *pCreateInfo,
+   const VkAllocationCallbacks *const allocator,
+   struct pvr_compute_pipeline *const compute_pipeline)
+{
+   /* FIXME: Remove this hard coding. */
+   const struct pvr_explicit_constant_usage explicit_const_usage = {
+      .start_offset = 0,
+   };
+   /* Zeroed: the uniform program is built with no UBO entries for now. */
+   const struct rogue_ubo_data uniform_program_ubo_data = { 0 };
+
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   uint32_t work_group_input_regs[WORKGROUP_DIMENSIONS];
+   uint32_t local_input_regs[WORKGROUP_DIMENSIONS];
+   uint32_t barrier_coefficient;
+   VkResult result;
+
+   /* FIXME: Compile the shader. */
+
+   /* Uploads a pre-baked USC binary (pvr_usc_compute_shader) until the
+    * compiler path above is implemented.
+    */
+   result = pvr_gpu_upload_usc(device,
+                               pvr_usc_compute_shader,
+                               sizeof(pvr_usc_compute_shader),
+                               cache_line_size,
+                               &compute_pipeline->state.bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_pds_uniform_program_create_and_upload(
+      device,
+      allocator,
+      &uniform_program_ubo_data,
+      &explicit_const_usage,
+      compute_pipeline->base.layout,
+      PVR_STAGE_ALLOCATION_COMPUTE,
+      &compute_pipeline->state.uniform.pds_code,
+      &compute_pipeline->state.uniform.pds_info);
+   if (result != VK_SUCCESS)
+      goto err_free_shader;
+
+   /* We make sure that the compiler's unused reg value is compatible with the
+    * pds api.
+    */
+   STATIC_ASSERT(ROGUE_REG_UNUSED == PVR_PDS_COMPUTE_INPUT_REG_UNUSED);
+
+   barrier_coefficient = pvr_pds_compute_program_params.barrier_reg;
+
+   /* TODO: Maybe change the pds api to use pointers so we avoid the copy. */
+   local_input_regs[0] =
+      pvr_pds_compute_program_params.local_invocation_regs[0];
+   local_input_regs[1] =
+      pvr_pds_compute_program_params.local_invocation_regs[1];
+   /* This is not a mistake. We want to assign element 1 to 2. */
+   local_input_regs[2] =
+      pvr_pds_compute_program_params.local_invocation_regs[1];
+
+   STATIC_ASSERT(__same_type(work_group_input_regs,
+                             pvr_pds_compute_program_params.work_group_regs));
+   typed_memcpy(work_group_input_regs,
+                pvr_pds_compute_program_params.work_group_regs,
+                WORKGROUP_DIMENSIONS);
+
+   result = pvr_pds_compute_program_create_and_upload(
+      device,
+      allocator,
+      local_input_regs,
+      work_group_input_regs,
+      barrier_coefficient,
+      false,
+      pvr_pds_compute_program_params.usc_temps,
+      compute_pipeline->state.bo->vma->dev_addr,
+      &compute_pipeline->state.primary_program,
+      &compute_pipeline->state.primary_program_info,
+      NULL);
+   if (result != VK_SUCCESS)
+      goto err_free_uniform_program;
+
+   /* If the workgroup ID is required, then we require the base workgroup
+    * variant of the PDS compute program as well.
+    */
+   compute_pipeline->state.flags.base_workgroup =
+      work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
+      work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
+      work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+
+   if (compute_pipeline->state.flags.base_workgroup) {
+      result = pvr_pds_compute_program_create_and_upload(
+         device,
+         allocator,
+         local_input_regs,
+         work_group_input_regs,
+         barrier_coefficient,
+         true,
+         pvr_pds_compute_program_params.usc_temps,
+         compute_pipeline->state.bo->vma->dev_addr,
+         &compute_pipeline->state.primary_program_base_workgroup_variant,
+         &compute_pipeline->state.primary_program_base_workgroup_variant_info,
+         &compute_pipeline->state.base_workgroup_ids_dword_offset);
+      if (result != VK_SUCCESS)
+         goto err_free_compute_program;
+   }
+
+   return VK_SUCCESS;
+
+   /* Unwind in reverse order of creation. The base_workgroup guard below is
+    * redundant today (this label is only reached from inside that branch)
+    * but keeps the cleanup self-describing.
+    */
+err_free_compute_program:
+   if (compute_pipeline->state.flags.base_workgroup)
+      pvr_bo_free(device, compute_pipeline->state.primary_program.pvr_bo);
+
+err_free_uniform_program:
+   pvr_bo_free(device, compute_pipeline->state.uniform.pds_code.pvr_bo);
+
+err_free_shader:
+   pvr_bo_free(device, compute_pipeline->state.bo);
+
+   return result;
+}
+
+/* Initializes the base pipeline object, resolves the layout handle and runs
+ * the compile/upload step; undoes the base init on failure.
+ */
+static VkResult
+pvr_compute_pipeline_init(struct pvr_device *device,
+                          struct pvr_pipeline_cache *pipeline_cache,
+                          const VkComputePipelineCreateInfo *pCreateInfo,
+                          const VkAllocationCallbacks *allocator,
+                          struct pvr_compute_pipeline *compute_pipeline)
+{
+   VkResult result;
+
+   pvr_pipeline_init(device,
+                     PVR_PIPELINE_TYPE_COMPUTE,
+                     &compute_pipeline->base);
+
+   compute_pipeline->base.layout =
+      pvr_pipeline_layout_from_handle(pCreateInfo->layout);
+
+   result = pvr_compute_pipeline_compile(device,
+                                         pipeline_cache,
+                                         pCreateInfo,
+                                         allocator,
+                                         compute_pipeline);
+   if (result != VK_SUCCESS) {
+      pvr_pipeline_finish(&compute_pipeline->base);
+      return result;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Allocates one compute pipeline object and hands it to
+ * pvr_compute_pipeline_init(); on failure the allocation is released and no
+ * handle is returned.
+ */
+static VkResult
+pvr_compute_pipeline_create(struct pvr_device *device,
+                            struct pvr_pipeline_cache *pipeline_cache,
+                            const VkComputePipelineCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *allocator,
+                            VkPipeline *const pipeline_out)
+{
+   struct pvr_compute_pipeline *compute_pipeline;
+   VkResult result;
+
+   /* vk_zalloc2 so pvr_pipeline_init()'s layout assert sees NULL. */
+   compute_pipeline = vk_zalloc2(&device->vk.alloc,
+                                 allocator,
+                                 sizeof(*compute_pipeline),
+                                 8,
+                                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!compute_pipeline)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Compiles and uploads shaders and PDS programs. */
+   result = pvr_compute_pipeline_init(device,
+                                      pipeline_cache,
+                                      pCreateInfo,
+                                      allocator,
+                                      compute_pipeline);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, compute_pipeline);
+      return result;
+   }
+
+   *pipeline_out = pvr_pipeline_to_handle(&compute_pipeline->base);
+
+   return VK_SUCCESS;
+}
+
+/* Frees every resource pvr_compute_pipeline_create() produced, in reverse
+ * order of creation, then the pipeline object itself.
+ */
+static void pvr_compute_pipeline_destroy(
+   struct pvr_device *const device,
+   const VkAllocationCallbacks *const allocator,
+   struct pvr_compute_pipeline *const compute_pipeline)
+{
+   if (compute_pipeline->state.flags.base_workgroup) {
+      pvr_pds_compute_program_destroy(
+         device,
+         allocator,
+         &compute_pipeline->state.primary_program_base_workgroup_variant,
+         &compute_pipeline->state.primary_program_base_workgroup_variant_info);
+   }
+
+   pvr_pds_compute_program_destroy(
+      device,
+      allocator,
+      &compute_pipeline->state.primary_program,
+      &compute_pipeline->state.primary_program_info);
+   pvr_pds_uniform_program_destroy(device,
+                                   allocator,
+                                   &compute_pipeline->state.uniform.pds_code,
+                                   &compute_pipeline->state.uniform.pds_info);
+   pvr_bo_free(device, compute_pipeline->state.bo);
+
+   pvr_pipeline_finish(&compute_pipeline->base);
+
+   vk_free2(&device->vk.alloc, allocator, compute_pipeline);
+}
+
+/* Vulkan entry point. Per the spec, creation continues past a failed entry:
+ * the failed slot is set to VK_NULL_HANDLE and the (last) error is returned
+ * while successful pipelines remain valid.
+ */
+VkResult
+pvr_CreateComputePipelines(VkDevice _device,
+                           VkPipelineCache pipelineCache,
+                           uint32_t createInfoCount,
+                           const VkComputePipelineCreateInfo *pCreateInfos,
+                           const VkAllocationCallbacks *pAllocator,
+                           VkPipeline *pPipelines)
+{
+   PVR_FROM_HANDLE(pvr_pipeline_cache, pipeline_cache, pipelineCache);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   VkResult result = VK_SUCCESS;
+
+   for (uint32_t i = 0; i < createInfoCount; i++) {
+      const VkResult local_result =
+         pvr_compute_pipeline_create(device,
+                                     pipeline_cache,
+                                     &pCreateInfos[i],
+                                     pAllocator,
+                                     &pPipelines[i]);
+      if (local_result != VK_SUCCESS) {
+         result = local_result;
+         pPipelines[i] = VK_NULL_HANDLE;
+      }
+   }
+
+   return result;
+}
+
+/******************************************************************************
+ Graphics pipeline functions
+ ******************************************************************************/
+
+/* Maps a supported VkDynamicState to the driver's internal dynamic-state
+ * bitmask. States not listed here (e.g. VK_DYNAMIC_STATE_DEPTH_BOUNDS) are
+ * unsupported and hit the unreachable().
+ */
+static inline uint32_t pvr_dynamic_state_bit_from_vk(VkDynamicState state)
+{
+   switch (state) {
+   case VK_DYNAMIC_STATE_VIEWPORT:
+      return PVR_DYNAMIC_STATE_BIT_VIEWPORT;
+   case VK_DYNAMIC_STATE_SCISSOR:
+      return PVR_DYNAMIC_STATE_BIT_SCISSOR;
+   case VK_DYNAMIC_STATE_LINE_WIDTH:
+      return PVR_DYNAMIC_STATE_BIT_LINE_WIDTH;
+   case VK_DYNAMIC_STATE_DEPTH_BIAS:
+      return PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS;
+   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
+      return PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS;
+   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+      return PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK;
+   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+      return PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK;
+   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+      return PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE;
+   default:
+      unreachable("Unsupported state.");
+   }
+}
+
+/* Frees every resource pvr_graphics_pipeline_compile() produced — uniform
+ * programs, attrib programs, PDS fragment/coeff programs and shader BOs —
+ * in reverse order of creation, then the pipeline object itself.
+ */
+static void
+pvr_graphics_pipeline_destroy(struct pvr_device *const device,
+                              const VkAllocationCallbacks *const allocator,
+                              struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   const uint32_t num_vertex_attrib_programs =
+      ARRAY_SIZE(gfx_pipeline->vertex_shader_state.pds_attrib_programs);
+
+   pvr_pds_uniform_program_destroy(
+      device,
+      allocator,
+      &gfx_pipeline->fragment_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->fragment_shader_state.uniform_state.pds_info);
+
+   pvr_pds_uniform_program_destroy(
+      device,
+      allocator,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_info);
+
+   for (uint32_t i = 0; i < num_vertex_attrib_programs; i++) {
+      struct pvr_pds_attrib_program *const attrib_program =
+         &gfx_pipeline->vertex_shader_state.pds_attrib_programs[i];
+
+      pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program);
+   }
+
+   pvr_bo_free(device,
+               gfx_pipeline->fragment_shader_state.pds_fragment_program.pvr_bo);
+   pvr_bo_free(device,
+               gfx_pipeline->fragment_shader_state.pds_coeff_program.pvr_bo);
+
+   pvr_bo_free(device, gfx_pipeline->fragment_shader_state.bo);
+   pvr_bo_free(device, gfx_pipeline->vertex_shader_state.bo);
+
+   pvr_pipeline_finish(&gfx_pipeline->base);
+
+   vk_free2(&device->vk.alloc, allocator, gfx_pipeline);
+}
+
+/* Populates the vertex shader stage state from the compiler's build data and
+ * packs the TA varying words.
+ */
+static void
+pvr_vertex_state_init(struct pvr_graphics_pipeline *gfx_pipeline,
+                      const struct rogue_common_build_data *common_data,
+                      const struct rogue_vs_build_data *vs_data)
+{
+   struct pvr_vertex_shader_state *vertex_state =
+      &gfx_pipeline->vertex_shader_state;
+
+   /* TODO: Hard coding these for now. These should be populated based on the
+    * information returned by the compiler.
+    */
+   vertex_state->stage_state.const_shared_reg_count = common_data->shareds;
+   vertex_state->stage_state.const_shared_reg_offset = 0;
+   vertex_state->stage_state.temps_count = common_data->temps;
+   vertex_state->stage_state.coefficient_size = common_data->coeffs;
+   vertex_state->stage_state.uses_atomic_ops = false;
+   vertex_state->stage_state.uses_texture_rw = false;
+   vertex_state->stage_state.uses_barrier = false;
+   vertex_state->stage_state.has_side_effects = false;
+   vertex_state->stage_state.empty_program = false;
+
+   vertex_state->vertex_input_size = vs_data->num_vertex_input_regs;
+   vertex_state->vertex_output_size =
+      vs_data->num_vertex_outputs * ROGUE_REG_SIZE_BYTES;
+   vertex_state->output_selects = 0;
+   vertex_state->user_clip_planes_mask = 0;
+   vertex_state->entry_offset = 0;
+
+   /* TODO: The number of varyings should be checked against the fragment
+    * shader inputs and assigned in the place where that happens.
+    * There will also be an opportunity to cull unused fs inputs/vs outputs.
+    */
+   /* NOTE(review): the packs below go through gfx_pipeline->vertex_shader_state
+    * rather than the vertex_state local — same object, but worth unifying.
+    */
+   pvr_csb_pack (&gfx_pipeline->vertex_shader_state.varying[0],
+                 TA_STATE_VARYING0,
+                 varying0) {
+      varying0.f32_linear = vs_data->num_varyings;
+      varying0.f32_flat = 0;
+      varying0.f32_npc = 0;
+   }
+
+   pvr_csb_pack (&gfx_pipeline->vertex_shader_state.varying[1],
+                 TA_STATE_VARYING1,
+                 varying1) {
+      varying1.f16_linear = 0;
+      varying1.f16_flat = 0;
+      varying1.f16_npc = 0;
+   }
+}
+
+/* Populates the fragment shader stage state from the compiler's build data. */
+static void
+pvr_fragment_state_init(struct pvr_graphics_pipeline *gfx_pipeline,
+                        const struct rogue_common_build_data *common_data)
+{
+   struct pvr_fragment_shader_state *fragment_state =
+      &gfx_pipeline->fragment_shader_state;
+
+   /* TODO: Hard coding these for now. These should be populated based on the
+    * information returned by the compiler.
+    */
+   fragment_state->stage_state.const_shared_reg_count = 0;
+   fragment_state->stage_state.const_shared_reg_offset = 0;
+   fragment_state->stage_state.temps_count = common_data->temps;
+   fragment_state->stage_state.coefficient_size = common_data->coeffs;
+   fragment_state->stage_state.uses_atomic_ops = false;
+   fragment_state->stage_state.uses_texture_rw = false;
+   fragment_state->stage_state.uses_barrier = false;
+   fragment_state->stage_state.has_side_effects = false;
+   fragment_state->stage_state.empty_program = false;
+
+   fragment_state->pass_type = 0;
+   fragment_state->entry_offset = 0;
+}
+
+/* Compiles and uploads shaders and PDS programs. */
+static VkResult
+pvr_graphics_pipeline_compile(struct pvr_device *const device,
+                              struct pvr_pipeline_cache *pipeline_cache,
+                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                              const VkAllocationCallbacks *const allocator,
+                              struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   /* FIXME: Remove this hard coding. */
+   const struct pvr_explicit_constant_usage explicit_const_usage = {
+      .start_offset = 16,
+   };
+
+   const VkPipelineVertexInputStateCreateInfo *const vertex_input_state =
+      pCreateInfo->pVertexInputState;
+
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   struct rogue_compiler *compiler = device->pdevice->compiler;
+   struct rogue_build_ctx *ctx;
+   VkResult result;
+
+   /* Compile the USC shaders. */
+
+   /* Setup shared build context. */
+   ctx = rogue_create_build_context(compiler);
+   if (!ctx)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* NIR middle-end translation. */
+   /* Walks the gl_shader_stage enum downwards from FRAGMENT, stopping after
+    * the first stage; ~0 in stage_indices marks an unused stage.
+    */
+   for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE;
+        stage--) {
+      const VkPipelineShaderStageCreateInfo *create_info;
+      size_t stage_index = gfx_pipeline->stage_indices[stage];
+
+      /* Skip unused/inactive stages. */
+      if (stage_index == ~0)
+         continue;
+
+      create_info = &pCreateInfo->pStages[stage_index];
+
+      /* SPIR-V to NIR. */
+      ctx->nir[stage] = pvr_spirv_to_nir(ctx, stage, create_info);
+      if (!ctx->nir[stage]) {
+         ralloc_free(ctx);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+   }
+
+   /* Pre-back-end analysis and optimization, driver data extraction. */
+   /* TODO: Analyze and cull unused I/O between stages. */
+   /* TODO: Allocate UBOs between stages;
+    * pipeline->layout->set_{count,layout}.
+    */
+
+   /* Back-end translation. */
+   for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE;
+        stage--) {
+      if (!ctx->nir[stage])
+         continue;
+
+      ctx->rogue[stage] = pvr_nir_to_rogue(ctx, ctx->nir[stage]);
+      if (!ctx->rogue[stage]) {
+         ralloc_free(ctx);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+
+      ctx->binary[stage] = pvr_rogue_to_binary(ctx, ctx->rogue[stage]);
+      if (!ctx->binary[stage]) {
+         ralloc_free(ctx);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+   }
+
+   pvr_vertex_state_init(gfx_pipeline,
+                         &ctx->common_data[MESA_SHADER_VERTEX],
+                         &ctx->stage_data.vs);
+
+   result = pvr_gpu_upload_usc(device,
+                               ctx->binary[MESA_SHADER_VERTEX]->data,
+                               ctx->binary[MESA_SHADER_VERTEX]->size,
+                               cache_line_size,
+                               &gfx_pipeline->vertex_shader_state.bo);
+   if (result != VK_SUCCESS)
+      goto err_free_build_context;
+
+   pvr_fragment_state_init(gfx_pipeline,
+                           &ctx->common_data[MESA_SHADER_FRAGMENT]);
+
+   result = pvr_gpu_upload_usc(device,
+                               ctx->binary[MESA_SHADER_FRAGMENT]->data,
+                               ctx->binary[MESA_SHADER_FRAGMENT]->size,
+                               cache_line_size,
+                               &gfx_pipeline->fragment_shader_state.bo);
+   if (result != VK_SUCCESS)
+      goto err_free_vertex_bo;
+
+   /* TODO: powervr has an optimization where it attempts to recompile shaders.
+    * See PipelineCompileNoISPFeedbackFragmentStage. Unimplemented since in our
+    * case the optimization doesn't happen.
+    */
+
+   /* TODO: The programs we use are hard coded for now, but these should be
+    * selected dynamically.
+    */
+
+   result = pvr_pds_coeff_program_create_and_upload(
+      device,
+      allocator,
+      ctx->stage_data.fs.iterator_args.fpu_iterators,
+      ctx->stage_data.fs.iterator_args.num_fpu_iterators,
+      ctx->stage_data.fs.iterator_args.destination,
+      &gfx_pipeline->fragment_shader_state.pds_coeff_program);
+   if (result != VK_SUCCESS)
+      goto err_free_fragment_bo;
+
+   result = pvr_pds_fragment_program_create_and_upload(
+      device,
+      allocator,
+      gfx_pipeline->fragment_shader_state.bo,
+      ctx->common_data[MESA_SHADER_FRAGMENT].temps,
+      ctx->stage_data.fs.msaa_mode,
+      ctx->stage_data.fs.phas,
+      &gfx_pipeline->fragment_shader_state.pds_fragment_program);
+   if (result != VK_SUCCESS)
+      goto err_free_coeff_program;
+
+   result = pvr_pds_vertex_attrib_programs_create_and_upload(
+      device,
+      allocator,
+      vertex_input_state,
+      ctx->common_data[MESA_SHADER_VERTEX].temps,
+      &ctx->stage_data.vs,
+      &gfx_pipeline->vertex_shader_state.pds_attrib_programs);
+   if (result != VK_SUCCESS)
+      goto err_free_frag_program;
+
+   result = pvr_pds_uniform_program_create_and_upload(
+      device,
+      allocator,
+      &ctx->common_data[MESA_SHADER_VERTEX].ubo_data,
+      &explicit_const_usage,
+      gfx_pipeline->base.layout,
+      PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_info);
+   if (result != VK_SUCCESS)
+      goto err_free_vertex_attrib_program;
+
+   /* FIXME: When the temp_buffer_total_size is non-zero we need to allocate a
+    * scratch buffer for both vertex and fragment stage.
+    * Figure out the best place to do this.
+    */
+   /* assert(pvr_pds_uniform_program_variables.temp_buff_total_size == 0); */
+   /* TODO: Implement spilling with the above. */
+
+   /* TODO: Call pvr_pds_uniform_program_create_and_upload in a loop. */
+   /* FIXME: For now we pass in the same explicit_const_usage since it contains
+    * all invalid entries. Fix this by hooking it up to the compiler.
+    */
+   result = pvr_pds_uniform_program_create_and_upload(
+      device,
+      allocator,
+      &ctx->common_data[MESA_SHADER_FRAGMENT].ubo_data,
+      &explicit_const_usage,
+      gfx_pipeline->base.layout,
+      PVR_STAGE_ALLOCATION_FRAGMENT,
+      &gfx_pipeline->fragment_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->fragment_shader_state.uniform_state.pds_info);
+   if (result != VK_SUCCESS)
+      goto err_free_vertex_uniform_program;
+
+   ralloc_free(ctx);
+
+   return VK_SUCCESS;
+
+   /* Unwind ladder: free in strict reverse order of creation. */
+err_free_vertex_uniform_program:
+   pvr_pds_uniform_program_destroy(
+      device,
+      allocator,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_info);
+err_free_vertex_attrib_program:
+   for (uint32_t i = 0;
+        i < ARRAY_SIZE(gfx_pipeline->vertex_shader_state.pds_attrib_programs);
+        i++) {
+      struct pvr_pds_attrib_program *const attrib_program =
+         &gfx_pipeline->vertex_shader_state.pds_attrib_programs[i];
+
+      pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program);
+   }
+err_free_frag_program:
+   pvr_bo_free(device,
+               gfx_pipeline->fragment_shader_state.pds_fragment_program.pvr_bo);
+err_free_coeff_program:
+   pvr_bo_free(device,
+               gfx_pipeline->fragment_shader_state.pds_coeff_program.pvr_bo);
+err_free_fragment_bo:
+   pvr_bo_free(device, gfx_pipeline->fragment_shader_state.bo);
+err_free_vertex_bo:
+   pvr_bo_free(device, gfx_pipeline->vertex_shader_state.bo);
+err_free_build_context:
+   ralloc_free(ctx);
+   return result;
+}
+
+/* Copies depth/stencil create-info into the pipeline, substituting the
+ * pass-through defaults (ALWAYS compare, KEEP ops, writes disabled) when the
+ * corresponding test is disabled. A NULL create-info leaves the pipeline
+ * state untouched.
+ */
+static void pvr_graphics_pipeline_init_depth_and_stencil_state(
+   struct pvr_graphics_pipeline *gfx_pipeline,
+   const VkPipelineDepthStencilStateCreateInfo *depth_stencil_state)
+{
+   const VkStencilOpState *front;
+   const VkStencilOpState *back;
+
+   if (!depth_stencil_state)
+      return;
+
+   front = &depth_stencil_state->front;
+   back = &depth_stencil_state->back;
+
+   if (depth_stencil_state->depthTestEnable) {
+      gfx_pipeline->depth_compare_op = depth_stencil_state->depthCompareOp;
+      gfx_pipeline->depth_write_disable =
+         !depth_stencil_state->depthWriteEnable;
+   } else {
+      gfx_pipeline->depth_compare_op = VK_COMPARE_OP_ALWAYS;
+      gfx_pipeline->depth_write_disable = true;
+   }
+
+   if (depth_stencil_state->stencilTestEnable) {
+      gfx_pipeline->stencil_front.compare_op = front->compareOp;
+      gfx_pipeline->stencil_front.fail_op = front->failOp;
+      gfx_pipeline->stencil_front.depth_fail_op = front->depthFailOp;
+      gfx_pipeline->stencil_front.pass_op = front->passOp;
+
+      gfx_pipeline->stencil_back.compare_op = back->compareOp;
+      gfx_pipeline->stencil_back.fail_op = back->failOp;
+      gfx_pipeline->stencil_back.depth_fail_op = back->depthFailOp;
+      gfx_pipeline->stencil_back.pass_op = back->passOp;
+   } else {
+      gfx_pipeline->stencil_front.compare_op = VK_COMPARE_OP_ALWAYS;
+      gfx_pipeline->stencil_front.fail_op = VK_STENCIL_OP_KEEP;
+      gfx_pipeline->stencil_front.depth_fail_op = VK_STENCIL_OP_KEEP;
+      gfx_pipeline->stencil_front.pass_op = VK_STENCIL_OP_KEEP;
+
+      gfx_pipeline->stencil_back = gfx_pipeline->stencil_front;
+   }
+}
+
/* Resolve which pipeline states are dynamic vs. static.
 *
 * Builds a bitmask from pDynamicStates and, for every state that is NOT
 * dynamic, copies the static value from the relevant create-info struct into
 * gfx_pipeline->dynamic_state. The resulting mask is stored in
 * dynamic_state.mask so later code can test for dynamic states.
 *
 * Any of the create-info pointers may be NULL; the corresponding states are
 * then left untouched.
 */
static void pvr_graphics_pipeline_init_dynamic_state(
   struct pvr_graphics_pipeline *gfx_pipeline,
   const VkPipelineDynamicStateCreateInfo *dynamic_state,
   const VkPipelineViewportStateCreateInfo *viewport_state,
   const VkPipelineDepthStencilStateCreateInfo *depth_stencil_state,
   const VkPipelineColorBlendStateCreateInfo *color_blend_state,
   const VkPipelineRasterizationStateCreateInfo *rasterization_state)
{
   struct pvr_dynamic_state *const internal_dynamic_state =
      &gfx_pipeline->dynamic_state;
   uint32_t dynamic_states = 0;

   /* Accumulate the PVR_DYNAMIC_STATE_BIT_* mask for all requested states. */
   if (dynamic_state) {
      for (uint32_t i = 0; i < dynamic_state->dynamicStateCount; i++) {
         dynamic_states |=
            pvr_dynamic_state_bit_from_vk(dynamic_state->pDynamicStates[i]);
      }
   }

   /* TODO: Verify this.
    * We don't zero out the pipeline's state if they are dynamic since they
    * should be set later on in the command buffer.
    */

   /* TODO: Handle rasterizerDiscardEnable. */

   if (rasterization_state) {
      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH))
         internal_dynamic_state->line_width = rasterization_state->lineWidth;

      /* TODO: Do we need the depthBiasEnable check? */
      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) {
         internal_dynamic_state->depth_bias.constant_factor =
            rasterization_state->depthBiasConstantFactor;
         internal_dynamic_state->depth_bias.clamp =
            rasterization_state->depthBiasClamp;
         internal_dynamic_state->depth_bias.slope_factor =
            rasterization_state->depthBiasSlopeFactor;
      }
   }

   /* TODO: handle viewport state flags. */

   /* TODO: handle static viewport state. */
   /* We assume the viewport state to be dynamic for now. */

   /* TODO: handle static scissor state. */
   /* We assume the scissor state to be dynamic for now. */

   if (depth_stencil_state) {
      const VkStencilOpState *const front = &depth_stencil_state->front;
      const VkStencilOpState *const back = &depth_stencil_state->back;

      /* VkPhysicalDeviceFeatures->depthBounds is false. */
      assert(depth_stencil_state->depthBoundsTestEnable == VK_FALSE);

      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) {
         internal_dynamic_state->compare_mask.front = front->compareMask;
         internal_dynamic_state->compare_mask.back = back->compareMask;
      }

      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) {
         internal_dynamic_state->write_mask.front = front->writeMask;
         internal_dynamic_state->write_mask.back = back->writeMask;
      }

      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) {
         internal_dynamic_state->reference.front = front->reference;
         internal_dynamic_state->reference.back = back->reference;
      }
   }

   if (color_blend_state &&
       !(dynamic_states & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) {
      /* Guard against the internal array's type/size drifting from Vulkan's. */
      STATIC_ASSERT(__same_type(internal_dynamic_state->blend_constants,
                                color_blend_state->blendConstants));

      typed_memcpy(internal_dynamic_state->blend_constants,
                   color_blend_state->blendConstants,
                   ARRAY_SIZE(internal_dynamic_state->blend_constants));
   }

   /* TODO: handle STATIC_STATE_DEPTH_BOUNDS ? */

   internal_dynamic_state->mask = dynamic_states;
}
+
/* Initialize a zero-allocated pvr_graphics_pipeline from the create info.
 *
 * Fills in rasterization, depth/stencil, dynamic, input-assembly and
 * multisample state, records which create-info stage maps to which gl_stage,
 * then compiles and uploads the shaders and PDS programs via
 * pvr_graphics_pipeline_compile(). On failure the base pipeline is finished
 * and the error is returned; the caller still owns the allocation.
 */
static VkResult
pvr_graphics_pipeline_init(struct pvr_device *device,
                           struct pvr_pipeline_cache *pipeline_cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *allocator,
                           struct pvr_graphics_pipeline *gfx_pipeline)
{
   /* If rasterization is not enabled, various CreateInfo structs must be
    * ignored.
    */
   const bool raster_discard_enabled =
      pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
   const VkPipelineViewportStateCreateInfo *vs_info =
      !raster_discard_enabled ? pCreateInfo->pViewportState : NULL;
   const VkPipelineDepthStencilStateCreateInfo *dss_info =
      !raster_discard_enabled ? pCreateInfo->pDepthStencilState : NULL;
   const VkPipelineRasterizationStateCreateInfo *rs_info =
      !raster_discard_enabled ? pCreateInfo->pRasterizationState : NULL;
   const VkPipelineColorBlendStateCreateInfo *cbs_info =
      !raster_discard_enabled ? pCreateInfo->pColorBlendState : NULL;
   const VkPipelineMultisampleStateCreateInfo *ms_info =
      !raster_discard_enabled ? pCreateInfo->pMultisampleState : NULL;
   VkResult result;

   pvr_pipeline_init(device, PVR_PIPELINE_TYPE_GRAPHICS, &gfx_pipeline->base);

   pvr_finishme("ignoring pCreateInfo flags.");
   pvr_finishme("ignoring pipeline cache.");

   /* Raster state is read from pRasterizationState directly (not rs_info) as
    * these fields are meaningful even with rasterizer discard enabled.
    */
   gfx_pipeline->raster_state.discard_enable = raster_discard_enabled;
   gfx_pipeline->raster_state.cull_mode =
      pCreateInfo->pRasterizationState->cullMode;
   gfx_pipeline->raster_state.front_face =
      pCreateInfo->pRasterizationState->frontFace;
   gfx_pipeline->raster_state.depth_bias_enable =
      pCreateInfo->pRasterizationState->depthBiasEnable;
   gfx_pipeline->raster_state.depth_clamp_enable =
      pCreateInfo->pRasterizationState->depthClampEnable;

   /* FIXME: Handle depthClampEnable. */

   pvr_graphics_pipeline_init_depth_and_stencil_state(gfx_pipeline, dss_info);
   pvr_graphics_pipeline_init_dynamic_state(gfx_pipeline,
                                            pCreateInfo->pDynamicState,
                                            vs_info,
                                            dss_info,
                                            cbs_info,
                                            rs_info);

   if (pCreateInfo->pInputAssemblyState) {
      gfx_pipeline->input_asm_state.topology =
         pCreateInfo->pInputAssemblyState->topology;
      gfx_pipeline->input_asm_state.primitive_restart =
         pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
   }

   /* Mark all stages as absent (~0) before recording the present ones. */
   memset(gfx_pipeline->stage_indices, ~0, sizeof(gfx_pipeline->stage_indices));

   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
      VkShaderStageFlagBits vk_stage = pCreateInfo->pStages[i].stage;
      gl_shader_stage gl_stage = vk_to_mesa_shader_stage(vk_stage);
      /* From the Vulkan 1.2.192 spec for VkPipelineShaderStageCreateInfo:
       *
       *    "stage must not be VK_SHADER_STAGE_ALL_GRAPHICS,
       *    or VK_SHADER_STAGE_ALL."
       *
       * So we don't handle that.
       *
       * We also don't handle VK_SHADER_STAGE_TESSELLATION_* and
       * VK_SHADER_STAGE_GEOMETRY_BIT stages as 'tessellationShader' and
       * 'geometryShader' are set to false in the VkPhysicalDeviceFeatures
       * structure returned by the driver.
       */
      switch (pCreateInfo->pStages[i].stage) {
      case VK_SHADER_STAGE_VERTEX_BIT:
      case VK_SHADER_STAGE_FRAGMENT_BIT:
         gfx_pipeline->stage_indices[gl_stage] = i;
         break;
      default:
         unreachable("Unsupported stage.");
      }
   }

   gfx_pipeline->base.layout =
      pvr_pipeline_layout_from_handle(pCreateInfo->layout);

   if (ms_info) {
      gfx_pipeline->rasterization_samples = ms_info->rasterizationSamples;
      /* Only the first 32 samples are relevant; default to all enabled. */
      gfx_pipeline->sample_mask =
         (ms_info->pSampleMask) ? ms_info->pSampleMask[0] : 0xFFFFFFFF;
   } else {
      gfx_pipeline->rasterization_samples = VK_SAMPLE_COUNT_1_BIT;
      gfx_pipeline->sample_mask = 0xFFFFFFFF;
   }

   /* Compiles and uploads shaders and PDS programs. */
   result = pvr_graphics_pipeline_compile(device,
                                          pipeline_cache,
                                          pCreateInfo,
                                          allocator,
                                          gfx_pipeline);
   if (result != VK_SUCCESS) {
      pvr_pipeline_finish(&gfx_pipeline->base);
      return result;
   }

   return VK_SUCCESS;
}
+
+/* If allocator == NULL, the internal one will be used. */
+static VkResult
+pvr_graphics_pipeline_create(struct pvr_device *device,
+ struct pvr_pipeline_cache *pipeline_cache,
+ const VkGraphicsPipelineCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *allocator,
+ VkPipeline *const pipeline_out)
+{
+ struct pvr_graphics_pipeline *gfx_pipeline;
+ VkResult result;
+
+ gfx_pipeline = vk_zalloc2(&device->vk.alloc,
+ allocator,
+ sizeof(*gfx_pipeline),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!gfx_pipeline)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ /* Compiles and uploads shaders and PDS programs too. */
+ result = pvr_graphics_pipeline_init(device,
+ pipeline_cache,
+ pCreateInfo,
+ allocator,
+ gfx_pipeline);
+ if (result != VK_SUCCESS) {
+ vk_free2(&device->vk.alloc, allocator, gfx_pipeline);
+ return result;
+ }
+
+ *pipeline_out = pvr_pipeline_to_handle(&gfx_pipeline->base);
+
+ return VK_SUCCESS;
+}
+
+VkResult
+pvr_CreateGraphicsPipelines(VkDevice _device,
+ VkPipelineCache pipelineCache,
+ uint32_t createInfoCount,
+ const VkGraphicsPipelineCreateInfo *pCreateInfos,
+ const VkAllocationCallbacks *pAllocator,
+ VkPipeline *pPipelines)
+{
+ PVR_FROM_HANDLE(pvr_pipeline_cache, pipeline_cache, pipelineCache);
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ VkResult result = VK_SUCCESS;
+
+ for (uint32_t i = 0; i < createInfoCount; i++) {
+ const VkResult local_result =
+ pvr_graphics_pipeline_create(device,
+ pipeline_cache,
+ &pCreateInfos[i],
+ pAllocator,
+ &pPipelines[i]);
+ if (local_result != VK_SUCCESS) {
+ result = local_result;
+ pPipelines[i] = VK_NULL_HANDLE;
+ }
+ }
+
+ return result;
+}
+
+/*****************************************************************************
+ Other functions
+*****************************************************************************/
+
+void pvr_DestroyPipeline(VkDevice _device,
+ VkPipeline _pipeline,
+ const VkAllocationCallbacks *pAllocator)
+{
+ PVR_FROM_HANDLE(pvr_pipeline, pipeline, _pipeline);
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+
+ if (!pipeline)
+ return;
+
+ switch (pipeline->type) {
+ case PVR_PIPELINE_TYPE_GRAPHICS: {
+ struct pvr_graphics_pipeline *const gfx_pipeline =
+ to_pvr_graphics_pipeline(pipeline);
+
+ pvr_graphics_pipeline_destroy(device, pAllocator, gfx_pipeline);
+ break;
+ }
+
+ case PVR_PIPELINE_TYPE_COMPUTE: {
+ struct pvr_compute_pipeline *const compute_pipeline =
+ to_pvr_compute_pipeline(pipeline);
+
+ pvr_compute_pipeline_destroy(device, pAllocator, compute_pipeline);
+ break;
+ }
+
+ default:
+ unreachable("Unknown pipeline type.");
+ }
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "pvr_device_info.h"
+#include "pvr_private.h"
+#include "util/blob.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vulkan/util/vk_util.h"
+
+static void pvr_pipeline_cache_load(struct pvr_pipeline_cache *cache,
+ const void *data,
+ size_t size)
+{
+ struct pvr_device *device = cache->device;
+ struct pvr_physical_device *pdevice = device->pdevice;
+ struct vk_pipeline_cache_header header;
+ struct blob_reader blob;
+
+ blob_reader_init(&blob, data, size);
+
+ blob_copy_bytes(&blob, &header, sizeof(header));
+ if (blob.overrun)
+ return;
+
+ if (header.header_size < sizeof(header))
+ return;
+ if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
+ return;
+ if (header.vendor_id != VK_VENDOR_ID_IMAGINATION)
+ return;
+ if (header.device_id != pdevice->dev_info.ident.device_id)
+ return;
+ if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
+ return;
+
+ /* TODO: There isn't currently any cached data so there's nothing to load
+ * at this point. Once there is something to load then load it now.
+ */
+}
+
/* Create a pipeline cache object.
 *
 * Any initial data is validated (and currently discarded, as nothing is
 * cached yet) by pvr_pipeline_cache_load().
 */
VkResult pvr_CreatePipelineCache(VkDevice _device,
                                 const VkPipelineCacheCreateInfo *pCreateInfo,
                                 const VkAllocationCallbacks *pAllocator,
                                 VkPipelineCache *pPipelineCache)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_pipeline_cache *cache;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
   /* No create flags are supported. */
   assert(pCreateInfo->flags == 0);

   cache = vk_object_alloc(&device->vk,
                           pAllocator,
                           sizeof(*cache),
                           VK_OBJECT_TYPE_PIPELINE_CACHE);
   if (!cache)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   cache->device = device;

   if (pCreateInfo->initialDataSize > 0) {
      pvr_pipeline_cache_load(cache,
                              pCreateInfo->pInitialData,
                              pCreateInfo->initialDataSize);
   }

   *pPipelineCache = pvr_pipeline_cache_to_handle(cache);

   return VK_SUCCESS;
}
+
+void pvr_DestroyPipelineCache(VkDevice _device,
+ VkPipelineCache _cache,
+ const VkAllocationCallbacks *pAllocator)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ PVR_FROM_HANDLE(pvr_pipeline_cache, cache, _cache);
+
+ if (!cache)
+ return;
+
+ vk_object_free(&device->vk, pAllocator, cache);
+}
+
+VkResult pvr_GetPipelineCacheData(VkDevice _device,
+ VkPipelineCache _cache,
+ size_t *pDataSize,
+ void *pData)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ struct pvr_physical_device *pdevice = device->pdevice;
+ struct blob blob;
+
+ if (pData)
+ blob_init_fixed(&blob, pData, *pDataSize);
+ else
+ blob_init_fixed(&blob, NULL, SIZE_MAX);
+
+ struct vk_pipeline_cache_header header = {
+ .header_size = sizeof(struct vk_pipeline_cache_header),
+ .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
+ .vendor_id = VK_VENDOR_ID_IMAGINATION,
+ .device_id = pdevice->dev_info.ident.device_id,
+ };
+ memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
+ blob_write_bytes(&blob, &header, sizeof(header));
+
+ /* TODO: Once there's some data to cache then this should be written to
+ * 'blob'.
+ */
+
+ *pDataSize = blob.size;
+
+ blob_finish(&blob);
+
+ return VK_SUCCESS;
+}
+
/* Merge srcCacheCount caches into destCache.
 *
 * All parameters are intentionally unused for now: no data is cached yet, so
 * merging is trivially successful.
 */
VkResult pvr_MergePipelineCaches(VkDevice _device,
                                 VkPipelineCache destCache,
                                 uint32_t srcCacheCount,
                                 const VkPipelineCache *pSrcCaches)
{
   /* TODO: Once there's some data to cache then this will need to be able to
    * merge caches together.
    */

   return VK_SUCCESS;
}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on radv driver which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PRIVATE_H
+#define PVR_PRIVATE_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "compiler/shader_enums.h"
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_entrypoints.h"
+#include "pvr_hw_pass.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue.h"
+#include "util/bitscan.h"
+#include "util/format/u_format.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "util/u_dynarray.h"
+#include "vk_command_buffer.h"
+#include "vk_device.h"
+#include "vk_image.h"
+#include "vk_instance.h"
+#include "vk_log.h"
+#include "vk_physical_device.h"
+#include "vk_queue.h"
+#include "wsi_common.h"
+
+#ifdef HAVE_VALGRIND
+# include <valgrind/valgrind.h>
+# include <valgrind/memcheck.h>
+# define VG(x) x
+#else
+# define VG(x) ((void)0)
+#endif
+
+#define VK_VENDOR_ID_IMAGINATION 0x1010
+
+#define PVR_STATE_PBE_DWORDS 2U
+
+#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
+ (uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U)
+
/* TODO: move into a common surface library? */
/* Memory layout of image data. */
enum pvr_memlayout {
   PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
   PVR_MEMLAYOUT_LINEAR,
   PVR_MEMLAYOUT_TWIDDLED,
   PVR_MEMLAYOUT_3DTWIDDLED,
};

/* Command buffer lifecycle state. */
enum pvr_cmd_buffer_status {
   PVR_CMD_BUFFER_STATUS_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_CMD_BUFFER_STATUS_INITIAL,
   PVR_CMD_BUFFER_STATUS_RECORDING,
   PVR_CMD_BUFFER_STATUS_EXECUTABLE,
};

/* Texture state variants; used to index pvr_image_view::texture_state. */
enum pvr_texture_state {
   PVR_TEXTURE_STATE_SAMPLE,
   PVR_TEXTURE_STATE_STORAGE,
   PVR_TEXTURE_STATE_ATTACHMENT,
   PVR_TEXTURE_STATE_MAX_ENUM,
};

/* Selects the active union member of struct pvr_sub_cmd. */
enum pvr_sub_cmd_type {
   PVR_SUB_CMD_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   PVR_SUB_CMD_TYPE_GRAPHICS,
   PVR_SUB_CMD_TYPE_COMPUTE,
   PVR_SUB_CMD_TYPE_TRANSFER,
};

/* Tracks how loaded depth/stencil values are used by a sub command. */
enum pvr_depth_stencil_usage {
   PVR_DEPTH_STENCIL_USAGE_UNDEFINED = 0, /* explicitly treat 0 as undefined */
   PVR_DEPTH_STENCIL_USAGE_NEEDED,
   PVR_DEPTH_STENCIL_USAGE_NEVER,
};

/* Hardware job types. Also used as bit positions for
 * enum pvr_pipeline_stage_bits below.
 */
enum pvr_job_type {
   PVR_JOB_TYPE_GEOM,
   PVR_JOB_TYPE_FRAG,
   PVR_JOB_TYPE_COMPUTE,
   PVR_JOB_TYPE_TRANSFER,
   PVR_JOB_TYPE_MAX
};

enum pvr_pipeline_type {
   PVR_PIPELINE_TYPE_INVALID = 0, /* explicitly treat 0 as undefined */
   PVR_PIPELINE_TYPE_GRAPHICS,
   PVR_PIPELINE_TYPE_COMPUTE,
};

/* Bitmask form of enum pvr_job_type. */
enum pvr_pipeline_stage_bits {
   PVR_PIPELINE_STAGE_GEOM_BIT = BITFIELD_BIT(PVR_JOB_TYPE_GEOM),
   PVR_PIPELINE_STAGE_FRAG_BIT = BITFIELD_BIT(PVR_JOB_TYPE_FRAG),
   PVR_PIPELINE_STAGE_COMPUTE_BIT = BITFIELD_BIT(PVR_JOB_TYPE_COMPUTE),
   PVR_PIPELINE_STAGE_TRANSFER_BIT = BITFIELD_BIT(PVR_JOB_TYPE_TRANSFER),
};

#define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \
   (PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT)

#define PVR_PIPELINE_STAGE_ALL_BITS \
   (PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS | PVR_PIPELINE_STAGE_TRANSFER_BIT)

/* TODO: This number must be changed when we add compute support. */
#define PVR_NUM_SYNC_PIPELINE_STAGES 3U

/* Warning: Do not define an invalid stage as 0 since other code relies on 0
 * being the first shader stage. This allows for stages to be split or added
 * in the future. Defining 0 as invalid will very likely cause problems.
 */
enum pvr_stage_allocation {
   PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
   PVR_STAGE_ALLOCATION_FRAGMENT,
   PVR_STAGE_ALLOCATION_COMPUTE,
   PVR_STAGE_ALLOCATION_COUNT
};

/* Scissor accumulation state defines
 * - Disabled means that a clear has been detected, and scissor accumulation
 *   should stop.
 * - Check for clear is when there's no clear loadops, but there could be
 *   another clear call that would be broken via scissoring
 * - Enabled means that a scissor has been set in the pipeline, and
 *   accumulation can continue
 */
enum pvr_scissor_accum_state {
   PVR_SCISSOR_ACCUM_INVALID = 0, /* Explicitly treat 0 as invalid */
   PVR_SCISSOR_ACCUM_DISABLED,
   PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR,
   PVR_SCISSOR_ACCUM_ENABLED,
};
+
/* Forward declarations. */
struct pvr_bo;
struct pvr_compute_ctx;
struct pvr_compute_pipeline;
struct pvr_free_list;
struct pvr_graphics_pipeline;
struct pvr_instance;
struct pvr_render_ctx;
struct rogue_compiler;

/* Per-shader-stage descriptor limits. */
struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

struct pvr_physical_device {
   struct vk_physical_device vk;

   /* Back-pointer to instance */
   struct pvr_instance *instance;

   char *name;
   /* NOTE(review): presumably the DRM master/render node fds and their
    * device paths — confirm against the winsys setup code.
    */
   int master_fd;
   int render_fd;
   char *master_path;
   char *render_path;

   struct pvr_winsys *ws;
   struct pvr_device_info dev_info;

   VkPhysicalDeviceMemoryProperties memory;

   /* UUID embedded in pipeline cache blobs; see pvr_pipeline_cache.c. */
   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];

   struct wsi_device wsi_device;

   struct rogue_compiler *compiler;
};

/* Instance holds a single physical device. */
struct pvr_instance {
   struct vk_instance vk;

   int physical_devices_count;
   struct pvr_physical_device physical_device;
};
+
struct pvr_queue {
   struct vk_queue vk;

   struct pvr_device *device;

   struct pvr_render_ctx *gfx_ctx;
   struct pvr_compute_ctx *compute_ctx;

   /* Per-job-type completion syncobjs. */
   struct pvr_winsys_syncobj *completion[PVR_JOB_TYPE_MAX];
};

struct pvr_semaphore {
   struct vk_object_base base;

   struct pvr_winsys_syncobj *syncobj;
};

struct pvr_fence {
   struct vk_object_base base;

   struct pvr_winsys_syncobj *syncobj;
};

/* A vertex buffer binding set via vkCmdBindVertexBuffers. */
struct pvr_vertex_binding {
   struct pvr_buffer *buffer;
   VkDeviceSize offset;
};

/* An uploaded PDS program: its backing BO plus data/code segment locations. */
struct pvr_pds_upload {
   struct pvr_bo *pvr_bo;
   /* Offset from the pds heap base address. */
   uint32_t data_offset;
   /* Offset from the pds heap base address. */
   uint32_t code_offset;

   /* data_size + code_size = program_size. */
   uint32_t data_size;
   uint32_t code_size;
};
+
struct pvr_device {
   struct vk_device vk;
   struct pvr_instance *instance;
   struct pvr_physical_device *pdevice;

   int master_fd;
   int render_fd;

   struct pvr_winsys *ws;
   struct pvr_winsys_heaps heaps;

   struct pvr_free_list *global_free_list;

   struct pvr_queue *queues;
   uint32_t queue_count;

   /* Running count of the number of job submissions across all queues. */
   uint32_t global_queue_job_count;

   /* Running count of the number of presentations across all queues. */
   uint32_t global_queue_present_count;

   uint32_t pixel_event_data_size_in_dwords;

   /* PDS program used to add a fence to compute sub commands. */
   struct pvr_pds_upload pds_compute_fence_program;

   /* Features enabled at device creation time. */
   VkPhysicalDeviceFeatures features;
};

struct pvr_device_memory {
   struct vk_object_base base;
   struct pvr_winsys_bo *bo;
};

struct pvr_mip_level {
   /* Offset of the mip level in bytes */
   uint32_t offset;

   /* Aligned mip level size in bytes */
   uint32_t size;

   /* Aligned row length in bytes */
   uint32_t pitch;

   /* Aligned height in bytes */
   uint32_t height_pitch;
};
+
struct pvr_image {
   struct vk_image vk;

   /* vma this image is bound to */
   struct pvr_winsys_vma *vma;

   /* Device address the image is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;

   /* Derived and other state */
   VkExtent3D physical_extent;
   enum pvr_memlayout memlayout;
   VkDeviceSize layer_size;
   VkDeviceSize size;

   VkDeviceSize alignment;

   /* NOTE(review): 14 looks like a hard-coded maximum mip level count —
    * consider a named constant; confirm against image creation limits.
    */
   struct pvr_mip_level mip_levels[14];
};

struct pvr_buffer {
   struct vk_object_base base;

   /* Saved information from pCreateInfo */
   VkDeviceSize size;

   /* Derived and other state */
   uint32_t alignment;
   /* vma this buffer is bound to */
   struct pvr_winsys_vma *vma;
   /* Device address the buffer is mapped to in device virtual address space */
   pvr_dev_addr_t dev_addr;
};

struct pvr_image_view {
   struct vk_image_view vk;

   /* Saved information from pCreateInfo. */
   const struct pvr_image *image;

   /* Prepacked Texture Image dword 0 and 1. It will be copied to the
    * descriptor info during pvr_UpdateDescriptorSets.
    *
    * We create separate texture states for sampling, storage and input
    * attachment cases.
    */
   uint64_t texture_state[PVR_TEXTURE_STATE_MAX_ENUM][2];
};

struct pvr_sampler {
   struct vk_object_base base;
};

/* Sizes (in dwords) a descriptor occupies in the common store. */
struct pvr_descriptor_size_info {
   /* Non-spillable size for storage in the common store. */
   uint32_t primary;

   /* Spillable size to accommodate limitation of the common store. */
   uint32_t secondary;

   uint32_t alignment;
};
+
/* A single binding within a descriptor set layout. */
struct pvr_descriptor_set_layout_binding {
   VkDescriptorType type;

   /* "M" in layout(set = N, binding = M)
    * Can be used to index bindings in the descriptor_set_layout. Not the
    * original user specified binding number as those might be non-contiguous.
    */
   uint32_t binding_number;

   uint32_t descriptor_count;

   /* Index into the flattened descriptor set */
   uint16_t descriptor_index;

   VkShaderStageFlags shader_stages;
   /* Mask composed by shifted PVR_STAGE_ALLOCATION_...
    * Makes it easier to check active shader stages by just shifting and
    * ANDing instead of using VkShaderStageFlags and match the PVR_STAGE_...
    */
   uint32_t shader_stage_mask;

   /* Primary/secondary offsets of this binding, per stage allocation. */
   struct {
      uint32_t primary;
      uint32_t secondary;
   } per_stage_offset_in_dwords[PVR_STAGE_ALLOCATION_COUNT];

   /* Index at which the samplers can be found in the descriptor_set_layout.
    * 0 when the samplers are at index 0 or no samplers are present.
    * Check descriptor_count to differentiate. It will be 0 for 0 samplers.
    */
   uint32_t immutable_samplers_index;
};

/* All sizes are in dwords. */
struct pvr_descriptor_set_layout_mem_layout {
   uint32_t primary_offset;
   uint32_t primary_size;

   uint32_t secondary_offset;
   uint32_t secondary_size;

   uint32_t primary_dynamic_size;
   uint32_t secondary_dynamic_size;
};

struct pvr_descriptor_set_layout {
   struct vk_object_base base;

   /* Total amount of descriptors contained in this set. */
   uint32_t descriptor_count;

   /* Count of dynamic buffers. */
   uint32_t dynamic_buffer_count;

   uint32_t binding_count;
   struct pvr_descriptor_set_layout_binding *bindings;

   uint32_t immutable_sampler_count;
   struct pvr_sampler **immutable_samplers;

   /* Shader stages requiring access to descriptors in this set. */
   VkShaderStageFlags shader_stages;

   /* Count of each VkDescriptorType per shader stage. Dynamically allocated
    * arrays per stage as to not hard code the max descriptor type here.
    *
    * Note: when adding a new type, it might not numerically follow the
    * previous type so a sparse array will be created. You might want to
    * readjust how these arrays are created and accessed.
    */
   uint32_t *per_stage_descriptor_count[PVR_STAGE_ALLOCATION_COUNT];

   uint32_t total_size_in_dwords;
   struct pvr_descriptor_set_layout_mem_layout
      memory_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT];
};
+
struct pvr_descriptor_pool {
   struct vk_object_base base;

   VkAllocationCallbacks alloc;

   /* Saved information from pCreateInfo. */
   uint32_t max_sets;

   uint32_t total_size_in_dwords;
   uint32_t current_size_in_dwords;

   /* Derived and other state. */
   /* List of the descriptor sets created using this pool. */
   struct list_head descriptor_sets;
};

/* A single written descriptor. */
struct pvr_descriptor {
   VkDescriptorType type;

   /* TODO: Follow anv_descriptor layout when adding support for
    * other descriptor types.
    */
   pvr_dev_addr_t buffer_dev_addr;
   VkDeviceSize buffer_desc_range;
   VkDeviceSize buffer_create_info_size;
};

struct pvr_descriptor_set {
   struct vk_object_base base;

   const struct pvr_descriptor_set_layout *layout;
   const struct pvr_descriptor_pool *pool;

   struct pvr_bo *pvr_bo;

   /* Links this descriptor set into pvr_descriptor_pool::descriptor_sets list.
    */
   struct list_head link;

   /* Array of size layout::descriptor_count. */
   /* NOTE(review): consider a C99 flexible array member ([]) instead of [0]. */
   struct pvr_descriptor descriptors[0];
};

/* Descriptor sets currently bound on a command buffer. */
struct pvr_descriptor_state {
   struct pvr_descriptor_set *descriptor_sets[PVR_MAX_DESCRIPTOR_SETS];
   uint32_t valid_mask;
};

/* A recorded buffer-to-buffer copy for a transfer sub command. */
struct pvr_transfer_cmd {
   /* Node to link this cmd into the transfer_cmds list in
    * pvr_sub_cmd::transfer structure.
    */
   struct list_head link;

   struct pvr_buffer *src;
   struct pvr_buffer *dst;
   uint32_t region_count;
   /* NOTE(review): consider a C99 flexible array member ([]) instead of [0]. */
   VkBufferCopy2 regions[0];
};
+
/* A recorded sub command of a command buffer; `type` selects the active
 * union member below.
 */
struct pvr_sub_cmd {
   /* This links the subcommand in pvr_cmd_buffer:sub_cmds list. */
   struct list_head link;

   enum pvr_sub_cmd_type type;

   union {
      struct {
         const struct pvr_framebuffer *framebuffer;

         struct pvr_render_job job;

         struct pvr_bo *depth_bias_bo;
         struct pvr_bo *scissor_bo;

         /* Tracking how the loaded depth/stencil values are being used. */
         enum pvr_depth_stencil_usage depth_usage;
         enum pvr_depth_stencil_usage stencil_usage;

         /* Tracking whether the subcommand modifies depth/stencil. */
         bool modifies_depth;
         bool modifies_stencil;

         /* Control stream builder object */
         struct pvr_csb control_stream;

         uint32_t hw_render_idx;

         uint32_t max_tiles_in_flight;

         bool empty_cmd;

         /* True if any fragment shader used in this sub command uses atomic
          * operations.
          */
         bool frag_uses_atomic_ops;

         bool disable_compute_overlap;

         /* True if any fragment shader used in this sub command has side
          * effects.
          */
         bool frag_has_side_effects;

         /* True if any vertex shader used in this sub command contains both
          * texture reads and texture writes.
          */
         bool vertex_uses_texture_rw;

         /* True if any fragment shader used in this sub command contains
          * both texture reads and texture writes.
          */
         bool frag_uses_texture_rw;
      } gfx;

      struct {
         /* Control stream builder object. */
         struct pvr_csb control_stream;

         struct pvr_winsys_compute_submit_info submit_info;

         uint32_t num_shared_regs;

         /* True if any shader used in this sub command uses atomic
          * operations.
          */
         bool uses_atomic_ops;

         bool uses_barrier;
      } compute;

      struct {
         /* List of pvr_transfer_cmd type structures. */
         struct list_head transfer_cmds;
      } transfer;
   };
};
+
/* State of the render pass instance currently being recorded. */
struct pvr_render_pass_info {
   const struct pvr_render_pass *pass;
   struct pvr_framebuffer *framebuffer;

   struct pvr_image_view **attachments;

   uint32_t subpass_idx;
   uint32_t current_hw_subpass;

   VkRect2D render_area;

   uint32_t clear_value_count;
   VkClearValue *clear_values;

   VkPipelineBindPoint pipeline_bind_point;

   bool process_empty_tiles;
   bool enable_bg_tag;
   uint32_t userpass_spawn;

   /* Have we had to scissor a depth/stencil clear because render area was not
    * tile aligned?
    */
   bool scissor_ds_clear;
};

/* One dirty bit per PPP state group; set when the corresponding part of
 * struct pvr_ppp_state needs to be (re-)emitted.
 */
struct pvr_emit_state {
   bool ppp_control : 1;
   bool isp : 1;
   bool isp_fb : 1;
   bool isp_ba : 1;
   bool isp_bb : 1;
   bool isp_dbsc : 1;
   bool pds_fragment_stateptr0 : 1;
   bool pds_fragment_stateptr1 : 1;
   bool pds_fragment_stateptr2 : 1;
   bool pds_fragment_stateptr3 : 1;
   bool region_clip : 1;
   bool viewport : 1;
   bool wclamp : 1;
   bool output_selects : 1;
   bool varying_word0 : 1;
   bool varying_word1 : 1;
   bool varying_word2 : 1;
   bool stream_out : 1;
};
+
/* Shadow copy of the PPP state words; pvr_emit_state tracks which of these
 * groups are dirty. (NOTE(review): assumed from the emit-state bit names —
 * confirm against the control stream emission code.)
 */
struct pvr_ppp_state {
   uint32_t header;

   struct {
      /* TODO: Can we get rid of the "control" field? */
      struct pvr_cmd_struct(TA_STATE_ISPCTL) control_struct;
      uint32_t control;

      uint32_t front_a;
      uint32_t front_b;
      uint32_t back_a;
      uint32_t back_b;
   } isp;

   struct {
      uint16_t scissor_index;
      uint16_t depthbias_index;
   } depthbias_scissor_indices;

   struct {
      uint32_t pixel_shader_base;
      uint32_t texture_uniform_code_base;
      uint32_t size_info1;
      uint32_t size_info2;
      uint32_t varying_base;
      uint32_t texture_state_data_base;
      uint32_t uniform_state_data_base;
   } pds;

   struct {
      uint32_t word0;
      uint32_t word1;
   } region_clipping;

   struct {
      uint32_t a0;
      uint32_t m0;
      uint32_t a1;
      uint32_t m1;
      uint32_t a2;
      uint32_t m2;
   } viewports[PVR_MAX_VIEWPORTS];

   uint32_t viewport_count;

   uint32_t output_selects;

   uint32_t varying_word[2];

   uint32_t ppp_control;
};
+
/* Bits identifying dynamic pipeline states; tested against
 * pvr_dynamic_state::mask.
 */
#define PVR_DYNAMIC_STATE_BIT_VIEWPORT BITFIELD_BIT(0U)
#define PVR_DYNAMIC_STATE_BIT_SCISSOR BITFIELD_BIT(1U)
#define PVR_DYNAMIC_STATE_BIT_LINE_WIDTH BITFIELD_BIT(2U)
#define PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS BITFIELD_BIT(3U)
#define PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK BITFIELD_BIT(4U)
#define PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK BITFIELD_BIT(5U)
#define PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE BITFIELD_BIT(6U)
#define PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS BITFIELD_BIT(7U)

/* All bits above; relies on BLEND_CONSTANTS being the highest-numbered bit. */
#define PVR_DYNAMIC_STATE_ALL_BITS \
   ((PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS << 1U) - 1U)
+
/* Pipeline state that may be set dynamically at record time rather than
 * baked into the pipeline at creation.
 */
struct pvr_dynamic_state {
   /* Identifies which pipeline state is static or dynamic.
    * To test for dynamic: & PVR_DYNAMIC_STATE_BIT_...
    */
   uint32_t mask;

   struct {
      /* TODO: fixme in the original code - figure out what. */
      uint32_t count;
      VkViewport viewports[PVR_MAX_VIEWPORTS];
   } viewport;

   struct {
      /* TODO: fixme in the original code - figure out what. */
      uint32_t count;
      VkRect2D scissors[PVR_MAX_VIEWPORTS];
   } scissor;

   /* Saved information from pCreateInfo. */
   float line_width;

   struct {
      /* Saved information from pCreateInfo. */
      float constant_factor;
      float clamp;
      float slope_factor;
   } depth_bias;
   float blend_constants[4];
   /* Stencil state below is split front/back to match VkStencilOpState. */
   struct {
      uint32_t front;
      uint32_t back;
   } compare_mask;
   struct {
      uint32_t front;
      uint32_t back;
   } write_mask;
   struct {
      uint32_t front;
      uint32_t back;
   } reference;
};
+
/* Per-draw parameters tracked between draw calls; compared against the
 * previous draw to decide whether PDS attrib programs need switching (see
 * pvr_cmd_buffer_state::dirty.draw_base_instance / draw_variant).
 */
struct pvr_cmd_buffer_draw_state {
   uint32_t base_instance;
   uint32_t base_vertex;
   bool draw_indirect;
   bool draw_indexed;
};
+
/* All mutable state tracked while a command buffer is being recorded. */
struct pvr_cmd_buffer_state {
   /* VK_SUCCESS, or the first error hit during recording (checked by
    * PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE).
    */
   VkResult status;

   /* Pipeline binding. */
   const struct pvr_graphics_pipeline *gfx_pipeline;

   const struct pvr_compute_pipeline *compute_pipeline;

   struct pvr_render_pass_info render_pass_info;

   struct pvr_sub_cmd *current_sub_cmd;

   struct pvr_ppp_state ppp_state;

   union {
      struct pvr_emit_state emit_state;
      /* This is intended to allow setting and clearing of all bits. This
       * shouldn't be used to access specific bits of ppp_state.
       */
      uint32_t emit_state_bits;
   };

   struct {
      /* FIXME: Check if we need a dirty state flag for the given scissor
       * accumulation state.
       * Check whether these members should be moved in the top level struct
       * and this struct replaces with just pvr_dynamic_state "dynamic".
       */
      enum pvr_scissor_accum_state scissor_accum_state;
      VkRect2D scissor_accum_bounds;

      struct pvr_dynamic_state common;
   } dynamic;

   struct pvr_vertex_binding vertex_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS];

   struct {
      struct pvr_buffer *buffer;
      VkDeviceSize offset;
      VkIndexType type;
   } index_buffer_binding;

   struct {
      uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
      VkShaderStageFlags dirty_stages;
   } push_constants;

   /* Array size of barriers_needed is based on number of sync pipeline
    * stages.
    */
   uint32_t barriers_needed[4];

   struct pvr_descriptor_state gfx_desc_state;
   struct pvr_descriptor_state compute_desc_state;

   VkFormat depth_format;

   /* Dirty flags: set when the corresponding state changed since it was last
    * consumed, so redundant (re-)emission can be skipped.
    */
   struct {
      bool viewport : 1;
      bool scissor : 1;

      bool compute_pipeline_binding : 1;
      bool compute_desc_dirty : 1;

      bool gfx_pipeline_binding : 1;
      bool gfx_desc_dirty : 1;

      bool vertex_bindings : 1;
      bool index_buffer_binding : 1;
      bool vertex_descriptors : 1;
      bool fragment_descriptors : 1;

      bool line_width : 1;

      bool depth_bias : 1;

      bool blend_constants : 1;

      bool compare_mask : 1;
      bool write_mask : 1;
      bool reference : 1;

      bool userpass_spawn : 1;

      /* Some draw state needs to be tracked for changes between draw calls
       * i.e. if we get a draw with baseInstance=0, followed by a call with
       * baseInstance=1 that needs to cause us to select a different PDS
       * attrib program and update the BASE_INSTANCE PDS const. If only
       * baseInstance changes then we just have to update the data section.
       */
      bool draw_base_instance : 1;
      bool draw_variant : 1;
   } dirty;

   struct pvr_cmd_buffer_draw_state draw_state;

   struct {
      uint32_t code_offset;
      const struct pvr_pds_info *info;
   } pds_shader;

   uint32_t max_shared_regs;

   /* Address of data segment for vertex attrib upload program. */
   uint32_t pds_vertex_attrib_offset;

   uint32_t pds_fragment_uniform_data_offset;
};
+
+static_assert(
+ sizeof(((struct pvr_cmd_buffer_state *)(0))->emit_state) <=
+ sizeof(((struct pvr_cmd_buffer_state *)(0))->emit_state_bits),
+ "Size of emit_state_bits must be greater that or equal to emit_state.");
+
/* Driver command buffer object backing VkCommandBuffer. */
struct pvr_cmd_buffer {
   struct vk_command_buffer vk;

   struct pvr_device *device;

   /* Buffer status, invalid/initial/recording/executable */
   enum pvr_cmd_buffer_status status;

   /* Buffer usage flags */
   VkCommandBufferUsageFlags usage_flags;

   struct util_dynarray depth_bias_array;

   struct util_dynarray scissor_array;
   uint32_t scissor_words[2];

   struct pvr_cmd_buffer_state state;

   /* List of pvr_bo structs associated with this cmd buffer. */
   struct list_head bo_list;

   /* List of sub commands recorded into this buffer; element type is
    * presumably struct pvr_sub_cmd — TODO confirm the link member.
    */
   struct list_head sub_cmds;
};
+
/* Driver object backing VkPipelineLayout: the set layouts plus precomputed
 * per-stage register/offset layout information.
 */
struct pvr_pipeline_layout {
   struct vk_object_base base;

   uint32_t set_count;
   /* Contains set_count amount of descriptor set layouts. */
   struct pvr_descriptor_set_layout *set_layout[PVR_MAX_DESCRIPTOR_SETS];

   VkShaderStageFlags push_constants_shader_stages;

   VkShaderStageFlags shader_stages;

   /* Per stage masks indicating which set in the layout contains any
    * descriptor of the appropriate types: VK..._{SAMPLER, SAMPLED_IMAGE,
    * UNIFORM_TEXEL_BUFFER, UNIFORM_BUFFER, STORAGE_BUFFER}.
    * Shift by the set's number to check the mask (1U << set_num).
    */
   uint32_t per_stage_descriptor_masks[PVR_STAGE_ALLOCATION_COUNT];

   /* Array of descriptor offsets at which the set's descriptors' start, per
    * stage, within all the sets in the pipeline layout per descriptor type.
    * Note that we only store into for specific descriptor types
    * VK_DESCRIPTOR_TYPE_{SAMPLER, SAMPLED_IMAGE, UNIFORM_TEXEL_BUFFER,
    * UNIFORM_BUFFER, STORAGE_BUFFER}, the rest will be 0.
    */
   uint32_t
      descriptor_offsets[PVR_MAX_DESCRIPTOR_SETS][PVR_STAGE_ALLOCATION_COUNT]
                        [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT];

   /* There is no accounting for dynamics in here. They will be garbage values.
    */
   struct pvr_descriptor_set_layout_mem_layout
      register_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT]
                                         [PVR_MAX_DESCRIPTOR_SETS];

   /* All sizes in dwords. */
   struct pvr_pipeline_layout_reg_info {
      uint32_t primary_dynamic_size_in_dwords;
      uint32_t secondary_dynamic_size_in_dwords;
   } per_stage_reg_info[PVR_STAGE_ALLOCATION_COUNT];
};
+
/* Driver object backing VkPipelineCache; currently holds no cached data. */
struct pvr_pipeline_cache {
   struct vk_object_base base;

   struct pvr_device *device;
};
+
/* Uploaded PDS uniform program plus its metadata for one shader stage. */
struct pvr_stage_allocation_uniform_state {
   struct pvr_pds_upload pds_code;
   /* Since we upload the code segment separately from the data segment
    * pds_code->data_size might be 0 whilst
    * pds_info->data_size_in_dwords might be >0 in the case of this struct
    * referring to the code upload.
    */
   struct pvr_pds_info pds_info;
};
+
/* A PDS vertex-attribute program variant: uploaded code plus its info. */
struct pvr_pds_attrib_program {
   struct pvr_pds_info info;
   /* The uploaded PDS program stored here only contains the code segment,
    * meaning the data size will be 0, unlike the data size stored in the
    * 'info' member above.
    */
   struct pvr_pds_upload program;
};
+
/* Per-shader-stage resource usage and behavior flags, derived at pipeline
 * compile time and consumed when building submissions.
 */
struct pvr_pipeline_stage_state {
   uint32_t const_shared_reg_count;
   uint32_t const_shared_reg_offset;
   uint32_t temps_count;

   uint32_t coefficient_size;

   /* True if this shader uses any atomic operations. */
   bool uses_atomic_ops;

   /* True if this shader uses both texture reads and texture writes. */
   bool uses_texture_rw;

   /* Only used for compute stage. */
   bool uses_barrier;

   /* True if this shader has side effects */
   bool has_side_effects;

   /* True if this shader is simply a nop.end. */
   bool empty_program;
};
+
/* Compiled vertex shader and all its associated uploaded PDS programs. */
struct pvr_vertex_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_bo *bo;
   uint32_t entry_offset;

   /* 2 since we only need STATE_VARYING{0,1} state words. */
   uint32_t varying[2];

   struct pvr_pds_attrib_program
      pds_attrib_programs[PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_uniform_state uniform_state;
   uint32_t vertex_input_size;
   uint32_t vertex_output_size;
   uint32_t output_selects;
   uint32_t user_clip_planes_mask;
};
+
/* Compiled fragment shader and its uploaded PDS coefficient/fragment
 * programs.
 */
struct pvr_fragment_shader_state {
   /* Pointer to a buffer object that contains the shader binary. */
   struct pvr_bo *bo;
   uint32_t entry_offset;

   struct pvr_pipeline_stage_state stage_state;
   /* FIXME: Move this into stage_state? */
   struct pvr_stage_allocation_uniform_state uniform_state;
   uint32_t pass_type;

   struct pvr_pds_upload pds_coeff_program;
   struct pvr_pds_upload pds_fragment_program;
};
+
/* Common base for compute and graphics pipelines; 'type' discriminates and
 * is checked by the to_pvr_*_pipeline() downcast helpers.
 */
struct pvr_pipeline {
   struct vk_object_base base;

   enum pvr_pipeline_type type;

   /* Saved information from pCreateInfo. */
   struct pvr_pipeline_layout *layout;
};
+
/* Compute pipeline: base pipeline plus the compute shader binary and its
 * uploaded PDS programs (including an optional base-workgroup variant).
 */
struct pvr_compute_pipeline {
   struct pvr_pipeline base;

   struct {
      /* Pointer to a buffer object that contains the shader binary. */
      struct pvr_bo *bo;

      struct {
         uint32_t base_workgroup : 1;
      } flags;

      struct pvr_stage_allocation_uniform_state uniform;

      struct pvr_pds_upload primary_program;
      struct pvr_pds_info primary_program_info;

      struct pvr_pds_upload primary_program_base_workgroup_variant;
      struct pvr_pds_info primary_program_base_workgroup_variant_info;
      /* Offset within the PDS data section at which the base workgroup id
       * resides.
       */
      uint32_t base_workgroup_ids_dword_offset;
   } state;
};
+
/* Graphics pipeline: base pipeline plus fixed-function state saved from
 * pCreateInfo and the compiled vertex/fragment shader states.
 */
struct pvr_graphics_pipeline {
   struct pvr_pipeline base;

   VkSampleCountFlagBits rasterization_samples;
   struct pvr_raster_state {
      /* Derived and other state. */
      /* Indicates whether primitives are discarded immediately before the
       * rasterization stage.
       */
      bool discard_enable;
      VkCullModeFlags cull_mode;
      VkFrontFace front_face;
      bool depth_bias_enable;
      bool depth_clamp_enable;
   } raster_state;
   struct {
      VkPrimitiveTopology topology;
      bool primitive_restart;
   } input_asm_state;
   uint32_t sample_mask;

   struct pvr_dynamic_state dynamic_state;

   VkCompareOp depth_compare_op;
   bool depth_write_disable;

   struct {
      VkCompareOp compare_op;
      /* SOP1 */
      VkStencilOp fail_op;
      /* SOP2 */
      VkStencilOp depth_fail_op;
      /* SOP3 */
      VkStencilOp pass_op;
   } stencil_front, stencil_back;

   /* Derived and other state */
   size_t stage_indices[MESA_SHADER_FRAGMENT + 1];

   struct pvr_vertex_shader_state vertex_shader_state;
   struct pvr_fragment_shader_state fragment_shader_state;
};
+
/* A render target within a framebuffer; the mutex presumably guards lazy
 * creation/validation of rt_dataset — TODO confirm against users.
 */
struct pvr_render_target {
   struct pvr_rt_dataset *rt_dataset;

   pthread_mutex_t mutex;

   bool valid;
};
+
/* Driver object backing VkFramebuffer. */
struct pvr_framebuffer {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t width;
   uint32_t height;
   uint32_t layers;

   uint32_t attachment_count;
   struct pvr_image_view **attachments;

   /* Derived and other state. */
   struct pvr_bo *ppp_state_bo;
   /* PPP state size in dwords. */
   size_t ppp_state_size;

   uint32_t render_targets_count;
   struct pvr_render_target *render_targets;
};
+
/* One attachment description of a render pass, plus derived properties. */
struct pvr_render_pass_attachment {
   /* Saved information from pCreateInfo. */
   VkAttachmentLoadOp load_op;

   VkAttachmentStoreOp store_op;

   VkAttachmentLoadOp stencil_load_op;

   VkAttachmentStoreOp stencil_store_op;

   VkFormat vk_format;
   uint32_t sample_count;
   VkImageLayout initial_layout;

   /* Derived and other state. */
   /* True if the attachment format includes a stencil component. */
   bool has_stencil;

   /* Can this surface be resolved by the PBE. */
   bool is_pbe_downscalable;

   /* Index of this attachment within the render pass. */
   uint32_t index;
};
+
/* One subpass of a render pass, with its attachment references and the
 * derived inter-subpass dependency information.
 */
struct pvr_render_subpass {
   /* Saved information from pCreateInfo. */
   /* The number of samples per color attachment (or depth attachment if
    * z-only).
    */
   /* FIXME: rename to 'samples' to match struct pvr_image */
   uint32_t sample_count;

   uint32_t color_count;
   uint32_t *color_attachments;
   uint32_t *resolve_attachments;

   uint32_t input_count;
   uint32_t *input_attachments;

   uint32_t *depth_stencil_attachment;

   /* Derived and other state. */
   uint32_t dep_count;
   uint32_t *dep_list;

   /* Array with dep_count elements. flush_on_dep[x] is true if this subpass
    * and the subpass dep_list[x] can't be in the same hardware render.
    */
   bool *flush_on_dep;

   /* Index of this subpass within the render pass. */
   uint32_t index;

   uint32_t userpass_spawn;

   VkPipelineBindPoint pipeline_bind_point;
};
+
/* Driver object backing VkRenderPass, including the precomputed hardware
 * render setup (hw_setup).
 */
struct pvr_render_pass {
   struct vk_object_base base;

   /* Saved information from pCreateInfo. */
   uint32_t attachment_count;

   struct pvr_render_pass_attachment *attachments;

   uint32_t subpass_count;

   struct pvr_render_subpass *subpasses;

   struct pvr_renderpass_hwsetup *hw_setup;

   /* Derived and other state. */
   /* FIXME: rename to 'max_samples' as we use 'samples' elsewhere */
   uint32_t max_sample_count;

   /* The maximum number of tile buffers to use in any subpass. */
   uint32_t max_tilebuffer_count;
};
+
/* Programs and parameters used to implement attachment load ops (e.g.
 * clears/loads at the start of a render).
 */
struct pvr_load_op {
   bool is_hw_object;

   uint32_t clear_mask;

   /* USC fragment program performing the load/clear. */
   struct pvr_bo *usc_frag_prog_bo;
   uint32_t const_shareds_count;
   uint32_t shareds_dest_offset;
   uint32_t shareds_count;

   struct pvr_pds_upload pds_frag_prog;

   struct pvr_pds_upload pds_tex_state_prog;
   uint32_t temps_count;
};
+
/* WSI (window-system integration) setup/teardown. */
VkResult pvr_wsi_init(struct pvr_physical_device *pdevice);
void pvr_wsi_finish(struct pvr_physical_device *pdevice);

/* Queue creation/destruction for a logical device (see pvr_queue.c). */
VkResult pvr_queues_create(struct pvr_device *device,
                           const VkDeviceCreateInfo *pCreateInfo);
void pvr_queues_destroy(struct pvr_device *device);

/* Maps device memory into the device address space, returning the VMA and
 * device address.
 */
VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out);
void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma);

/* Helpers uploading data / PDS programs / USC code to GPU-visible buffers. */
VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_bo **const pvr_bo_out);
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out);

VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_bo **const pvr_bo_out);

VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_transfer_cmd *transfer_cmd);

VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
                                  struct pvr_winsys_heap *heap,
                                  uint64_t size,
                                  uint32_t flags,
                                  struct pvr_bo **const pvr_bo_out);
+
+static inline struct pvr_compute_pipeline *
+to_pvr_compute_pipeline(struct pvr_pipeline *pipeline)
+{
+ assert(pipeline->type == PVR_PIPELINE_TYPE_COMPUTE);
+ return container_of(pipeline, struct pvr_compute_pipeline, base);
+}
+
+static inline struct pvr_graphics_pipeline *
+to_pvr_graphics_pipeline(struct pvr_pipeline *pipeline)
+{
+ assert(pipeline->type == PVR_PIPELINE_TYPE_GRAPHICS);
+ return container_of(pipeline, struct pvr_graphics_pipeline, base);
+}
+
+/* FIXME: Place this in USC specific header? */
+/* clang-format off */
+static inline enum PVRX(PDSINST_DOUTU_SAMPLE_RATE)
+pvr_sample_rate_from_usc_msaa_mode(enum rogue_msaa_mode msaa_mode)
+/* clang-format on */
+{
+ switch (msaa_mode) {
+ case ROGUE_MSAA_MODE_PIXEL:
+ return PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE);
+ case ROGUE_MSAA_MODE_SELECTIVE:
+ return PVRX(PDSINST_DOUTU_SAMPLE_RATE_SELECTIVE);
+ case ROGUE_MSAA_MODE_FULL:
+ return PVRX(PDSINST_DOUTU_SAMPLE_RATE_FULL);
+ default:
+ unreachable("Undefined MSAA mode.");
+ }
+}
+
/* Builds and uploads the PDS fragment program for the given fragment shader
 * binary.
 */
VkResult pvr_pds_fragment_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   const struct pvr_bo *fragment_shader_bo,
   uint32_t fragment_temp_count,
   enum rogue_msaa_mode msaa_mode,
   bool has_phase_rate_change,
   struct pvr_pds_upload *const pds_upload_out);
+
/* Short-hand wrapper around the common vk_object handle unwrap macro. */
#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
   VK_FROM_HANDLE(__pvr_type, __name, __handle)

/* Handle <-> driver-struct cast helpers generated by the vk_object macros;
 * the second argument names the vk_object_base member within each struct.
 */
VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer,
                       vk.base,
                       VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(pvr_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(pvr_instance,
                       vk.base,
                       VkInstance,
                       VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(pvr_physical_device,
                       vk.base,
                       VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(pvr_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_device_memory,
                               base,
                               VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_cache,
                               base,
                               VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer, base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image_view,
                               vk.base,
                               VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set_layout,
                               base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set,
                               base,
                               VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_pool,
                               base,
                               VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_sampler,
                               base,
                               VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_semaphore,
                               base,
                               VkSemaphore,
                               VK_OBJECT_TYPE_SEMAPHORE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_layout,
                               base,
                               VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline,
                               base,
                               VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_framebuffer,
                               base,
                               VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass,
                               base,
                               VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
+
+/**
+ * Warn on ignored extension structs.
+ *
+ * The Vulkan spec requires us to ignore unsupported or unknown structs in
+ * a pNext chain. In debug mode, emitting warnings for ignored structs may
+ * help us discover structs that we should not have ignored.
+ *
+ *
+ * From the Vulkan 1.0.38 spec:
+ *
+ * Any component of the implementation (the loader, any enabled layers,
+ * and drivers) must skip over, without processing (other than reading the
+ * sType and pNext members) any chained structures with sType values not
+ * defined by extensions supported by that component.
+ */
+#define pvr_debug_ignored_stype(sType) \
+ mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
+
/* Debug helper macros. */
/* Bails out of the calling (void) function when the command buffer is not in
 * the recording state, or when a previous recording step already failed; an
 * error is reported via vk_errorf() in both cases. Intended for use at the
 * top of vkCmd* entry points.
 */
#define PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer) \
   do { \
      struct pvr_cmd_buffer *const _cmd_buffer = (cmd_buffer); \
      if (_cmd_buffer->status != PVR_CMD_BUFFER_STATUS_RECORDING) { \
         vk_errorf(_cmd_buffer, \
                   VK_ERROR_OUT_OF_DEVICE_MEMORY, \
                   "Command buffer is not in recording state"); \
         return; \
      } else if (_cmd_buffer->state.status < VK_SUCCESS) { \
         vk_errorf(_cmd_buffer, \
                   _cmd_buffer->state.status, \
                   "Skipping function as command buffer has " \
                   "previous build error"); \
         return; \
      } \
   } while (0)
+
+/**
+ * Print a FINISHME message, including its source location.
+ */
+#define pvr_finishme(format, ...) \
+ do { \
+ static bool reported = false; \
+ if (!reported) { \
+ mesa_logw("%s:%d: FINISHME: " format, \
+ __FILE__, \
+ __LINE__, \
+ ##__VA_ARGS__); \
+ reported = true; \
+ } \
+ } while (false)
+
/* A non-fatal assert. Useful for debugging. */
/* Unlike assert(), this only logs the failure and continues; it compiles to
 * nothing when DEBUG is not defined.
 */
#ifdef DEBUG
#   define pvr_assert(x) \
      ({ \
         if (unlikely(!(x))) \
            mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
      })
#else
#   define pvr_assert(x)
#endif
+
+#endif /* PVR_PRIVATE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pvr_private.h"
+
/* TODO: implement. Stub: asserts in debug builds; in release (NDEBUG) builds
 * it silently reports success without creating anything.
 */
VkResult pvr_CreateQueryPool(VkDevice _device,
                             const VkQueryPoolCreateInfo *pCreateInfo,
                             const VkAllocationCallbacks *pAllocator,
                             VkQueryPool *pQueryPool)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}
+
/* TODO: implement. Stub: asserts in debug builds; no-op in release builds. */
void pvr_DestroyQueryPool(VkDevice _device,
                          VkQueryPool queryPool,
                          const VkAllocationCallbacks *pAllocator)
{
   assert(!"Unimplemented");
}
+
/* TODO: implement. Stub: asserts in debug builds; in release builds it
 * returns VK_SUCCESS without writing any results to pData.
 */
VkResult pvr_GetQueryPoolResults(VkDevice _device,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 size_t dataSize,
                                 void *pData,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}
+
/* TODO: implement. Stub: asserts in debug builds; no-op in release builds. */
void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer,
                           VkQueryPool queryPool,
                           uint32_t firstQuery,
                           uint32_t queryCount)
{
   assert(!"Unimplemented");
}
+
/* TODO: implement. Stub: asserts in debug builds; no-op in release builds. */
void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
                                 VkQueryPool queryPool,
                                 uint32_t firstQuery,
                                 uint32_t queryCount,
                                 VkBuffer dstBuffer,
                                 VkDeviceSize dstOffset,
                                 VkDeviceSize stride,
                                 VkQueryResultFlags flags)
{
   assert(!"Unimplemented");
}
+
/* TODO: implement. Stub: asserts in debug builds; no-op in release builds. */
void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
                       VkQueryPool queryPool,
                       uint32_t query,
                       VkQueryControlFlags flags)
{
   assert(!"Unimplemented");
}
+
/* TODO: implement. Stub: asserts in debug builds; no-op in release builds. */
void pvr_CmdEndQuery(VkCommandBuffer commandBuffer,
                     VkQueryPool queryPool,
                     uint32_t query)
{
   assert(!"Unimplemented");
}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on radv driver which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * This file implements VkQueue, VkFence, and VkSemaphore
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_job_compute.h"
+#include "pvr_job_context.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_private.h"
+#include "util/macros.h"
+#include "util/u_atomic.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_queue.h"
+#include "vk_util.h"
+
/**
 * Initializes a single pvr_queue: the common vk_queue base plus one compute
 * and one render (gfx) winsys context.
 *
 * On failure, everything created so far is torn down in reverse order via the
 * goto cleanup chain. Returns VK_SUCCESS or the first error encountered.
 */
static VkResult pvr_queue_init(struct pvr_device *device,
                               struct pvr_queue *queue,
                               const VkDeviceQueueCreateInfo *pCreateInfo,
                               uint32_t index_in_family)
{
   struct pvr_compute_ctx *compute_ctx;
   struct pvr_render_ctx *gfx_ctx;
   VkResult result;

   result =
      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
   if (result != VK_SUCCESS)
      return result;

   result = pvr_compute_ctx_create(device,
                                   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
                                   &compute_ctx);
   if (result != VK_SUCCESS)
      goto err_vk_queue_finish;

   result =
      pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gfx_ctx);
   if (result != VK_SUCCESS)
      goto err_compute_ctx_destroy;

   queue->device = device;
   queue->gfx_ctx = gfx_ctx;
   queue->compute_ctx = compute_ctx;

   /* Start with no completion syncobjs; pvr_queue_finish() destroys any that
    * get created later.
    */
   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++)
      queue->completion[i] = NULL;

   return VK_SUCCESS;

err_compute_ctx_destroy:
   pvr_compute_ctx_destroy(compute_ctx);

err_vk_queue_finish:
   vk_queue_finish(&queue->vk);

   return result;
}
+
+VkResult pvr_queues_create(struct pvr_device *device,
+ const VkDeviceCreateInfo *pCreateInfo)
+{
+ VkResult result;
+
+ /* Check requested queue families and queues */
+ assert(pCreateInfo->queueCreateInfoCount == 1);
+ assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
+ assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);
+
+ const VkDeviceQueueCreateInfo *queue_create = queue_create =
+ &pCreateInfo->pQueueCreateInfos[0];
+
+ device->queues = vk_alloc(&device->vk.alloc,
+ queue_create->queueCount * sizeof(*device->queues),
+ 8,
+ VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+ if (!device->queues)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ device->queue_count = 0;
+
+ for (uint32_t i = 0; i < queue_create->queueCount; i++) {
+ result = pvr_queue_init(device, &device->queues[i], queue_create, i);
+ if (result != VK_SUCCESS)
+ goto err_queues_finish;
+
+ device->queue_count++;
+ }
+
+ return VK_SUCCESS;
+
+err_queues_finish:
+ pvr_queues_destroy(device);
+ return result;
+}
+
/* Tears down a queue: destroys any completion syncobjs and both winsys
 * contexts, then finishes the vk_queue base (reverse of pvr_queue_init()).
 */
static void pvr_queue_finish(struct pvr_queue *queue)
{
   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
      if (queue->completion[i])
         queue->device->ws->ops->syncobj_destroy(queue->completion[i]);
   }

   pvr_render_ctx_destroy(queue->gfx_ctx);
   pvr_compute_ctx_destroy(queue->compute_ctx);

   vk_queue_finish(&queue->vk);
}
+
+void pvr_queues_destroy(struct pvr_device *device)
+{
+ for (uint32_t q_idx = 0; q_idx < device->queue_count; q_idx++)
+ pvr_queue_finish(&device->queues[q_idx]);
+
+ vk_free(&device->vk.alloc, device->queues);
+}
+
+VkResult pvr_QueueWaitIdle(VkQueue _queue)
+{
+ PVR_FROM_HANDLE(pvr_queue, queue, _queue);
+
+ return queue->device->ws->ops->syncobjs_wait(queue->device->ws,
+ queue->completion,
+ ARRAY_SIZE(queue->completion),
+ true,
+ UINT64_MAX);
+}
+
+VkResult pvr_CreateFence(VkDevice _device,
+ const VkFenceCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkFence *pFence)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ struct pvr_fence *fence;
+ VkResult result;
+
+ fence = vk_object_alloc(&device->vk,
+ pAllocator,
+ sizeof(*fence),
+ VK_OBJECT_TYPE_FENCE);
+ if (!fence)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ /* We don't really need to create a syncobj here unless it's a signaled
+ * fence.
+ */
+ if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
+ result =
+ device->ws->ops->syncobj_create(device->ws, true, &fence->syncobj);
+ if (result != VK_SUCCESS) {
+ vk_object_free(&device->vk, pAllocator, fence);
+ return result;
+ }
+ } else {
+ fence->syncobj = NULL;
+ }
+
+ *pFence = pvr_fence_to_handle(fence);
+
+ return VK_SUCCESS;
+}
+
+void pvr_DestroyFence(VkDevice _device,
+ VkFence _fence,
+ const VkAllocationCallbacks *pAllocator)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ PVR_FROM_HANDLE(pvr_fence, fence, _fence);
+
+ if (!fence)
+ return;
+
+ if (fence->syncobj)
+ device->ws->ops->syncobj_destroy(fence->syncobj);
+
+ vk_object_free(&device->vk, pAllocator, fence);
+}
+
+VkResult
+pvr_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
+{
+ struct pvr_winsys_syncobj *syncobjs[fenceCount];
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]);
+
+ syncobjs[i] = fence->syncobj;
+ }
+
+ return device->ws->ops->syncobjs_reset(device->ws, syncobjs, fenceCount);
+}
+
/**
 * Returns VK_SUCCESS if the fence is signaled, VK_NOT_READY if it is not,
 * implemented as a zero-timeout wait on the fence's syncobj.
 */
VkResult pvr_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_fence, fence, _fence);
   VkResult result;

   /* NOTE(review): fence->syncobj is NULL for a fence created unsignaled that
    * has not been submitted; assumes the winsys wait treats a NULL entry as
    * unsignaled (VK_TIMEOUT) — TODO confirm.
    */
   result =
      device->ws->ops->syncobjs_wait(device->ws, &fence->syncobj, 1U, true, 0U);
   if (result == VK_TIMEOUT)
      return VK_NOT_READY;

   return result;
}
+
+VkResult pvr_WaitForFences(VkDevice _device,
+ uint32_t fenceCount,
+ const VkFence *pFences,
+ VkBool32 waitAll,
+ uint64_t timeout)
+{
+ struct pvr_winsys_syncobj *syncobjs[fenceCount];
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]);
+
+ syncobjs[i] = fence->syncobj;
+ }
+
+ return device->ws->ops->syncobjs_wait(device->ws,
+ syncobjs,
+ fenceCount,
+ !!waitAll,
+ timeout);
+}
+
+VkResult pvr_CreateSemaphore(VkDevice _device,
+ const VkSemaphoreCreateInfo *pCreateInfo,
+ const VkAllocationCallbacks *pAllocator,
+ VkSemaphore *pSemaphore)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ struct pvr_semaphore *semaphore;
+
+ semaphore = vk_object_alloc(&device->vk,
+ pAllocator,
+ sizeof(*semaphore),
+ VK_OBJECT_TYPE_SEMAPHORE);
+ if (!semaphore)
+ return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ semaphore->syncobj = NULL;
+
+ *pSemaphore = pvr_semaphore_to_handle(semaphore);
+
+ return VK_SUCCESS;
+}
+
+void pvr_DestroySemaphore(VkDevice _device,
+ VkSemaphore _semaphore,
+ const VkAllocationCallbacks *pAllocator)
+{
+ PVR_FROM_HANDLE(pvr_device, device, _device);
+ PVR_FROM_HANDLE(pvr_semaphore, semaphore, _semaphore);
+
+ if (semaphore->syncobj)
+ device->ws->ops->syncobj_destroy(semaphore->syncobj);
+
+ vk_object_free(&device->vk, pAllocator, semaphore);
+}
+
+/* Maps a Vulkan pipeline stage mask onto the driver's coarse pipeline
+ * stage bits (geometry / fragment / compute / transfer).  TOP_OF_PIPE and
+ * ALL_COMMANDS conservatively select every stage.
+ */
+static enum pvr_pipeline_stage_bits
+pvr_convert_stage_mask(VkPipelineStageFlags stage_mask)
+{
+   enum pvr_pipeline_stage_bits stages = 0;
+
+   if (stage_mask & VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT ||
+       stage_mask & VK_PIPELINE_STAGE_ALL_COMMANDS_BIT) {
+      return PVR_PIPELINE_STAGE_ALL_BITS;
+   }
+
+   if (stage_mask & (VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT))
+      stages |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;
+
+   if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+                     VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+                     VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+                     VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+                     VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+                     VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
+      stages |= PVR_PIPELINE_STAGE_GEOM_BIT;
+   }
+
+   if (stage_mask & (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+                     VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+                     VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+                     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT)) {
+      stages |= PVR_PIPELINE_STAGE_FRAG_BIT;
+   }
+
+   /* Compute support is not wired up yet; any compute stage bit trips an
+    * assert in debug builds (and is silently ignored in release builds).
+    */
+   if (stage_mask & (VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+                     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT)) {
+      assert(!"Unimplemented");
+   }
+
+   if (stage_mask & (VK_PIPELINE_STAGE_TRANSFER_BIT))
+      stages |= PVR_PIPELINE_STAGE_TRANSFER_BIT;
+
+   return stages;
+}
+
+/* Submits one graphics sub-command as a render (geometry + fragment) job.
+ *
+ * Imported framebuffer-attachment BOs are collected and passed to the
+ * winsys so it can track cross-process access.  On success the job's new
+ * geometry/fragment completion syncobjs replace any previous entries in
+ * completions[]; ownership of the wait fences transfers to
+ * pvr_render_job_submit().
+ *
+ * NOTE(review): cmd_buffer is currently unreferenced in this function —
+ * confirm whether it is reserved for the pending shadow load/store work
+ * (see the FIXMEs below).
+ */
+static VkResult pvr_process_graphics_cmd(
+   struct pvr_device *device,
+   struct pvr_queue *queue,
+   struct pvr_cmd_buffer *cmd_buffer,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   const struct pvr_framebuffer *framebuffer = sub_cmd->gfx.framebuffer;
+   struct pvr_winsys_syncobj *syncobj_geom = NULL;
+   struct pvr_winsys_syncobj *syncobj_frag = NULL;
+   uint32_t bo_count = 0;
+   VkResult result;
+
+   STACK_ARRAY(struct pvr_winsys_job_bo, bos, framebuffer->attachment_count);
+   if (!bos)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: DoShadowLoadOrStore() */
+
+   /* FIXME: If the framebuffer being rendered to has multiple layers then we
+    * need to split submissions that run a fragment job into two.
+    */
+   if (sub_cmd->gfx.job.run_frag && framebuffer->layers > 1)
+      pvr_finishme("Split job submission for framebuffers with > 1 layers");
+
+   /* Get any imported buffers used in framebuffer attachments. */
+   for (uint32_t i = 0U; i < framebuffer->attachment_count; i++) {
+      if (!framebuffer->attachments[i]->image->vma->bo->is_imported)
+         continue;
+
+      bos[bo_count].bo = framebuffer->attachments[i]->image->vma->bo;
+      bos[bo_count].flags = PVR_WINSYS_JOB_BO_FLAG_WRITE;
+      bo_count++;
+   }
+
+   /* This passes ownership of the wait fences to pvr_render_job_submit(). */
+   result = pvr_render_job_submit(queue->gfx_ctx,
+                                  &sub_cmd->gfx.job,
+                                  bos,
+                                  bo_count,
+                                  semaphores,
+                                  semaphore_count,
+                                  stage_flags,
+                                  &syncobj_geom,
+                                  &syncobj_frag);
+   STACK_ARRAY_FINISH(bos);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Replace the completion fences. */
+   if (syncobj_geom) {
+      if (completions[PVR_JOB_TYPE_GEOM])
+         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_GEOM]);
+
+      completions[PVR_JOB_TYPE_GEOM] = syncobj_geom;
+   }
+
+   if (syncobj_frag) {
+      if (completions[PVR_JOB_TYPE_FRAG])
+         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_FRAG]);
+
+      completions[PVR_JOB_TYPE_FRAG] = syncobj_frag;
+   }
+
+   /* FIXME: DoShadowLoadOrStore() */
+
+   return result;
+}
+
+/* Submits one compute sub-command.  On success the job's new completion
+ * syncobj replaces any previous PVR_JOB_TYPE_COMPUTE entry in
+ * completions[]; ownership of the wait fences transfers to
+ * pvr_compute_job_submit().
+ */
+static VkResult pvr_process_compute_cmd(
+   struct pvr_device *device,
+   struct pvr_queue *queue,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   struct pvr_winsys_syncobj *syncobj = NULL;
+   VkResult result;
+
+   /* This passes ownership of the wait fences to pvr_compute_job_submit(). */
+   result = pvr_compute_job_submit(queue->compute_ctx,
+                                   sub_cmd,
+                                   semaphores,
+                                   semaphore_count,
+                                   stage_flags,
+                                   &syncobj);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Replace the completion fences. */
+   if (syncobj) {
+      if (completions[PVR_JOB_TYPE_COMPUTE])
+         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_COMPUTE]);
+
+      completions[PVR_JOB_TYPE_COMPUTE] = syncobj;
+   }
+
+   return result;
+}
+
+/* FIXME: Implement gpu based transfer support. */
+/* CPU fallback for transfer sub-commands: blocks on any transfer-stage
+ * wait semaphores, then memcpy()s each region of each transfer command.
+ * BOs that the winsys has not already mapped are mapped temporarily and
+ * unmapped once their command has been copied.
+ */
+static VkResult pvr_process_transfer_cmds(
+   struct pvr_device *device,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   /* Wait for transfer semaphores here before doing any transfers. */
+   for (uint32_t i = 0; i < semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, sem, semaphores[i]);
+
+      if (sem->syncobj && stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) {
+         VkResult result = device->ws->ops->syncobjs_wait(device->ws,
+                                                          &sem->syncobj,
+                                                          1,
+                                                          true,
+                                                          UINT64_MAX);
+         if (result != VK_SUCCESS)
+            return result;
+
+         /* The transfer wait is satisfied; once no stages remain the
+          * semaphore's payload can be released.
+          */
+         stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT;
+         if (stage_flags[i] == 0) {
+            device->ws->ops->syncobj_destroy(sem->syncobj);
+            sem->syncobj = NULL;
+         }
+      }
+   }
+
+   list_for_each_entry_safe (struct pvr_transfer_cmd,
+                             transfer_cmd,
+                             &sub_cmd->transfer.transfer_cmds,
+                             link) {
+      bool src_mapped = false;
+      bool dst_mapped = false;
+      void *src_addr;
+      void *dst_addr;
+      void *ret_ptr;
+
+      /* Map if bo is not mapped. */
+      if (!transfer_cmd->src->vma->bo->map) {
+         src_mapped = true;
+         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->src->vma->bo);
+         if (!ret_ptr)
+            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+      }
+
+      if (!transfer_cmd->dst->vma->bo->map) {
+         dst_mapped = true;
+         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->dst->vma->bo);
+         if (!ret_ptr) {
+            /* Don't leak the src mapping created just above. */
+            if (src_mapped)
+               device->ws->ops->buffer_unmap(transfer_cmd->src->vma->bo);
+
+            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+         }
+      }
+
+      src_addr =
+         transfer_cmd->src->vma->bo->map + transfer_cmd->src->vma->bo_offset;
+      dst_addr =
+         transfer_cmd->dst->vma->bo->map + transfer_cmd->dst->vma->bo_offset;
+
+      for (uint32_t i = 0; i < transfer_cmd->region_count; i++) {
+         const VkBufferCopy2 *region = &transfer_cmd->regions[i];
+
+         memcpy(dst_addr + region->dstOffset,
+                src_addr + region->srcOffset,
+                region->size);
+      }
+
+      if (src_mapped)
+         device->ws->ops->buffer_unmap(transfer_cmd->src->vma->bo);
+
+      if (dst_mapped)
+         device->ws->ops->buffer_unmap(transfer_cmd->dst->vma->bo);
+   }
+
+   /* Given we are doing CPU based copy, completion fence should always be -1.
+    * This should be fixed when GPU based copy is implemented.
+    */
+   assert(!completions[PVR_JOB_TYPE_TRANSFER]);
+
+   return VK_SUCCESS;
+}
+
+/* Merges all per-job-type completion syncobjs into one signal payload and
+ * stores an independent duplicate of it in every signal semaphore,
+ * replacing any previous payload.
+ *
+ * Note: on success control intentionally falls through into the
+ * err_destroy_syncobj label — the merged accumulator is destroyed either
+ * way because each semaphore owns its own duplicate, and `result` holds
+ * VK_SUCCESS from the last merge.
+ *
+ * NOTE(review): if every completions[] entry is NULL, the per-semaphore
+ * merge is called with a NULL source — presumably syncobjs_merge()
+ * produces an already-signaled syncobj in that case; confirm.
+ */
+static VkResult pvr_set_semaphore_payloads(
+   struct pvr_device *device,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count)
+{
+   struct pvr_winsys_syncobj *syncobj = NULL;
+   VkResult result;
+
+   if (!semaphore_count)
+      return VK_SUCCESS;
+
+   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
+      if (completions[i]) {
+         result =
+            device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_syncobj;
+      }
+   }
+
+   for (uint32_t i = 0; i < semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);
+      struct pvr_winsys_syncobj *dup_signal_fence;
+
+      /* Duplicate signal_fence and store it in each signal semaphore. */
+      result =
+         device->ws->ops->syncobjs_merge(syncobj, NULL, &dup_signal_fence);
+      if (result != VK_SUCCESS)
+         goto err_destroy_syncobj;
+
+      if (semaphore->syncobj)
+         device->ws->ops->syncobj_destroy(semaphore->syncobj);
+      semaphore->syncobj = dup_signal_fence;
+   }
+
+err_destroy_syncobj:
+   if (syncobj)
+      device->ws->ops->syncobj_destroy(syncobj);
+
+   return result;
+}
+
+/* Merges all per-job-type completion syncobjs into a single payload and
+ * installs it in the fence, replacing (and destroying) any previous
+ * payload.  On merge failure any partially-built accumulator is released.
+ */
+static VkResult pvr_set_fence_payload(
+   struct pvr_device *device,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
+   VkFence _fence)
+{
+   PVR_FROM_HANDLE(pvr_fence, fence, _fence);
+   struct pvr_winsys_syncobj *syncobj = NULL;
+
+   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
+      if (completions[i]) {
+         VkResult result =
+            device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj);
+         if (result != VK_SUCCESS) {
+            /* syncobj is still NULL if the very first merge fails; other
+             * call sites guard syncobj_destroy() against NULL, so do the
+             * same here.
+             */
+            if (syncobj)
+               device->ws->ops->syncobj_destroy(syncobj);
+            return result;
+         }
+      }
+   }
+
+   if (fence->syncobj)
+      device->ws->ops->syncobj_destroy(fence->syncobj);
+   fence->syncobj = syncobj;
+
+   return VK_SUCCESS;
+}
+
+/* Walks an executable command buffer's recorded sub-commands and submits
+ * each one by type (graphics / compute / CPU transfer).  A failed
+ * sub-command marks the whole command buffer invalid; each successful
+ * sub-command bumps the device-wide job counter.
+ */
+static VkResult pvr_process_cmd_buffer(
+   struct pvr_device *device,
+   struct pvr_queue *queue,
+   VkCommandBuffer commandBuffer,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   VkResult result;
+
+   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_EXECUTABLE);
+
+   list_for_each_entry_safe (struct pvr_sub_cmd,
+                             sub_cmd,
+                             &cmd_buffer->sub_cmds,
+                             link) {
+      switch (sub_cmd->type) {
+      case PVR_SUB_CMD_TYPE_GRAPHICS:
+         result = pvr_process_graphics_cmd(device,
+                                           queue,
+                                           cmd_buffer,
+                                           sub_cmd,
+                                           semaphores,
+                                           semaphore_count,
+                                           stage_flags,
+                                           completions);
+         break;
+
+      case PVR_SUB_CMD_TYPE_COMPUTE:
+         result = pvr_process_compute_cmd(device,
+                                          queue,
+                                          sub_cmd,
+                                          semaphores,
+                                          semaphore_count,
+                                          stage_flags,
+                                          completions);
+         break;
+
+      case PVR_SUB_CMD_TYPE_TRANSFER:
+         result = pvr_process_transfer_cmds(device,
+                                            sub_cmd,
+                                            semaphores,
+                                            semaphore_count,
+                                            stage_flags,
+                                            completions);
+         break;
+
+      default:
+         pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+
+      if (result != VK_SUCCESS) {
+         cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INVALID;
+         return result;
+      }
+
+      p_atomic_inc(&device->global_queue_job_count);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Handles a submit with no command buffers: each wait semaphore's payload
+ * is merged into the completions[] slot of every pipeline stage it waits
+ * on, then the semaphore's own payload is consumed (destroyed and
+ * reset), so the wait is carried forward by the completion syncobjs.
+ */
+static VkResult pvr_process_empty_job(
+   struct pvr_device *device,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   for (uint32_t i = 0; i < semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);
+
+      if (!semaphore->syncobj)
+         continue;
+
+      for (uint32_t j = 0; j < PVR_NUM_SYNC_PIPELINE_STAGES; j++) {
+         if (stage_flags[i] & (1U << j)) {
+            VkResult result =
+               device->ws->ops->syncobjs_merge(semaphore->syncobj,
+                                               completions[j],
+                                               &completions[j]);
+            if (result != VK_SUCCESS)
+               return result;
+         }
+      }
+
+      device->ws->ops->syncobj_destroy(semaphore->syncobj);
+      semaphore->syncobj = NULL;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Transfers ownership of every non-NULL syncobj in src[] to the matching
+ * slot of dst[], destroying whatever dst previously held there.  NULL
+ * entries in src leave dst untouched.
+ */
+static void
+pvr_update_syncobjs(struct pvr_device *device,
+                    struct pvr_winsys_syncobj *src[static PVR_JOB_TYPE_MAX],
+                    struct pvr_winsys_syncobj *dst[static PVR_JOB_TYPE_MAX])
+{
+   for (uint32_t job = 0U; job < PVR_JOB_TYPE_MAX; job++) {
+      if (!src[job])
+         continue;
+
+      if (dst[job])
+         device->ws->ops->syncobj_destroy(dst[job]);
+
+      dst[job] = src[job];
+   }
+}
+
+/* Top-level queue submission.  For each VkSubmitInfo: converts the wait
+ * stage masks, processes either the command buffers or an empty job,
+ * propagates the resulting completion syncobjs to the signal semaphores,
+ * and accumulates them across submits.  Finally the accumulated syncobjs
+ * are installed in the optional fence and the queue's own completion
+ * slots.
+ *
+ * NOTE(review): stage_flags is a VLA sized by waitSemaphoreCount, which
+ * may be 0 — a zero-length VLA is technically undefined; confirm the
+ * supported compilers tolerate it.
+ */
+VkResult pvr_QueueSubmit(VkQueue _queue,
+                         uint32_t submitCount,
+                         const VkSubmitInfo *pSubmits,
+                         VkFence fence)
+{
+   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
+   struct pvr_winsys_syncobj *completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
+   struct pvr_device *device = queue->device;
+   VkResult result;
+
+   for (uint32_t i = 0; i < submitCount; i++) {
+      struct pvr_winsys_syncobj
+         *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
+      const VkSubmitInfo *desc = &pSubmits[i];
+      uint32_t stage_flags[desc->waitSemaphoreCount];
+
+      for (uint32_t j = 0; j < desc->waitSemaphoreCount; j++)
+         stage_flags[j] = pvr_convert_stage_mask(desc->pWaitDstStageMask[j]);
+
+      if (desc->commandBufferCount > 0U) {
+         for (uint32_t j = 0U; j < desc->commandBufferCount; j++) {
+            result = pvr_process_cmd_buffer(device,
+                                            queue,
+                                            desc->pCommandBuffers[j],
+                                            desc->pWaitSemaphores,
+                                            desc->waitSemaphoreCount,
+                                            stage_flags,
+                                            per_submit_completion_syncobjs);
+            if (result != VK_SUCCESS)
+               return result;
+         }
+      } else {
+         result = pvr_process_empty_job(device,
+                                        desc->pWaitSemaphores,
+                                        desc->waitSemaphoreCount,
+                                        stage_flags,
+                                        per_submit_completion_syncobjs);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      if (desc->signalSemaphoreCount) {
+         result = pvr_set_semaphore_payloads(device,
+                                             per_submit_completion_syncobjs,
+                                             desc->pSignalSemaphores,
+                                             desc->signalSemaphoreCount);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      pvr_update_syncobjs(device,
+                          per_submit_completion_syncobjs,
+                          completion_syncobjs);
+   }
+
+   if (fence) {
+      result = pvr_set_fence_payload(device, completion_syncobjs, fence);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   pvr_update_syncobjs(device, completion_syncobjs, queue->completion);
+
+   return VK_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <vulkan/vulkan.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "pvr_private.h"
+#include "pvr_shader.h"
+#include "rogue/rogue.h"
+#include "rogue/rogue_shader.h"
+#include "spirv/nir_spirv.h"
+#include "vk_format.h"
+#include "vk_shader_module.h"
+#include "vk_util.h"
+
+/**
+ * \file pvr_shader.c
+ *
+ * \brief Contains top-level functions to compile SPIR-V -> NIR -> Rogue, and
+ * interfaces with the compiler.
+ */
+
+/**
+ * \brief Converts a SPIR-V shader to NIR.
+ *
+ * Specialization constants from the pipeline create info are resolved and
+ * handed to the Rogue SPIR-V front end along with the module's word data.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] stage Shader stage.
+ * \param[in] create_info Shader creation info from Vulkan pipeline.
+ * \return A nir_shader* if successful, or NULL if unsuccessful.
+ */
+nir_shader *pvr_spirv_to_nir(struct rogue_build_ctx *ctx,
+                             gl_shader_stage stage,
+                             const VkPipelineShaderStageCreateInfo *create_info)
+{
+   VK_FROM_HANDLE(vk_shader_module, module, create_info->module);
+   struct nir_spirv_specialization *specializations;
+   unsigned num_specializations = 0;
+   nir_shader *shader;
+
+   specializations =
+      vk_spec_info_to_nir_spirv(create_info->pSpecializationInfo,
+                                &num_specializations);
+
+   shader = rogue_spirv_to_nir(ctx,
+                               stage,
+                               create_info->pName,
+                               module->size / sizeof(uint32_t),
+                               (uint32_t *)module->data,
+                               num_specializations,
+                               specializations);
+
+   free(specializations);
+
+   return shader;
+}
+
+/**
+ * \brief Converts a NIR shader to Rogue.
+ *
+ * Thin wrapper over the Rogue compiler's NIR back end; the returned
+ * shader is owned by the build context's allocator (presumably — confirm
+ * against rogue_nir_to_rogue()'s contract).
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return A rogue_shader* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader *pvr_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                      nir_shader *nir)
+{
+   return rogue_nir_to_rogue(ctx, nir);
+}
+
+/**
+ * \brief Converts a Rogue shader to binary.
+ *
+ * Thin wrapper over the Rogue compiler's binary emitter; lifetime of the
+ * returned binary follows the build context (presumably — confirm against
+ * rogue_to_binary()'s contract).
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] shader Rogue shader.
+ * \return A rogue_shader_binary* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader_binary *pvr_rogue_to_binary(struct rogue_build_ctx *ctx,
+                                                struct rogue_shader *shader)
+{
+   return rogue_to_binary(ctx, shader);
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SHADER_H
+#define PVR_SHADER_H
+
+#include <stddef.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "vulkan/vulkan.h"
+
+struct rogue_build_ctx;
+struct rogue_compiler;
+struct rogue_shader;
+
+nir_shader *
+pvr_spirv_to_nir(struct rogue_build_ctx *ctx,
+ gl_shader_stage stage,
+ const VkPipelineShaderStageCreateInfo *create_info);
+
+struct rogue_shader *pvr_nir_to_rogue(struct rogue_build_ctx *ctx,
+ nir_shader *nir);
+
+struct rogue_shader_binary *pvr_rogue_to_binary(struct rogue_build_ctx *ctx,
+ struct rogue_shader *rogue);
+
+#endif /* PVR_SHADER_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_formats.h"
+#include "pvr_private.h"
+#include "pvr_tex_state.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_format.h"
+#include "vk_log.h"
+
+/* Translates a gallium pipe_swizzle into the Rogue texture-state swizzle
+ * source selector.  PIPE_SWIZZLE_NONE maps to constant ONE for the alpha
+ * component and constant ZERO for colour components.
+ */
+static enum ROGUE_TEXSTATE_SWIZ pvr_get_hw_swizzle(VkComponentSwizzle comp,
+                                                   enum pipe_swizzle swz)
+{
+   switch (swz) {
+   case PIPE_SWIZZLE_0:
+      return ROGUE_TEXSTATE_SWIZ_SRC_ZERO;
+   case PIPE_SWIZZLE_1:
+      return ROGUE_TEXSTATE_SWIZ_SRC_ONE;
+   case PIPE_SWIZZLE_X:
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_0;
+   case PIPE_SWIZZLE_Y:
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_1;
+   case PIPE_SWIZZLE_Z:
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_2;
+   case PIPE_SWIZZLE_W:
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_3;
+   case PIPE_SWIZZLE_NONE:
+      if (comp == VK_COMPONENT_SWIZZLE_A)
+         return ROGUE_TEXSTATE_SWIZ_SRC_ONE;
+      else
+         return ROGUE_TEXSTATE_SWIZ_SRC_ZERO;
+   default:
+      unreachable("Unknown enum pipe_swizzle");
+   }
+}
+
+/* Packs the two 64-bit texture image state words for a texture/image
+ * view.  Word 0 carries the texture type, format, sample count, swizzle,
+ * gamma and dimensions; word 1 is type-dependent (stride vs. twiddled
+ * layout) and carries mip/array/address information.
+ *
+ * Returns VK_ERROR_FORMAT_NOT_SUPPORTED for layout/view-type combinations
+ * the hardware cannot express.
+ */
+VkResult
+pvr_pack_tex_state(struct pvr_device *device,
+                   struct pvr_texture_state_info *info,
+                   uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS])
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint32_t texture_type;
+
+   pvr_csb_pack (&state[0], TEXSTATE_IMAGE_WORD0, word0) {
+      /* Determine texture type */
+      if (info->is_cube && info->tex_state_type == PVR_TEXTURE_STATE_SAMPLE) {
+         word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_CUBE);
+      } else if (info->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
+                 info->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
+         if (info->type == VK_IMAGE_VIEW_TYPE_3D) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_3D);
+         } else if (info->type == VK_IMAGE_VIEW_TYPE_1D ||
+                    info->type == VK_IMAGE_VIEW_TYPE_1D_ARRAY) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_1D);
+         } else if (info->type == VK_IMAGE_VIEW_TYPE_2D ||
+                    info->type == VK_IMAGE_VIEW_TYPE_2D_ARRAY) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_2D);
+         } else {
+            return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+         }
+      } else if (info->mem_layout == PVR_MEMLAYOUT_LINEAR) {
+         word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_STRIDE);
+      } else {
+         return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+      }
+
+      word0.texformat = pvr_get_tex_format(info->format);
+      word0.smpcnt = util_logbase2(info->sample_count);
+      word0.swiz0 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_R, info->swizzle[0]);
+      word0.swiz1 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_G, info->swizzle[1]);
+      word0.swiz2 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_B, info->swizzle[2]);
+      word0.swiz3 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_A, info->swizzle[3]);
+
+      /* Gamma */
+      if (vk_format_is_srgb(info->format)) {
+         /* Gamma for 2 Component Formats has to be handled differently. */
+         if (vk_format_get_nr_components(info->format) == 2) {
+            /* Enable Gamma only for Channel 0 if Channel 1 is an Alpha
+             * Channel.
+             */
+            if (vk_format_has_alpha(info->format)) {
+               word0.twocomp_gamma = PVRX(TEXSTATE_TWOCOMP_GAMMA_R);
+            } else {
+               /* Otherwise Enable Gamma for both the Channels. */
+               word0.twocomp_gamma = PVRX(TEXSTATE_TWOCOMP_GAMMA_RG);
+
+               /* If Channel 0 happens to be the Alpha Channel, the
+                * ALPHA_MSB bit would not be set thereby disabling Gamma
+                * for Channel 0.
+                */
+            }
+         } else {
+            word0.gamma = PVRX(TEXSTATE_GAMMA_ON);
+         }
+      }
+
+      word0.width = info->extent.width - 1;
+      /* 1D views carry no height.  Note this must be a logical AND: the
+       * previous `||` form was a tautology and packed height for 1D views
+       * too.
+       */
+      if (info->type != VK_IMAGE_VIEW_TYPE_1D &&
+          info->type != VK_IMAGE_VIEW_TYPE_1D_ARRAY)
+         word0.height = info->extent.height - 1;
+   }
+
+   /* Texture type specific stuff (word 1) */
+   if (texture_type == PVRX(TEXSTATE_TEXTYPE_STRIDE)) {
+      pvr_csb_pack (&state[1], TEXSTATE_STRIDE_IMAGE_WORD1, word1) {
+         word1.stride = info->stride;
+         word1.num_mip_levels = info->mip_levels;
+         word1.mipmaps_present = info->mipmaps_present;
+
+         word1.texaddr = info->addr;
+         word1.texaddr.addr += info->offset;
+
+         if (vk_format_is_alpha_on_msb(info->format))
+            word1.alpha_msb = true;
+
+         if (!PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
+             !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2)) {
+            if (info->flags & PVR_TEXFLAGS_INDEX_LOOKUP ||
+                info->flags & PVR_TEXFLAGS_BUFFER)
+               word1.index_lookup = true;
+         }
+
+         if (info->flags & PVR_TEXFLAGS_BUFFER)
+            word1.mipmaps_present = false;
+
+         if (PVR_HAS_FEATURE(dev_info, tpu_image_state_v2) &&
+             vk_format_is_compressed(info->format))
+            word1.tpu_image_state_v2_compression_mode =
+               PVRX(TEXSTATE_COMPRESSION_MODE_TPU);
+      }
+   } else {
+      pvr_csb_pack (&state[1], TEXSTATE_IMAGE_WORD1, word1) {
+         word1.num_mip_levels = info->mip_levels;
+         word1.mipmaps_present = info->mipmaps_present;
+         word1.baselevel = info->base_level;
+
+         if (info->extent.depth > 0) {
+            word1.depth = info->extent.depth - 1;
+         } else if (PVR_HAS_FEATURE(dev_info, tpu_array_textures)) {
+            uint32_t array_layers = info->array_size;
+
+            /* Cube array depth is counted in whole cubes, not faces. */
+            if (info->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY &&
+                info->tex_state_type == PVR_TEXTURE_STATE_SAMPLE)
+               array_layers /= 6;
+
+            word1.depth = array_layers - 1;
+         }
+
+         word1.texaddr = info->addr;
+         word1.texaddr.addr += info->offset;
+
+         if (!PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
+             !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2)) {
+            if (info->flags & PVR_TEXFLAGS_INDEX_LOOKUP ||
+                info->flags & PVR_TEXFLAGS_BUFFER)
+               word1.index_lookup = true;
+         }
+
+         if (info->flags & PVR_TEXFLAGS_BUFFER)
+            word1.mipmaps_present = false;
+
+         if (info->flags & PVR_TEXFLAGS_BORDER)
+            word1.border = true;
+
+         if (vk_format_is_alpha_on_msb(info->format))
+            word1.alpha_msb = true;
+
+         if (PVR_HAS_FEATURE(dev_info, tpu_image_state_v2) &&
+             vk_format_is_compressed(info->format))
+            word1.tpu_image_state_v2_compression_mode =
+               PVRX(TEXSTATE_COMPRESSION_MODE_TPU);
+      }
+   }
+
+   return VK_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_TEX_STATE_H
+#define PVR_TEX_STATE_H
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_private.h"
+#include "util/macros.h"
+
+/**
+ * Texture requires 32bit index lookups instead of texture coordinate access.
+ */
+#define PVR_TEXFLAGS_INDEX_LOOKUP BITFIELD_BIT(0U)
+
+/** Texture has border texels present. */
+#define PVR_TEXFLAGS_BORDER BITFIELD_BIT(1U)
+
+/**
+ * Resource is actually a buffer, not a texture, and therefore LOD is ignored.
+ * Coordinates are integers.
+ */
+#define PVR_TEXFLAGS_BUFFER BITFIELD_BIT(2U)
+
+/** Parameters for #pvr_pack_tex_state(). */
+struct pvr_texture_state_info {
+   /* Vulkan format of the texel data. */
+   VkFormat format;
+   /* Memory layout (linear / twiddled / 3D-twiddled). */
+   enum pvr_memlayout mem_layout;
+   /* Bitmask of PVR_TEXFLAGS_* values. */
+   uint32_t flags;
+   /* Vulkan image view type (1D/2D/3D/cube, array variants). */
+   VkImageViewType type;
+   bool is_cube;
+   /* Whether the state is for sampling or another access path. */
+   enum pvr_texture_state tex_state_type;
+   /* Dimensions in texels; depth 0 means "use array_size". */
+   VkExtent3D extent;
+
+   /**
+    * For array textures, this holds the array dimension, in elements. This can
+    * be zero if texture is not an array.
+    */
+   uint32_t array_size;
+
+   /** Base mipmap level. This is the miplevel you want as the top level. */
+   uint32_t base_level;
+
+   /**
+    * Number of mipmap levels that should be accessed by HW. This is not
+    * necessarily the number of levels that are in memory. (See
+    * mipmaps_present)
+    */
+   uint32_t mip_levels;
+
+   /**
+    * True if the texture is mipmapped.
+    * Note: This is based on the number of mip levels the texture contains, not
+    * on the mip levels that are being used i.e. mip_levels.
+    */
+   bool mipmaps_present;
+
+   /**
+    * Number of samples per texel for multisampling. This should be 1 for none
+    * multisampled textures.
+    */
+   uint32_t sample_count;
+
+   /** Stride, in pixels. Only valid if mem_layout is stride or tiled. */
+   uint32_t stride;
+
+   /**
+    * For buffers, where TPU_BUFFER_LOOKUP is present, this defines
+    * the offset for the buffer, in texels.
+    */
+   uint32_t offset;
+
+   /**
+    * Precomputed (composed from createinfo->components and format swizzle)
+    * swizzles to pass in to the texture state.
+    */
+   uint8_t swizzle[4];
+
+   /** Address of texture, which must be aligned to at least 32bits. */
+   pvr_dev_addr_t addr;
+};
+
+VkResult
+pvr_pack_tex_state(struct pvr_device *device,
+ struct pvr_texture_state_info *info,
+ uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS]);
+
+#endif /* PVR_TEX_STATE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based on intel anv code:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "util/u_atomic.h"
+#include "wsi_common.h"
+
+/* WSI callback used by the common WSI layer to resolve driver entry
+ * points; forwards to the instance's unchecked proc-addr lookup.
+ */
+static PFN_vkVoidFunction pvr_wsi_proc_addr(VkPhysicalDevice physicalDevice,
+                                            const char *pName)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+
+   return vk_instance_get_proc_addr_unchecked(&pdevice->instance->vk, pName);
+}
+
+/* Initialises the common WSI layer for a physical device and advertises
+ * DRM format modifier support.  Must be paired with pvr_wsi_finish().
+ */
+VkResult pvr_wsi_init(struct pvr_physical_device *pdevice)
+{
+   VkResult result;
+
+   result = wsi_device_init(&pdevice->wsi_device,
+                            pvr_physical_device_to_handle(pdevice),
+                            pvr_wsi_proc_addr,
+                            &pdevice->vk.instance->alloc,
+                            pdevice->master_fd,
+                            NULL,
+                            false);
+   if (result != VK_SUCCESS)
+      return result;
+
+   pdevice->wsi_device.supports_modifiers = true;
+   pdevice->vk.wsi_device = &pdevice->wsi_device;
+
+   return VK_SUCCESS;
+}
+
+/* Tears down the common WSI state set up by pvr_wsi_init(). */
+void pvr_wsi_finish(struct pvr_physical_device *pdevice)
+{
+   pdevice->vk.wsi_device = NULL;
+   wsi_device_finish(&pdevice->wsi_device, &pdevice->vk.instance->alloc);
+}
+
+/* Presents swapchain images via the common WSI layer, then bumps the
+ * device-wide present counter on success.
+ */
+VkResult pvr_QueuePresentKHR(VkQueue _queue,
+                             const VkPresentInfoKHR *pPresentInfo)
+{
+   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
+   VkResult result;
+
+   result = wsi_common_queue_present(&queue->device->pdevice->wsi_device,
+                                     pvr_device_to_handle(queue->device),
+                                     _queue,
+                                     0,
+                                     pPresentInfo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   p_atomic_inc(&queue->device->global_queue_present_count);
+
+   return VK_SUCCESS;
+}
+
+/* Acquires the next swapchain image via the common WSI layer.  Because
+ * acquisition completes on the CPU here, any supplied fence/semaphore is
+ * signalled immediately rather than by the GPU.  A VK_SUBOPTIMAL_KHR
+ * result from the WSI layer is preserved through the signalling path.
+ */
+VkResult pvr_AcquireNextImage2KHR(VkDevice _device,
+                                  const VkAcquireNextImageInfoKHR *pAcquireInfo,
+                                  uint32_t *pImageIndex)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_winsys_syncobj *handles[2];
+   uint32_t count = 0U;
+   VkResult result;
+   VkResult ret;
+
+   result = wsi_common_acquire_next_image2(&device->pdevice->wsi_device,
+                                           _device,
+                                           pAcquireInfo,
+                                           pImageIndex);
+   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
+      return result;
+
+   if (pAcquireInfo->fence) {
+      PVR_FROM_HANDLE(pvr_fence, fence, pAcquireInfo->fence);
+      handles[count++] = fence->syncobj;
+   }
+
+   if (pAcquireInfo->semaphore) {
+      PVR_FROM_HANDLE(pvr_semaphore, semaphore, pAcquireInfo->semaphore);
+      handles[count++] = semaphore->syncobj;
+   }
+
+   if (count == 0U)
+      return result;
+
+   /* We need to preserve VK_SUBOPTIMAL_KHR status. */
+   ret = device->ws->ops->syncobjs_signal(device->ws, handles, count);
+   if (ret != VK_SUCCESS)
+      return ret;
+
+   return result;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_CDM_LOAD_SR_H
+#define PVR_CDM_LOAD_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Embedded pre-compiled USC binary ("CDM load SR" - presumably restores the
+ * compute data master shared registers on context resume; TODO confirm).
+ * Generated data: do not hand-edit the bytes.
+ */
+static const uint8_t pvr_cdm_load_sr_code[] = {
+ 0x25, 0x02, 0x87, 0x81,
+ 0x04, 0x00, 0x00, 0x00,
+ 0x84, 0x04, 0x25, 0x02,
+ 0x87, 0x80, 0x04, 0x00,
+ 0x00, 0x00, 0x85, 0x04,
+ 0x25, 0x02, 0x87, 0x83,
+ 0x04, 0x00, 0x00, 0x00,
+ 0x86, 0x04, 0x25, 0x02,
+ 0x87, 0x82, 0x04, 0x00,
+ 0x00, 0x00, 0x87, 0x04,
+ 0x56, 0x20, 0xF1, 0x85,
+ 0x02, 0x80, 0x81, 0xD0,
+ 0xC4, 0x08, 0x00, 0xFF,
+ 0x02, 0x80, 0x6A, 0xFF,
+ 0x67, 0xF0, 0x40, 0x20,
+ 0x41, 0x8C, 0x80, 0x40,
+ 0x00, 0x50, 0x8F, 0xC0,
+ 0x80, 0x02, 0x04, 0x81,
+ 0x60, 0x00, 0x0A, 0x01,
+ 0x00, 0x00, 0x25, 0x36,
+ 0x87, 0x87, 0x00, 0x00,
+ 0x40, 0x05, 0xD1, 0x06,
+ 0x55, 0x20, 0xF1, 0x81,
+ 0x02, 0x00, 0xC0, 0xC6,
+ 0x08, 0x00, 0x02, 0x80,
+ 0x6A, 0xFF, 0x46, 0x42,
+ 0xD0, 0x03, 0xEA, 0xD1,
+ 0x41, 0x00, 0x01, 0x00,
+ 0x00, 0x51, 0x27, 0x06,
+ 0xEB, 0x84, 0x50, 0x20,
+ 0x86, 0x87, 0x04, 0x00,
+ 0xC0, 0x06, 0x87, 0x22,
+ 0x25, 0x32, 0x87, 0x87,
+ 0x00, 0x1F, 0x40, 0xC5,
+ 0x0C, 0xFF, 0x25, 0x02,
+ 0x87, 0xC0, 0x0C, 0x00,
+ 0x00, 0x00, 0x83, 0x0C,
+ 0x47, 0x42, 0xD0, 0x03,
+ 0xEA, 0x85, 0x41, 0x90,
+ 0x01, 0x08, 0x00, 0x00,
+ 0x85, 0x0C, 0x47, 0x42,
+ 0xD0, 0x03, 0xEA, 0xC5,
+ 0x41, 0x90, 0x01, 0x08,
+ 0x00, 0x00, 0xC5, 0x0C,
+ 0x67, 0xF0, 0x40, 0x28,
+ 0x42, 0x8C, 0x80, 0x40,
+ 0x80, 0xC5, 0x80, 0x90,
+ 0x80, 0xFF, 0x04, 0x81,
+ 0x60, 0x00, 0xCC, 0xFF,
+ 0xFF, 0xFF, 0x66, 0xF0,
+ 0x40, 0x28, 0x42, 0x8C,
+ 0x80, 0x40, 0x00, 0x51,
+ 0xD0, 0x80, 0x04, 0x81,
+ 0x60, 0x00, 0x86, 0xFF,
+ 0xFF, 0xFF, 0x46, 0x40,
+ 0xF1, 0xB0, 0xE2, 0x81,
+ 0x4D, 0x01, 0x00, 0x00,
+ 0x00, 0xFF, 0x04, 0x81,
+ 0x60, 0x00, 0x32, 0x00,
+ 0x00, 0x00, 0x02, 0x80,
+ 0x6C, 0xC4, 0x45, 0x12,
+ 0xD3, 0x3F, 0x01, 0x00,
+ 0x00, 0xAB, 0x01, 0xFF,
+ 0x44, 0x12, 0xD3, 0x3F,
+ 0x00, 0x00, 0x00, 0x40,
+ 0x44, 0x10, 0xD3, 0x3F,
+ 0x40, 0x00, 0x00, 0xFF,
+ 0x02, 0x80, 0x6C, 0x84,
+ 0x04, 0x80, 0x60, 0x00,
+ 0x40, 0x00, 0x00, 0x00,
+ 0x46, 0x50, 0xFB, 0xB0,
+ 0x87, 0xE2, 0x81, 0x4B,
+ 0x03, 0x00, 0x00, 0x00,
+ 0x04, 0x81, 0x61, 0x00,
+ 0xF4, 0xFF, 0xFF, 0xFF,
+ 0x02, 0x80, 0x6C, 0xC4,
+ 0x45, 0x12, 0xD3, 0x3F,
+ 0x01, 0x00, 0x00, 0xAA,
+ 0x01, 0xFF, 0x44, 0x12,
+ 0xD3, 0x3F, 0x00, 0x00,
+ 0x00, 0x40, 0x44, 0x10,
+ 0xD3, 0x3F, 0x40, 0x00,
+ 0x00, 0xFF, 0x03, 0x80,
+ 0x6C, 0x84, 0xF1, 0xFF,
+ 0x04, 0x80, 0xEE, 0x00,
+ 0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_CDM_LOAD_SR_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_END_OF_TILE_H
+#define PVR_END_OF_TILE_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Embedded pre-compiled USC binary (named for the end-of-tile stage).
+ * Generated data: do not hand-edit the bytes.
+ */
+static const uint8_t pvr_end_of_tile_program[] = {
+ 0xa9, 0xf2, 0x40, 0x00,
+ 0x47, 0x91, 0x00, 0x50,
+ 0x04, 0x00, 0x80, 0x40,
+ 0x00, 0x00, 0x80, 0x80,
+ 0x24, 0xff, 0xa9, 0xf2,
+ 0x40, 0x00, 0x47, 0x91,
+ 0x20, 0x20, 0x08, 0x00,
+ 0x80, 0x40, 0x00, 0x00,
+ 0x80, 0x80, 0x25, 0xff,
+ 0x45, 0xa0, 0x80, 0xc2,
+ 0xa4, 0x40, 0x00, 0x25,
+ 0x00, 0x00
+};
+/* clang-format on */
+
+#endif /* PVR_END_OF_TILE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_USC_COMPUTE_SHADER_H
+#define PVR_USC_COMPUTE_SHADER_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Embedded pre-compiled USC compute-shader binary; auto-generated data, do
+ * not hand-edit the bytes.
+ *
+ * Declared static const to match the other embedded program headers and to
+ * avoid multiple-definition link errors when this header is included from
+ * more than one translation unit.
+ */
+static const uint8_t pvr_usc_compute_shader[] = {
+ 0x44, 0x12, 0xd3, 0x3f,
+ 0x00, 0x00, 0x00, 0x24,
+ 0x46, 0x40, 0xf9, 0xb0,
+ 0x87, 0x80, 0x40, 0xa0,
+ 0x2a, 0x30, 0x00, 0x02,
+ 0x04, 0x81, 0x61, 0x00,
+ 0x54, 0x00, 0x00, 0x00,
+ 0x02, 0x80, 0x6c, 0xc4,
+ 0x46, 0x40, 0xf9, 0xb0,
+ 0x87, 0x81, 0x40, 0xa0,
+ 0x2d, 0x10, 0x00, 0x02,
+ 0x46, 0x12, 0xd3, 0x3f,
+ 0x80, 0xca, 0x83, 0x10,
+ 0x00, 0x00, 0x25, 0xff,
+ 0x46, 0x13, 0xd3, 0x3f,
+ 0x80, 0xcb, 0x83, 0x10,
+ 0x00, 0x00, 0x25, 0xff,
+ 0x46, 0x40, 0xf9, 0xb0,
+ 0x87, 0x80, 0x40, 0xa0,
+ 0x25, 0x10, 0x00, 0x02,
+ 0x04, 0x81, 0x61, 0x00,
+ 0x14, 0x00, 0x00, 0x00,
+ 0x02, 0x80, 0x6c, 0x44,
+ 0x04, 0x80, 0x60, 0x00,
+ 0xc0, 0xff, 0xff, 0xff,
+ 0x02, 0x80, 0x6c, 0x04,
+ 0x89, 0x52, 0xdf, 0x3c,
+ 0xfc, 0xa0, 0x9c, 0x1e,
+ 0x87, 0x87, 0x80, 0xcf,
+ 0x90, 0x11, 0x01, 0xa0,
+ 0x25, 0xff, 0x46, 0x40,
+ 0xff, 0xd0, 0x87, 0xa5,
+ 0x40, 0xa0, 0x00, 0x10,
+ 0x00, 0x02, 0x44, 0x82,
+ 0x67, 0x38, 0x24, 0x00,
+ 0x24, 0xff, 0x04, 0x80,
+ 0x60, 0x04, 0x68, 0x00,
+ 0x00, 0x00, 0x45, 0x12,
+ 0xd3, 0x3f, 0xc0, 0x04,
+ 0x00, 0x00, 0x00, 0x25,
+ 0x27, 0x02, 0xeb, 0xa5,
+ 0x44, 0xa0, 0x00, 0x80,
+ 0x81, 0x08, 0x00, 0xc0,
+ 0x80, 0x04, 0x27, 0x04,
+ 0xeb, 0xa5, 0x44, 0xa0,
+ 0x00, 0x80, 0x81, 0x08,
+ 0x00, 0xc0, 0x81, 0x04,
+ 0x55, 0x20, 0xf1, 0x84,
+ 0x02, 0x00, 0x82, 0xc0,
+ 0x18, 0x00, 0x02, 0x80,
+ 0x6a, 0xff, 0x27, 0x02,
+ 0xeb, 0xa5, 0x44, 0xa0,
+ 0x02, 0x80, 0x83, 0x08,
+ 0x00, 0xc0, 0x80, 0x04,
+ 0x27, 0x04, 0xeb, 0xa5,
+ 0x44, 0xa0, 0x02, 0x80,
+ 0x83, 0x08, 0x00, 0xc0,
+ 0x81, 0x04, 0x66, 0x20,
+ 0xf2, 0x86, 0xb8, 0x28,
+ 0x00, 0x82, 0xc0, 0x18,
+ 0x00, 0xff, 0x02, 0x80,
+ 0x6a, 0xff, 0x45, 0x82,
+ 0x67, 0x24, 0x24, 0x00,
+ 0x24, 0xff, 0xf1, 0xff,
+ 0x04, 0x80, 0xee, 0x00,
+ 0xf2, 0xff, 0xff, 0xff
+};
+/* clang-format on */
+
+#endif /* PVR_USC_COMPUTE_SHADER_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_USC_FRAGMENT_SHADER_H
+#define PVR_USC_FRAGMENT_SHADER_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Embedded pre-compiled USC fragment-shader binary; auto-generated data, do
+ * not hand-edit the bytes.
+ */
+static const uint8_t pvr_usc_fragment_shader[] = {
+ 0x58, 0x9a, 0x80, 0xd3,
+ 0x3f, 0x80, 0x08, 0x00,
+ 0x00, 0x00, 0x20, 0xff,
+ 0xf2, 0xff, 0xff, 0xff
+};
+/* clang-format on */
+
+#endif /* PVR_USC_FRAGMENT_SHADER_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_VDM_LOAD_SR_H
+#define PVR_VDM_LOAD_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Embedded pre-compiled USC binary ("VDM load SR" - presumably restores the
+ * vertex data master shared registers on context resume; TODO confirm).
+ * Generated data: do not hand-edit the bytes.
+ */
+static const uint8_t pvr_vdm_load_sr_code[] =
+{
+ 0x25, 0x02, 0x87, 0x81,
+ 0x04, 0x00, 0x00, 0x00,
+ 0x84, 0x04, 0x25, 0x02,
+ 0x87, 0x80, 0x04, 0x00,
+ 0x00, 0x00, 0x85, 0x04,
+ 0x25, 0x02, 0x87, 0x83,
+ 0x04, 0x00, 0x00, 0x00,
+ 0x86, 0x04, 0x25, 0x02,
+ 0x87, 0x82, 0x04, 0x00,
+ 0x00, 0x00, 0x87, 0x04,
+ 0x56, 0x20, 0xF1, 0x85,
+ 0x02, 0x80, 0x81, 0xD0,
+ 0xC4, 0x08, 0x00, 0xFF,
+ 0x02, 0x80, 0x6A, 0xFF,
+ 0x67, 0xF0, 0x40, 0x20,
+ 0x41, 0x8C, 0x80, 0x40,
+ 0x00, 0x50, 0x8F, 0xC0,
+ 0x80, 0x02, 0x04, 0x81,
+ 0x60, 0x00, 0x9A, 0x00,
+ 0x00, 0x00, 0x25, 0x36,
+ 0x87, 0x87, 0x00, 0x00,
+ 0x40, 0x05, 0xD1, 0x06,
+ 0x55, 0x20, 0xF1, 0x81,
+ 0x02, 0x00, 0xC0, 0xC6,
+ 0x08, 0x00, 0x02, 0x80,
+ 0x6A, 0xFF, 0x46, 0x42,
+ 0xD0, 0x03, 0xEA, 0xD1,
+ 0x41, 0x00, 0x01, 0x00,
+ 0x00, 0x51, 0x27, 0x06,
+ 0xEB, 0x84, 0x50, 0x20,
+ 0x86, 0x87, 0x04, 0x00,
+ 0xC0, 0x06, 0x87, 0x22,
+ 0x25, 0x32, 0x87, 0x87,
+ 0x00, 0x1F, 0x40, 0xC5,
+ 0x0C, 0xFF, 0x25, 0x02,
+ 0x87, 0xC0, 0x0C, 0x00,
+ 0x00, 0x00, 0x83, 0x0C,
+ 0x47, 0x42, 0xD0, 0x03,
+ 0xEA, 0x85, 0x41, 0x90,
+ 0x01, 0x08, 0x00, 0x00,
+ 0x85, 0x0C, 0x47, 0x42,
+ 0xD0, 0x03, 0xEA, 0xC5,
+ 0x41, 0x90, 0x01, 0x08,
+ 0x00, 0x00, 0xC5, 0x0C,
+ 0x67, 0xF0, 0x40, 0x28,
+ 0x42, 0x8C, 0x80, 0x40,
+ 0x80, 0xC5, 0x80, 0x90,
+ 0x80, 0xFF, 0x04, 0x81,
+ 0x60, 0x00, 0xCC, 0xFF,
+ 0xFF, 0xFF, 0x66, 0xF0,
+ 0x40, 0x28, 0x42, 0x8C,
+ 0x80, 0x40, 0x00, 0x51,
+ 0xD0, 0x80, 0x07, 0x81,
+ 0x60, 0x00, 0x86, 0xFF,
+ 0xFF, 0xFF, 0xF3, 0xFF,
+ 0xFF, 0xFF, 0xFF, 0xFF,
+ 0x04, 0x80, 0xEE, 0x00,
+ 0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_VDM_LOAD_SR_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_VDM_STORE_SR_H
+#define PVR_VDM_STORE_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Embedded pre-compiled USC binary ("VDM store SR" - presumably saves the
+ * vertex data master shared registers on context store; TODO confirm).
+ * Generated data: do not hand-edit the bytes.
+ */
+static const uint8_t pvr_vdm_store_sr_code[] =
+{
+ 0x25, 0x02, 0x87, 0x81,
+ 0x04, 0x00, 0x00, 0x00,
+ 0x40, 0xFF, 0x25, 0x02,
+ 0x87, 0x80, 0x04, 0x00,
+ 0x00, 0x00, 0x41, 0xFF,
+ 0x25, 0x02, 0x87, 0x83,
+ 0x04, 0x00, 0x00, 0x00,
+ 0x86, 0x04, 0x25, 0x02,
+ 0x87, 0x82, 0x04, 0x00,
+ 0x00, 0x00, 0x87, 0x04,
+ 0x68, 0xF2, 0x40, 0x20,
+ 0x41, 0x8C, 0x80, 0x40,
+ 0x80, 0x27, 0x20, 0x8F,
+ 0xC0, 0x80, 0x02, 0x42,
+ 0x66, 0x20, 0xF2, 0x84,
+ 0xB8, 0x28, 0x80, 0xA2,
+ 0xC2, 0xA0, 0x00, 0xFF,
+ 0x44, 0x20, 0xE0, 0x00,
+ 0xC0, 0xA1, 0x00, 0x00,
+ 0x02, 0x80, 0x6A, 0xFF,
+ 0x04, 0x81, 0x60, 0x00,
+ 0xE8, 0x00, 0x00, 0x00,
+ 0x25, 0x36, 0x87, 0x87,
+ 0x00, 0x00, 0x40, 0x05,
+ 0xC3, 0x06, 0x25, 0x02,
+ 0xE2, 0x90, 0x50, 0x00,
+ 0x00, 0x00, 0x00, 0x45,
+ 0x25, 0x02, 0xE2, 0xC5,
+ 0x44, 0x00, 0x00, 0x00,
+ 0x00, 0x45, 0x89, 0xF2,
+ 0x40, 0x21, 0x49, 0x9C,
+ 0xC0, 0x00, 0x80, 0x40,
+ 0x00, 0x42, 0x8F, 0xC6,
+ 0x80, 0x02, 0x44, 0xFF,
+ 0x04, 0x81, 0x60, 0x00,
+ 0x62, 0x00, 0x00, 0x00,
+ 0x68, 0x20, 0xE2, 0x88,
+ 0xB8, 0x28, 0x80, 0x64,
+ 0x00, 0x00, 0x83, 0xC6,
+ 0x98, 0x08, 0x00, 0xFF,
+ 0x45, 0x20, 0xE0, 0x00,
+ 0x86, 0xE1, 0x10, 0x00,
+ 0x00, 0xFF, 0x02, 0x80,
+ 0x6A, 0xFF, 0x47, 0x42,
+ 0xD0, 0x03, 0xE2, 0xC5,
+ 0x41, 0x80, 0x85, 0x80,
+ 0x00, 0x00, 0x85, 0x0C,
+ 0x46, 0x42, 0xD0, 0x03,
+ 0xE2, 0xC3, 0x41, 0x00,
+ 0x01, 0x00, 0x00, 0x43,
+ 0x27, 0x06, 0xE3, 0x84,
+ 0x65, 0x20, 0x86, 0x87,
+ 0x04, 0x00, 0xC0, 0x06,
+ 0x87, 0x22, 0x66, 0xF0,
+ 0x40, 0x28, 0x42, 0x8C,
+ 0x80, 0x40, 0x00, 0x43,
+ 0xC4, 0x80, 0x04, 0x81,
+ 0x60, 0x00, 0xAE, 0xFF,
+ 0xFF, 0xFF, 0x46, 0x42,
+ 0xD0, 0x03, 0xE2, 0x84,
+ 0x4F, 0x00, 0x03, 0x00,
+ 0x00, 0x46, 0x67, 0xF2,
+ 0x40, 0x00, 0x41, 0x8C,
+ 0x80, 0x40, 0x00, 0x42,
+ 0xC6, 0x84, 0x42, 0xFF,
+ 0x66, 0xF0, 0x40, 0x20,
+ 0x41, 0x8C, 0x80, 0x40,
+ 0x00, 0x42, 0xC2, 0x80,
+ 0x04, 0x81, 0x60, 0x00,
+ 0x28, 0x00, 0x00, 0x00,
+ 0x68, 0x20, 0xE2, 0x88,
+ 0xB8, 0x28, 0x80, 0x62,
+ 0x00, 0x42, 0x83, 0xC6,
+ 0x98, 0x08, 0x00, 0xFF,
+ 0x45, 0x20, 0xE0, 0x00,
+ 0x86, 0xE1, 0x10, 0x00,
+ 0x00, 0xFF, 0x03, 0x80,
+ 0x6A, 0xFF, 0xF1, 0xFF,
+ 0x04, 0x80, 0xEE, 0x00,
+ 0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_VDM_STORE_SR_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on radv driver which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Based on u_format.h which is:
+ * Copyright 2009-2010 VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* clang-format off */
+#ifndef VK_FORMAT_H
+#define VK_FORMAT_H
+
+#include <util/format/u_format.h>
+#include <vulkan/util/vk_format.h>
+
+#include <vulkan/vulkan.h>
+
+#include "util/u_endian.h"
+
+/* Returns true when the format is (s)RGB and its alpha swizzle selects the
+ * first channel on big-endian builds or the last channel on little-endian
+ * builds (i.e. alpha resides in the most significant bits in memory).
+ */
+static inline bool
+vk_format_is_alpha_on_msb(VkFormat vk_format)
+{
+ const struct util_format_description *desc =
+ vk_format_description(vk_format);
+
+ return (desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+ desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) &&
+#if defined(UTIL_ARCH_BIG_ENDIAN)
+ desc->swizzle[3] == PIPE_SWIZZLE_X;
+#else
+ desc->swizzle[3] == PIPE_SWIZZLE_W;
+#endif
+}
+
+/* Returns true if the format contains an alpha channel.
+ *
+ * Return type changed from the legacy gallium "boolean" to bool, matching
+ * vk_format_is_alpha_on_msb() above and avoiding a non-standard typedef.
+ */
+static inline bool
+vk_format_has_alpha(VkFormat vk_format)
+{
+ return util_format_has_alpha(vk_format_to_pipe_format(vk_format));
+}
+
+/* Returns true if the format is a pure (non-normalized) integer format.
+ *
+ * Return type changed from the legacy gallium "boolean" to bool for
+ * consistency with the rest of this header.
+ */
+static inline bool
+vk_format_is_pure_integer(VkFormat vk_format)
+{
+ return util_format_is_pure_integer(vk_format_to_pipe_format(vk_format));
+}
+
+/* Number of bits per format block, as reported by the util format layer.
+ *
+ * Return type changed from non-standard "uint" to uint32_t, matching the
+ * fixed-width types used throughout this header.
+ */
+static inline uint32_t
+vk_format_get_blocksizebits(VkFormat vk_format)
+{
+ return util_format_get_blocksizebits(vk_format_to_pipe_format(vk_format));
+}
+
+/* Size in bits of the given channel of the format.
+ *
+ * Return type changed from non-standard "uint" to uint32_t, matching the
+ * fixed-width types used throughout this header.
+ */
+static inline uint32_t
+vk_format_get_channel_width(VkFormat vk_format, uint32_t channel)
+{
+ const struct util_format_description *desc =
+ vk_format_description(vk_format);
+
+ return desc->channel[channel].size;
+}
+
+/* Returns true if any channel of the format is 32 bits wide.
+ *
+ * Return type changed from the legacy gallium "boolean" to bool (the body
+ * already returns true/false), for consistency with the rest of this header.
+ */
+static inline bool
+vk_format_has_32bit_component(VkFormat vk_format)
+{
+ const struct util_format_description *desc =
+ vk_format_description(vk_format);
+
+ for (uint32_t i = 0; i < desc->nr_channels; i++) {
+ if (desc->channel[i].size == 32U)
+ return true;
+ }
+
+ return false;
+}
+
+/* Size in bits of the given component in the given colorspace, as reported
+ * by util_format_get_component_bits().
+ *
+ * Return type changed from non-standard "uint" to uint32_t, matching the
+ * fixed-width types used throughout this header.
+ */
+static inline uint32_t
+vk_format_get_component_size_in_bits(VkFormat vk_format,
+ enum util_format_colorspace colorspace,
+ uint32_t component)
+{
+ return util_format_get_component_bits(vk_format_to_pipe_format(vk_format),
+ colorspace,
+ component);
+}
+
+/* Returns true if every channel of the format is normalized.
+ *
+ * Return type changed from the legacy gallium "boolean" to bool (the body
+ * already returns true/false), for consistency with the rest of this header.
+ */
+static inline bool
+vk_format_is_normalized(VkFormat vk_format)
+{
+ const struct util_format_description *desc =
+ vk_format_description(vk_format);
+
+ for (uint32_t i = 0; i < desc->nr_channels; i++) {
+ if (!desc->channel[i].normalized)
+ return false;
+ }
+
+ return true;
+}
+
+#endif /* VK_FORMAT_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_drm_public.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+
+/* Stub: no DRM winsys exists yet because the upstream "powervr" UAPI is not
+ * stable. Always returns NULL (no winsys created).
+ */
+struct pvr_winsys *pvr_drm_winsys_create(int master_fd,
+ int render_fd,
+ const VkAllocationCallbacks *alloc)
+{
+ pvr_finishme("Add implementation once powervr UAPI is stable.");
+
+ return NULL;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRM_PUBLIC_H
+#define PVR_DRM_PUBLIC_H
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+
+struct pvr_winsys *pvr_drm_winsys_create(int master_fd,
+ int render_fd,
+ const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_DRM_PUBLIC_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <vulkan/vulkan.h>
+#include <xf86drm.h>
+
+#include "powervr/pvr_drm_public.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "vk_log.h"
+
+#if defined(PVR_SUPPORT_SERVICES_DRIVER)
+# include "pvrsrvkm/pvr_srv_public.h"
+#endif
+
+/* Destroy a winsys through its backend-specific destroy hook. */
+void pvr_winsys_destroy(struct pvr_winsys *ws)
+{
+ ws->ops->destroy(ws);
+}
+
+/* Create the winsys appropriate for the kernel driver behind render_fd.
+ *
+ * When built with PVR_SUPPORT_SERVICES_DRIVER, the kernel module name is
+ * queried: "pvr" selects the Services (pvrsrvkm) winsys and "powervr" the
+ * DRM winsys; any other name is rejected as an incompatible driver. Without
+ * Services support the DRM winsys is used unconditionally.
+ *
+ * Returns NULL on failure (incompatible driver or backend creation failure).
+ *
+ * NOTE(review): strcmp() is used below but <string.h> is not included
+ * directly by this file - confirm it is pulled in transitively.
+ */
+struct pvr_winsys *pvr_winsys_create(int master_fd,
+ int render_fd,
+ const VkAllocationCallbacks *alloc)
+{
+#if defined(PVR_SUPPORT_SERVICES_DRIVER)
+ drmVersionPtr version;
+ bool services_driver;
+
+ version = drmGetVersion(render_fd);
+ if (!version) {
+ vk_errorf(NULL,
+ VK_ERROR_INCOMPATIBLE_DRIVER,
+ "Failed to query kernel driver version for device.");
+ return NULL;
+ }
+
+ if (strcmp(version->name, "pvr") == 0) {
+ services_driver = true;
+ } else if (strcmp(version->name, "powervr") == 0) {
+ services_driver = false;
+ } else {
+ drmFreeVersion(version);
+ vk_errorf(
+ NULL,
+ VK_ERROR_INCOMPATIBLE_DRIVER,
+ "Device does not use any of the supported pvrsrvkm or powervr kernel driver.");
+ return NULL;
+ }
+
+ drmFreeVersion(version);
+
+ if (services_driver)
+ return pvr_srv_winsys_create(master_fd, render_fd, alloc);
+#endif
+
+ return pvr_drm_winsys_create(master_fd, render_fd, alloc);
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Based on radv_radeon_winsys.h which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_WINSYS_H
+#define PVR_WINSYS_H
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_rogue_fw.h"
+#include "pvr_limits.h"
+#include "util/macros.h"
+#include "util/vma.h"
+
+struct pvr_device_info;
+
+/* Device virtual (GPU) address, wrapped in a struct so it cannot be mixed
+ * up with CPU pointers or plain integers.
+ */
+typedef struct pvr_dev_addr {
+ uint64_t addr;
+} pvr_dev_addr_t;
+
+/* clang-format off */
+#define PVR_DEV_ADDR_INVALID (pvr_dev_addr_t){ .addr = 0 }
+/* clang-format on */
+
+/* The set of heaps exposed by a winsys backend. */
+struct pvr_winsys_heaps {
+ struct pvr_winsys_heap *general_heap;
+ struct pvr_winsys_heap *pds_heap;
+ struct pvr_winsys_heap *rgn_hdr_heap;
+ struct pvr_winsys_heap *usc_heap;
+};
+
+/* Per-heap offsets of static data uploaded at heap base; see the note on
+ * pvr_winsys_heap::static_data_offsets.
+ */
+struct pvr_winsys_static_data_offsets {
+ uint64_t eot;
+ uint64_t fence;
+ uint64_t vdm_sync;
+ uint64_t yuv_csc;
+};
+
+/* A GPU virtual-address heap. Address ranges are carved out of vma_heap;
+ * lock serialises access and ref_count presumably tracks active users -
+ * TODO confirm against the backend implementation.
+ */
+struct pvr_winsys_heap {
+ struct pvr_winsys *ws;
+
+ pvr_dev_addr_t base_addr;
+ pvr_dev_addr_t reserved_addr;
+
+ uint64_t size;
+ uint64_t reserved_size;
+
+ uint32_t page_size;
+ uint32_t log2_page_size;
+
+ struct util_vma_heap vma_heap;
+ int ref_count;
+ pthread_mutex_t lock;
+
+ /* These are the offsets from the base at which static data might be
+ * uploaded. Some of these might be invalid since the kernel might not
+ * return all of these offsets per each heap as they might not be
+ * applicable.
+ * You should know which to use beforehand. There should be no need to check
+ * whether an offset is valid or invalid.
+ */
+ struct pvr_winsys_static_data_offsets static_data_offsets;
+};
+
+/* Kind of buffer object a backend should allocate. */
+enum pvr_winsys_bo_type {
+ PVR_WINSYS_BO_TYPE_GPU = 0,
+ PVR_WINSYS_BO_TYPE_DISPLAY = 1,
+};
+
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that the
+ * buffer should be CPU accessible. This is required in order to map the buffer
+ * using #pvr_winsys_ops.buffer_map.
+ */
+#define PVR_WINSYS_BO_FLAG_CPU_ACCESS BITFIELD_BIT(0U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that, when
+ * the buffer is mapped to the GPU using #pvr_winsys.vma_map, it should be
+ * mapped uncached.
+ */
+#define PVR_WINSYS_BO_FLAG_GPU_UNCACHED BITFIELD_BIT(1U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that, when
+ * the buffer is mapped to the GPU using #pvr_winsys.vma_map, it should only be
+ * accessible to the Parameter Manager unit and firmware processor.
+ */
+#define PVR_WINSYS_BO_FLAG_PM_FW_PROTECT BITFIELD_BIT(2U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that the
+ * buffer should be zeroed at allocation time.
+ */
+#define PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC BITFIELD_BIT(3U)
+
+/* Winsys buffer object; map is non-NULL only while CPU-mapped. */
+struct pvr_winsys_bo {
+ struct pvr_winsys *ws;
+ void *map;
+ uint64_t size;
+
+ bool is_imported;
+};
+
+/* A GPU virtual-address range allocated from a heap. */
+struct pvr_winsys_vma {
+ struct pvr_winsys_heap *heap;
+
+ /* Buffer and offset this vma is bound to. */
+ struct pvr_winsys_bo *bo;
+ VkDeviceSize bo_offset;
+
+ pvr_dev_addr_t dev_addr;
+ uint64_t size;
+ uint64_t mapped_size;
+};
+
+/* Backend-opaque sync object; backends embed this as a base struct. */
+struct pvr_winsys_syncobj {
+ struct pvr_winsys *ws;
+};
+
+/* Backend-opaque free list handle. */
+struct pvr_winsys_free_list {
+ struct pvr_winsys *ws;
+};
+
+/* Parameters for creating a render-target dataset: pre-computed hardware
+ * register values plus the GPU allocations backing each RT data.
+ */
+struct pvr_winsys_rt_dataset_create_info {
+ /* Local freelist */
+ struct pvr_winsys_free_list *local_free_list;
+
+ /* ISP register values */
+ uint32_t isp_merge_lower_x;
+ uint32_t isp_merge_lower_y;
+ uint32_t isp_merge_scale_x;
+ uint32_t isp_merge_scale_y;
+ uint32_t isp_merge_upper_x;
+ uint32_t isp_merge_upper_y;
+ uint32_t isp_mtile_size;
+
+ /* PPP register values */
+ uint64_t ppp_multi_sample_ctl;
+ uint64_t ppp_multi_sample_ctl_y_flipped;
+ uint32_t ppp_screen;
+
+ /* TE register values */
+ uint32_t te_aa;
+ uint32_t te_mtile1;
+ uint32_t te_mtile2;
+ uint32_t te_screen;
+
+ /* Allocations and associated information */
+ pvr_dev_addr_t vheap_table_dev_addr;
+ pvr_dev_addr_t rtc_dev_addr;
+
+ pvr_dev_addr_t tpc_dev_addr;
+ uint32_t tpc_stride;
+ uint32_t tpc_size;
+
+ struct {
+ pvr_dev_addr_t pm_mlist_dev_addr;
+ pvr_dev_addr_t macrotile_array_dev_addr;
+ pvr_dev_addr_t rgn_header_dev_addr;
+ } rt_datas[ROGUE_NUM_RTDATAS];
+ uint64_t rgn_header_size;
+
+ /* Miscellaneous */
+ uint32_t mtile_stride;
+ uint16_t max_rts;
+};
+
+/* Backend-opaque render-target dataset handle. */
+struct pvr_winsys_rt_dataset {
+ struct pvr_winsys *ws;
+};
+
+/* Scheduling priority for a winsys context. */
+enum pvr_winsys_ctx_priority {
+ PVR_WINSYS_CTX_PRIORITY_LOW,
+ PVR_WINSYS_CTX_PRIORITY_MEDIUM,
+ PVR_WINSYS_CTX_PRIORITY_HIGH,
+};
+
+/* Parameters for creating a render context. The static_state values are
+ * register contents used for context store/resume of the geometry state
+ * (two geometry state slots).
+ */
+struct pvr_winsys_render_ctx_create_info {
+ enum pvr_winsys_ctx_priority priority;
+ pvr_dev_addr_t vdm_callstack_addr;
+
+ struct pvr_winsys_render_ctx_static_state {
+ uint64_t vdm_ctx_state_base_addr;
+ uint64_t geom_ctx_state_base_addr;
+
+ struct {
+ uint64_t vdm_ctx_store_task0;
+ uint32_t vdm_ctx_store_task1;
+ uint64_t vdm_ctx_store_task2;
+
+ uint64_t vdm_ctx_resume_task0;
+ uint32_t vdm_ctx_resume_task1;
+ uint64_t vdm_ctx_resume_task2;
+ } geom_state[2];
+ } static_state;
+};
+
+/* Backend-opaque render context handle. */
+struct pvr_winsys_render_ctx {
+ struct pvr_winsys *ws;
+};
+
+/* Parameters for creating a compute context; static_state holds the CDM
+ * context store/terminate/resume register values.
+ */
+struct pvr_winsys_compute_ctx_create_info {
+ enum pvr_winsys_ctx_priority priority;
+
+ struct pvr_winsys_compute_ctx_static_state {
+ uint64_t cdm_ctx_state_base_addr;
+
+ uint64_t cdm_ctx_store_pds0;
+ uint64_t cdm_ctx_store_pds0_b;
+ uint64_t cdm_ctx_store_pds1;
+
+ uint64_t cdm_ctx_terminate_pds;
+ uint64_t cdm_ctx_terminate_pds1;
+
+ uint64_t cdm_ctx_resume_pds0;
+ uint64_t cdm_ctx_resume_pds0_b;
+ } static_state;
+};
+
+/* Backend-opaque compute context handle. */
+struct pvr_winsys_compute_ctx {
+ struct pvr_winsys *ws;
+};
+
+#define PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP BITFIELD_BIT(0U)
+#define PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE BITFIELD_BIT(1U)
+
+/* Per-job information for pvr_winsys_ops::compute_submit(). */
+struct pvr_winsys_compute_submit_info {
+   uint32_t frame_num;
+   uint32_t job_num;
+
+   /* semaphores and stage_flags are arrays of length semaphore_count. */
+   const VkSemaphore *semaphores;
+   uint32_t *stage_flags;
+   uint32_t semaphore_count;
+
+   /* Register values loaded by the firmware for this job. */
+   struct {
+      uint64_t tpu_border_colour_table;
+      uint64_t cdm_item;
+      uint64_t compute_cluster;
+      uint64_t cdm_ctrl_stream_base;
+      uint32_t tpu;
+      uint32_t cdm_resume_pds1;
+   } regs;
+
+   /* Must be 0 or a combination of PVR_WINSYS_COMPUTE_FLAG_* flags. */
+   uint32_t flags;
+};
+
+#define PVR_WINSYS_JOB_BO_FLAG_WRITE BITFIELD_BIT(0U)
+
+/* A buffer object referenced by a job, with per-job access flags. */
+struct pvr_winsys_job_bo {
+   struct pvr_winsys_bo *bo;
+   /* Must be 0 or a combination of PVR_WINSYS_JOB_BO_FLAG_* flags. */
+   uint32_t flags;
+};
+
+#define PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY BITFIELD_BIT(0U)
+#define PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY BITFIELD_BIT(1U)
+#define PVR_WINSYS_GEOM_FLAG_SINGLE_CORE BITFIELD_BIT(2U)
+
+#define PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT BITFIELD_BIT(0U)
+#define PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT BITFIELD_BIT(1U)
+#define PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP BITFIELD_BIT(2U)
+#define PVR_WINSYS_FRAG_FLAG_SINGLE_CORE BITFIELD_BIT(3U)
+
+/* Per-job information for pvr_winsys_ops::render_submit(): a geometry pass
+ * plus an optional fragment pass (run_frag).
+ */
+struct pvr_winsys_render_submit_info {
+   struct pvr_winsys_rt_dataset *rt_dataset;
+   uint8_t rt_data_idx;
+
+   uint32_t frame_num;
+   uint32_t job_num;
+
+   /* bos is an array of length bo_count. */
+   uint32_t bo_count;
+   const struct pvr_winsys_job_bo *bos;
+
+   /* FIXME: should this be flags instead? */
+   bool run_frag;
+
+   /* semaphores and stage_flags are arrays of length semaphore_count. */
+   const VkSemaphore *semaphores;
+   uint32_t *stage_flags;
+   uint32_t semaphore_count;
+
+   struct pvr_winsys_geometry_state {
+      /* Register values loaded by the firmware for the geometry pass. */
+      struct {
+         uint32_t pds_ctrl;
+         uint32_t ppp_ctrl;
+         uint32_t te_psg;
+         uint32_t tpu;
+         uint64_t tpu_border_colour_table;
+         uint64_t vdm_ctrl_stream_base;
+         uint32_t vdm_ctx_resume_task0_size;
+      } regs;
+
+      /* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */
+      uint32_t flags;
+   } geometry;
+
+   struct pvr_winsys_fragment_state {
+      /* Register values loaded by the firmware for the fragment pass. */
+      struct {
+         uint32_t event_pixel_pds_data;
+         uint32_t event_pixel_pds_info;
+         uint32_t isp_aa;
+         uint32_t isp_bgobjdepth;
+         uint32_t isp_bgobjvals;
+         uint32_t isp_ctl;
+         uint64_t isp_dbias_base;
+         uint64_t isp_oclqry_base;
+         uint64_t isp_scissor_base;
+         uint64_t isp_stencil_load_store_base;
+         uint64_t isp_zload_store_base;
+         uint64_t isp_zlsctl;
+         uint64_t isp_zls_pixels;
+         uint64_t pbe_word[PVR_MAX_COLOR_ATTACHMENTS]
+                          [ROGUE_NUM_PBESTATE_REG_WORDS];
+         uint32_t pixel_phantom;
+         uint64_t pds_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+         uint64_t pds_pr_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+         uint32_t tpu;
+         uint64_t tpu_border_colour_table;
+         uint32_t usc_pixel_output_ctrl;
+      } regs;
+
+      /* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. */
+      uint32_t flags;
+      /* Strides in bytes for the Z and S buffers (see rogue_fwif_cmd_3d). */
+      uint32_t zls_stride;
+      uint32_t sls_stride;
+   } fragment;
+};
+
+/* Backend entry points. Each concrete winsys implementation fills in this
+ * table; the rest of the driver reaches the kernel only through it.
+ */
+struct pvr_winsys_ops {
+   void (*destroy)(struct pvr_winsys *ws);
+   int (*device_info_init)(struct pvr_winsys *ws,
+                           struct pvr_device_info *dev_info);
+   void (*get_heaps_info)(struct pvr_winsys *ws,
+                          struct pvr_winsys_heaps *heaps);
+
+   /* Buffer objects. */
+   VkResult (*buffer_create)(struct pvr_winsys *ws,
+                             uint64_t size,
+                             uint64_t alignment,
+                             enum pvr_winsys_bo_type type,
+                             uint32_t flags,
+                             struct pvr_winsys_bo **const bo_out);
+   VkResult (*buffer_create_from_fd)(struct pvr_winsys *ws,
+                                     int fd,
+                                     struct pvr_winsys_bo **const bo_out);
+   void (*buffer_destroy)(struct pvr_winsys_bo *bo);
+
+   VkResult (*buffer_get_fd)(struct pvr_winsys_bo *bo, int *const fd_out);
+
+   void *(*buffer_map)(struct pvr_winsys_bo *bo);
+   void (*buffer_unmap)(struct pvr_winsys_bo *bo);
+
+   /* Device-virtual address space management and GPU mapping. */
+   struct pvr_winsys_vma *(*heap_alloc)(struct pvr_winsys_heap *heap,
+                                        uint64_t size,
+                                        uint64_t alignment);
+   void (*heap_free)(struct pvr_winsys_vma *vma);
+
+   pvr_dev_addr_t (*vma_map)(struct pvr_winsys_vma *vma,
+                             struct pvr_winsys_bo *bo,
+                             uint64_t offset,
+                             uint64_t size);
+   void (*vma_unmap)(struct pvr_winsys_vma *vma);
+
+   /* Synchronization objects. */
+   VkResult (*syncobj_create)(struct pvr_winsys *ws,
+                              bool signaled,
+                              struct pvr_winsys_syncobj **const syncobj_out);
+   void (*syncobj_destroy)(struct pvr_winsys_syncobj *syncobj);
+   VkResult (*syncobjs_reset)(struct pvr_winsys *ws,
+                              struct pvr_winsys_syncobj **const syncobjs,
+                              uint32_t count);
+   VkResult (*syncobjs_signal)(struct pvr_winsys *ws,
+                               struct pvr_winsys_syncobj **const syncobjs,
+                               uint32_t count);
+   VkResult (*syncobjs_wait)(struct pvr_winsys *ws,
+                             struct pvr_winsys_syncobj **const syncobjs,
+                             uint32_t count,
+                             bool wait_all,
+                             uint64_t timeout);
+   VkResult (*syncobjs_merge)(struct pvr_winsys_syncobj *src,
+                              struct pvr_winsys_syncobj *target,
+                              struct pvr_winsys_syncobj **out);
+
+   /* Free lists and render-target datasets. */
+   VkResult (*free_list_create)(
+      struct pvr_winsys *ws,
+      struct pvr_winsys_vma *free_list_vma,
+      uint32_t initial_num_pages,
+      uint32_t max_num_pages,
+      uint32_t grow_num_pages,
+      uint32_t grow_threshold,
+      struct pvr_winsys_free_list *parent_free_list,
+      struct pvr_winsys_free_list **const free_list_out);
+   void (*free_list_destroy)(struct pvr_winsys_free_list *free_list);
+
+   VkResult (*render_target_dataset_create)(
+      struct pvr_winsys *ws,
+      const struct pvr_winsys_rt_dataset_create_info *create_info,
+      struct pvr_winsys_rt_dataset **const rt_dataset_out);
+   void (*render_target_dataset_destroy)(
+      struct pvr_winsys_rt_dataset *rt_dataset);
+
+   /* Context creation and job submission. */
+   VkResult (*render_ctx_create)(
+      struct pvr_winsys *ws,
+      struct pvr_winsys_render_ctx_create_info *create_info,
+      struct pvr_winsys_render_ctx **const ctx_out);
+   void (*render_ctx_destroy)(struct pvr_winsys_render_ctx *ctx);
+   VkResult (*render_submit)(
+      const struct pvr_winsys_render_ctx *ctx,
+      const struct pvr_winsys_render_submit_info *submit_info,
+      struct pvr_winsys_syncobj **const syncobj_geom_out,
+      struct pvr_winsys_syncobj **const syncobj_frag_out);
+
+   VkResult (*compute_ctx_create)(
+      struct pvr_winsys *ws,
+      const struct pvr_winsys_compute_ctx_create_info *create_info,
+      struct pvr_winsys_compute_ctx **const ctx_out);
+   void (*compute_ctx_destroy)(struct pvr_winsys_compute_ctx *ctx);
+   VkResult (*compute_submit)(
+      const struct pvr_winsys_compute_ctx *ctx,
+      const struct pvr_winsys_compute_submit_info *submit_info,
+      struct pvr_winsys_syncobj **const syncobj_out);
+};
+
+/* Base winsys object; backends presumably embed this in their own struct.
+ * Holds only the device page-size information and the backend op table.
+ */
+struct pvr_winsys {
+   uint64_t page_size;
+   uint32_t log2_page_size;
+
+   const struct pvr_winsys_ops *ops;
+};
+
+void pvr_winsys_destroy(struct pvr_winsys *ws);
+struct pvr_winsys *pvr_winsys_create(int master_fd,
+ int render_fd,
+ const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_WINSYS_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+#include <xf86drm.h>
+
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "pvr_winsys_helper.h"
+#include "util/u_atomic.h"
+#include "vk_log.h"
+
+/* Create a DRM dumb buffer of `size` bytes (laid out as size x 1 at 8 bpp)
+ * on the display device and return its GEM handle in handle_out.
+ * Returns 0 on success, otherwise the non-zero drmIoctl() result.
+ */
+int pvr_winsys_helper_display_buffer_create(int master_fd,
+                                            uint64_t size,
+                                            uint32_t *const handle_out)
+{
+   struct drm_mode_create_dumb args = {
+      .width = size,
+      .height = 1,
+      .bpp = 8,
+   };
+   int ret;
+
+   ret = drmIoctl(master_fd, DRM_IOCTL_MODE_CREATE_DUMB, &args);
+   if (ret)
+      return ret;
+
+   *handle_out = args.handle;
+
+   return 0;
+}
+
+/* Destroy a dumb buffer created with
+ * pvr_winsys_helper_display_buffer_create(). Returns the drmIoctl() result
+ * (0 on success).
+ */
+int pvr_winsys_helper_display_buffer_destroy(int master_fd, uint32_t handle)
+{
+   struct drm_mode_destroy_dumb args = {
+      .handle = handle,
+   };
+
+   return drmIoctl(master_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &args);
+}
+
+/* Initialize a winsys heap covering [base_address, base_address + size).
+ * The reserved (static data) region, if any, is excluded from the VMA heap;
+ * it must sit at either the very start or the very end of the heap.
+ *
+ * reserved_size can be 0 when no reserved area is needed. reserved_address
+ * must be 0 if reserved_size is 0.
+ *
+ * Returns VK_ERROR_INITIALIZATION_FAILED if the heap lock can't be created.
+ */
+VkResult pvr_winsys_helper_winsys_heap_init(
+   struct pvr_winsys *const ws,
+   pvr_dev_addr_t base_address,
+   uint64_t size,
+   pvr_dev_addr_t reserved_address,
+   uint64_t reserved_size,
+   uint32_t log2_page_size,
+   const struct pvr_winsys_static_data_offsets *const static_data_offsets,
+   struct pvr_winsys_heap *const heap)
+{
+   const bool reserved_area_bottom_of_heap = reserved_address.addr ==
+                                             base_address.addr;
+   /* Skip past the reserved region when it sits at the bottom of the heap. */
+   const uint64_t vma_heap_begin_addr =
+      base_address.addr +
+      (uint64_t)reserved_area_bottom_of_heap * reserved_size;
+   const uint64_t vma_heap_size = size - reserved_size;
+
+   assert(base_address.addr);
+   assert(reserved_size <= size);
+
+   /* As per the reserved_base powervr-km uapi documentation the reserved
+    * region can only be at the beginning of the heap or at the end.
+    * reserved_address is 0 if there is no reserved region.
+    * pvrsrv-km doesn't explicitly provide this info and it's assumed that it's
+    * always at the beginning.
+    */
+   assert(reserved_area_bottom_of_heap ||
+          reserved_address.addr + reserved_size == base_address.addr + size ||
+          (!reserved_address.addr && !reserved_size));
+
+   heap->ws = ws;
+   heap->base_addr = base_address;
+   heap->reserved_addr = reserved_address;
+
+   heap->size = size;
+   heap->reserved_size = reserved_size;
+
+   heap->page_size = 1 << log2_page_size;
+   heap->log2_page_size = log2_page_size;
+
+   util_vma_heap_init(&heap->vma_heap, vma_heap_begin_addr, vma_heap_size);
+
+   heap->vma_heap.alloc_high = false;
+
+   /* It's expected that the heap destroy function will be the last thing
+    * that's called, so we start the ref_count at 0.
+    */
+   p_atomic_set(&heap->ref_count, 0);
+
+   if (pthread_mutex_init(&heap->lock, NULL))
+      return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+
+   heap->static_data_offsets = *static_data_offsets;
+
+   return VK_SUCCESS;
+}
+
+/* Tear down a heap initialized with pvr_winsys_helper_winsys_heap_init().
+ * Returns false (without tearing down) while outstanding allocations exist,
+ * i.e. while ref_count is non-zero.
+ */
+bool pvr_winsys_helper_winsys_heap_finish(struct pvr_winsys_heap *const heap)
+{
+   if (p_atomic_read(&heap->ref_count) != 0)
+      return false;
+
+   pthread_mutex_destroy(&heap->lock);
+   util_vma_heap_finish(&heap->vma_heap);
+
+   return true;
+}
+
+/* Allocate a device-virtual range of at least `size` bytes from `heap`.
+ * The size is rounded up to max(alignment, heap->page_size). On success the
+ * heap's ref_count is incremented and *vma_out is filled in; pair with
+ * pvr_winsys_helper_heap_free(). On exhaustion, logs
+ * VK_ERROR_OUT_OF_DEVICE_MEMORY and returns false.
+ */
+bool pvr_winsys_helper_heap_alloc(struct pvr_winsys_heap *const heap,
+                                  uint64_t size,
+                                  uint64_t alignment,
+                                  struct pvr_winsys_vma *const vma_out)
+{
+   struct pvr_winsys_vma vma = {
+      .heap = heap,
+   };
+
+   assert(util_is_power_of_two_nonzero(alignment));
+
+   /* pvr_srv_winsys_buffer_create() page aligns the size. We must do the same
+    * here to ensure enough heap space is allocated to be able to map the
+    * buffer to the GPU.
+    * We have to do this for the powervr kernel mode driver as well, as it
+    * returns a page aligned size when allocating buffers.
+    */
+   alignment = MAX2(alignment, heap->page_size);
+
+   size = ALIGN_POT(size, alignment);
+   vma.size = size;
+
+   pthread_mutex_lock(&heap->lock);
+   /* NOTE(review): the caller-requested alignment is applied to the size
+    * only; the returned address is aligned to the heap page size. Confirm
+    * that no caller ever needs address alignment stricter than a page.
+    */
+   vma.dev_addr.addr =
+      util_vma_heap_alloc(&heap->vma_heap, size, heap->page_size);
+   pthread_mutex_unlock(&heap->lock);
+
+   if (!vma.dev_addr.addr) {
+      vk_error(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      return false;
+   }
+
+   p_atomic_inc(&heap->ref_count);
+
+   *vma_out = vma;
+
+   return true;
+}
+
+/* Return a VMA's address range to its heap and drop the heap reference taken
+ * in pvr_winsys_helper_heap_alloc(). The VMA must not have a live device
+ * mapping (vma->bo must be NULL).
+ */
+void pvr_winsys_helper_heap_free(struct pvr_winsys_vma *const vma)
+{
+   struct pvr_winsys_heap *const heap = vma->heap;
+
+   /* A vma with an existing device mapping should not be freed. */
+   assert(!vma->bo);
+
+   pthread_mutex_lock(&heap->lock);
+   util_vma_heap_free(&heap->vma_heap, vma->dev_addr.addr, vma->size);
+   pthread_mutex_unlock(&heap->lock);
+
+   p_atomic_dec(&heap->ref_count);
+}
+
+/* Create a CPU-accessible GPU buffer, allocate the heap's reserved region at
+ * the fixed dev_addr via heap_alloc_reserved(), and map the buffer there.
+ * On success the returned VMA holds the only reference to the buffer.
+ *
+ * Note: the function assumes the heap allocation in the reserved memory area
+ * can be freed with the regular heap allocation free function. The free
+ * function gets called on mapping failure.
+ */
+static VkResult
+pvr_buffer_create_and_map(struct pvr_winsys *const ws,
+                          heap_alloc_reserved_func heap_alloc_reserved,
+                          struct pvr_winsys_heap *heap,
+                          pvr_dev_addr_t dev_addr,
+                          uint64_t size,
+                          uint64_t alignment,
+                          struct pvr_winsys_vma **const vma_out)
+{
+   struct pvr_winsys_vma *vma;
+   struct pvr_winsys_bo *bo;
+   pvr_dev_addr_t addr;
+   VkResult result;
+
+   /* Address should not be NULL, this function is used to allocate and map
+    * reserved addresses and is only supposed to be used internally.
+    */
+   assert(dev_addr.addr);
+
+   result = ws->ops->buffer_create(ws,
+                                   size,
+                                   alignment,
+                                   PVR_WINSYS_BO_TYPE_GPU,
+                                   PVR_WINSYS_BO_FLAG_CPU_ACCESS,
+                                   &bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   vma = heap_alloc_reserved(heap, dev_addr, size, alignment);
+   if (!vma) {
+      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+      goto err_pvr_winsys_buffer_destroy;
+   }
+
+   addr = ws->ops->vma_map(vma, bo, 0, size);
+   if (!addr.addr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto err_pvr_winsys_heap_free;
+   }
+
+   /* Note this won't destroy bo as it's being used by VMA, once vma is
+    * unmapped, bo will be destroyed automatically.
+    */
+   ws->ops->buffer_destroy(bo);
+
+   *vma_out = vma;
+
+   return VK_SUCCESS;
+
+err_pvr_winsys_heap_free:
+   ws->ops->heap_free(vma);
+
+err_pvr_winsys_buffer_destroy:
+   ws->ops->buffer_destroy(bo);
+
+   return result;
+}
+
+/* Unmap a VMA created with pvr_buffer_create_and_map() and release it back
+ * to its heap; the buffer object bound to the VMA is destroyed automatically
+ * when the mapping goes away.
+ *
+ * Specifier order fixed from "static void inline" to the conventional
+ * "static inline void" (C11 6.7 allows either, but this matches the rest of
+ * the codebase and every style guide).
+ */
+static inline void pvr_buffer_destroy_and_unmap(struct pvr_winsys_vma *vma)
+{
+   const struct pvr_winsys *const ws = vma->heap->ws;
+
+   /* Buffer object associated with the vma will be automatically destroyed
+    * once vma is unmapped.
+    */
+   ws->ops->vma_unmap(vma);
+   ws->ops->heap_free(vma);
+}
+
+/* Allocate and map the reserved (static data) regions of the general, PDS
+ * and USC heaps. On success each returned VMA holds the only reference to
+ * its backing buffer; release with pvr_winsys_helper_free_static_memory().
+ */
+VkResult pvr_winsys_helper_allocate_static_memory(
+   struct pvr_winsys *const ws,
+   heap_alloc_reserved_func heap_alloc_reserved,
+   struct pvr_winsys_heap *const general_heap,
+   struct pvr_winsys_heap *const pds_heap,
+   struct pvr_winsys_heap *const usc_heap,
+   struct pvr_winsys_vma **const general_vma_out,
+   struct pvr_winsys_vma **const pds_vma_out,
+   struct pvr_winsys_vma **const usc_vma_out)
+{
+   struct pvr_winsys_vma *general_vma;
+   struct pvr_winsys_vma *pds_vma;
+   struct pvr_winsys_vma *usc_vma;
+   VkResult result;
+
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      general_heap,
+                                      general_heap->reserved_addr,
+                                      general_heap->reserved_size,
+                                      general_heap->page_size,
+                                      &general_vma);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      pds_heap,
+                                      pds_heap->reserved_addr,
+                                      pds_heap->reserved_size,
+                                      pds_heap->page_size,
+                                      &pds_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_buffer_destroy_and_unmap_general;
+
+   /* Fixed copy-paste: size the USC static area from the USC heap's own
+    * reserved_size; this previously passed pds_heap->reserved_size even
+    * though every other argument of this call uses usc_heap.
+    */
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      usc_heap,
+                                      usc_heap->reserved_addr,
+                                      usc_heap->reserved_size,
+                                      usc_heap->page_size,
+                                      &usc_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_buffer_destroy_and_unmap_pds;
+
+   *general_vma_out = general_vma;
+   *pds_vma_out = pds_vma;
+   *usc_vma_out = usc_vma;
+
+   return VK_SUCCESS;
+
+err_pvr_buffer_destroy_and_unmap_pds:
+   pvr_buffer_destroy_and_unmap(pds_vma);
+
+err_pvr_buffer_destroy_and_unmap_general:
+   pvr_buffer_destroy_and_unmap(general_vma);
+
+   return result;
+}
+
+/* Release the three static-memory VMAs created by
+ * pvr_winsys_helper_allocate_static_memory(), in reverse allocation order.
+ */
+void pvr_winsys_helper_free_static_memory(
+   struct pvr_winsys_vma *const general_vma,
+   struct pvr_winsys_vma *const pds_vma,
+   struct pvr_winsys_vma *const usc_vma)
+{
+   pvr_buffer_destroy_and_unmap(usc_vma);
+   pvr_buffer_destroy_and_unmap(pds_vma);
+   pvr_buffer_destroy_and_unmap(general_vma);
+}
+
+/* Write the static VDM-sync programs: copies a small hard-coded USC binary
+ * into the USC area at usc_sync_offset_in_bytes, then emits (at
+ * pds_sync_offset_in_bytes in the PDS area) the PDS program that kicks it.
+ */
+static void pvr_setup_static_vdm_sync(uint8_t *const pds_ptr,
+                                      uint64_t pds_sync_offset_in_bytes,
+                                      uint8_t *const usc_ptr,
+                                      uint64_t usc_sync_offset_in_bytes)
+{
+   /* TODO: this needs to be auto-generated */
+   const uint8_t state_update[] = { 0x44, 0xA0, 0x80, 0x05,
+                                    0x00, 0x00, 0x00, 0xFF };
+
+   struct pvr_pds_kickusc_program ppp_state_update_program = { 0 };
+
+   memcpy(usc_ptr + usc_sync_offset_in_bytes,
+          state_update,
+          sizeof(state_update));
+
+   pvr_pds_setup_doutu(&ppp_state_update_program.usc_task_control,
+                       usc_sync_offset_in_bytes,
+                       0,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   pvr_pds_kick_usc(&ppp_state_update_program,
+                    (uint32_t *)&pds_ptr[pds_sync_offset_in_bytes],
+                    0,
+                    false,
+                    PDS_GENERATE_CODEDATA_SEGMENTS);
+}
+
+/* Emit the static end-of-tile pixel event PDS code segment into the PDS
+ * static-data area at pds_eot_offset_in_bytes.
+ */
+static void
+pvr_setup_static_pixel_event_program(uint8_t *const pds_ptr,
+                                     uint64_t pds_eot_offset_in_bytes)
+{
+   struct pvr_pds_event_program pixel_event_program = { 0 };
+
+   pvr_pds_generate_pixel_event(&pixel_event_program,
+                                (uint32_t *)&pds_ptr[pds_eot_offset_in_bytes],
+                                PDS_GENERATE_CODE_SEGMENT,
+                                NULL);
+}
+
+/* CPU-map the three static-memory VMAs and write the static programs (VDM
+ * sync into PDS+USC, end-of-tile pixel event into PDS), then unmap. The
+ * general area is mapped but not yet written (see the TODO below).
+ */
+VkResult
+pvr_winsys_helper_fill_static_memory(struct pvr_winsys *const ws,
+                                     struct pvr_winsys_vma *const general_vma,
+                                     struct pvr_winsys_vma *const pds_vma,
+                                     struct pvr_winsys_vma *const usc_vma)
+{
+   uint8_t *general_ptr, *pds_ptr, *usc_ptr;
+   VkResult result;
+
+   general_ptr = ws->ops->buffer_map(general_vma->bo);
+   if (!general_ptr)
+      return VK_ERROR_MEMORY_MAP_FAILED;
+
+   pds_ptr = ws->ops->buffer_map(pds_vma->bo);
+   if (!pds_ptr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto error_pvr_srv_winsys_buffer_unmap_general;
+   }
+
+   usc_ptr = ws->ops->buffer_map(usc_vma->bo);
+   if (!usc_ptr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto error_pvr_srv_winsys_buffer_unmap_pds;
+   }
+
+   pvr_setup_static_vdm_sync(pds_ptr,
+                             pds_vma->heap->static_data_offsets.vdm_sync,
+                             usc_ptr,
+                             usc_vma->heap->static_data_offsets.vdm_sync);
+
+   pvr_setup_static_pixel_event_program(pds_ptr,
+                                        pds_vma->heap->static_data_offsets.eot);
+
+   /* TODO: Complete control block copying work. */
+
+   ws->ops->buffer_unmap(usc_vma->bo);
+   ws->ops->buffer_unmap(pds_vma->bo);
+   ws->ops->buffer_unmap(general_vma->bo);
+
+   return VK_SUCCESS;
+
+error_pvr_srv_winsys_buffer_unmap_pds:
+   ws->ops->buffer_unmap(pds_vma->bo);
+
+error_pvr_srv_winsys_buffer_unmap_general:
+   ws->ops->buffer_unmap(general_vma->bo);
+
+   return result;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_WINSYS_HELPER_H
+#define PVR_WINSYS_HELPER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "pvr_winsys.h"
+
+/* Backend callback that allocates a VMA covering a heap's reserved region at
+ * the fixed reserved_dev_addr (see pvr_winsys_helper_allocate_static_memory).
+ */
+typedef struct pvr_winsys_vma *(*const heap_alloc_reserved_func)(
+   struct pvr_winsys_heap *const heap,
+   const pvr_dev_addr_t reserved_dev_addr,
+   uint64_t size,
+   uint64_t alignment);
+
+int pvr_winsys_helper_display_buffer_create(int master_fd,
+ uint64_t size,
+ uint32_t *const handle_out);
+int pvr_winsys_helper_display_buffer_destroy(int master_fd, uint32_t handle);
+
+VkResult pvr_winsys_helper_winsys_heap_init(
+ struct pvr_winsys *const ws,
+ pvr_dev_addr_t base_address,
+ uint64_t size,
+ pvr_dev_addr_t reserved_address,
+ uint64_t reserved_size,
+ uint32_t log2_page_size,
+ const struct pvr_winsys_static_data_offsets *const static_data_offsets,
+ struct pvr_winsys_heap *const heap);
+bool pvr_winsys_helper_winsys_heap_finish(struct pvr_winsys_heap *const heap);
+
+bool pvr_winsys_helper_heap_alloc(struct pvr_winsys_heap *const heap,
+ uint64_t size,
+ uint64_t alignment,
+ struct pvr_winsys_vma *const vma);
+void pvr_winsys_helper_heap_free(struct pvr_winsys_vma *const vma);
+
+VkResult pvr_winsys_helper_allocate_static_memory(
+ struct pvr_winsys *const ws,
+ heap_alloc_reserved_func heap_alloc_reserved,
+ struct pvr_winsys_heap *const general_heap,
+ struct pvr_winsys_heap *const pds_heap,
+ struct pvr_winsys_heap *const usc_heap,
+ struct pvr_winsys_vma **const general_vma_out,
+ struct pvr_winsys_vma **const pds_vma_out,
+ struct pvr_winsys_vma **const usc_vma_out);
+void pvr_winsys_helper_free_static_memory(
+ struct pvr_winsys_vma *const general_vma,
+ struct pvr_winsys_vma *const pds_vma,
+ struct pvr_winsys_vma *const usc_vma);
+
+VkResult
+pvr_winsys_helper_fill_static_memory(struct pvr_winsys *const ws,
+ struct pvr_winsys_vma *const general_vma,
+ struct pvr_winsys_vma *const pds_vma,
+ struct pvr_winsys_vma *const usc_vma);
+
+#endif /* PVR_WINSYS_HELPER_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_FWIF_H
+#define PVR_ROGUE_FWIF_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pvr_rogue_fwif_shared.h"
+
+/** Indicates the number of RTDATAs per RTDATASET. */
+#define ROGUE_FWIF_NUM_RTDATAS 2U
+
+/** Render needs flipped sample positions. */
+#define ROGUE_FWIF_RENDERFLAGS_FLIP_SAMPLE_POSITIONS 0x00000001UL
+/**
+ * The scene has been aborted, free the parameters and dummy process to
+ * completion.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_ABORT 0x00000002UL
+/** The TA before this was not marked as LAST. */
+#define ROGUE_FWIF_RENDERFLAGS_3D_ONLY 0x00000004UL
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE 0x00000008UL
+/**
+ * This render has visibility result associated with it. Setting this flag will
+ * cause the firmware to collect the visibility results.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_GETVISRESULTS 0x00000020UL
+/** Indicates whether a depth buffer is present. */
+#define ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER 0x00000080UL
+/** Indicates whether a stencil buffer is present. */
+#define ROGUE_FWIF_RENDERFLAGS_STENCILBUFFER 0x00000100UL
+/** This render needs DRM Security. */
+#define ROGUE_FWIF_RENDERFLAGS_SECURE 0x00002000UL
+/**
+ * This flag goes hand in hand with ABORT and explicitly ensures that no
+ * memory free is issued in the case of the first TA job.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_ABORT_NOFREE 0x00004000UL
+/** Force disabling of pixel merging. */
+#define ROGUE_FWIF_RENDERFLAGS_DISABLE_PIXELMERGE 0x00008000UL
+
+/** Force 4 lines of coeffs on render. */
+#define ROGUE_FWIF_RENDERFLAGS_CSRM_MAX_COEFFS 0x00020000UL
+
+/** Partial render must write to scratch buffer. */
+#define ROGUE_FWIF_RENDERFLAGS_SPMSCRATCHBUFFER 0x00080000UL
+
+/** Render uses paired tile feature, empty tiles must always be enabled. */
+#define ROGUE_FWIF_RENDERFLAGS_PAIRED_TILES 0x00100000UL
+
+#define ROGUE_FWIF_RENDERFLAGS_RESERVED 0x01000000UL
+
+/** Disallow compute overlapped with this render. */
+#define ROGUE_FWIF_RENDERFLAGS_PREVENT_CDM_OVERLAP 0x04000000UL
+/**
+ * The host must indicate if this is the first and/or last command to be issued
+ * for the specified task.
+ */
+#define ROGUE_FWIF_TAFLAGS_FIRSTKICK 0x00000001UL
+#define ROGUE_FWIF_TAFLAGS_LASTKICK 0x00000002UL
+#define ROGUE_FWIF_TAFLAGS_FLIP_SAMPLE_POSITIONS 0x00000004UL
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_TAFLAGS_SINGLE_CORE 0x00000008UL
+
+/** Enable Tile Region Protection for this TA. */
+#define ROGUE_FWIF_TAFLAGS_TRP 0x00000010UL
+
+/** Indicates the particular TA needs to be aborted. */
+#define ROGUE_FWIF_TAFLAGS_TA_ABORT 0x00000100UL
+#define ROGUE_FWIF_TAFLAGS_SECURE 0x00080000UL
+
+/**
+ * Indicates that the CSRM should be reconfigured to support maximum coeff
+ * space before this command is scheduled.
+ */
+#define ROGUE_FWIF_TAFLAGS_CSRM_MAX_COEFFS 0x00200000UL
+
+#define ROGUE_FWIF_TAFLAGS_PHR_TRIGGER 0x02000000UL
+
+/* Flags for transfer queue commands. */
+#define ROGUE_FWIF_CMDTRANSFER_FLAG_SECURE 0x00000001U
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_CMDTRANSFER_SINGLE_CORE 0x00000002U
+
+/* Flags for 2D commands. */
+#define ROGUE_FWIF_CMD2D_FLAG_SECURE 0x00000001U
+
+#define ROGUE_FWIF_CMD3DTQ_SLICE_WIDTH_MASK 0x00000038UL
+#define ROGUE_FWIF_CMD3DTQ_SLICE_WIDTH_SHIFT (3)
+#define ROGUE_FWIF_CMD3DTQ_SLICE_GRANULARITY (0x10U)
+
+/* Flags for compute commands. */
+#define ROGUE_FWIF_COMPUTE_FLAG_SECURE 0x00000001U
+#define ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP 0x00000002U
+#define ROGUE_FWIF_COMPUTE_FLAG_FORCE_TPU_CLK 0x00000004U
+
+#define ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_NON_TAOOM_OVERLAP 0x00000010U
+
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_COMPUTE_FLAG_SINGLE_CORE 0x00000020U
+
+/***********************************************
+ Parameter/HWRTData control structures.
+ ***********************************************/
+
+/**
+ * Configuration registers which need to be loaded by the firmware before a TA
+ * job can be started.
+ */
+struct rogue_fwif_ta_regs {
+   uint64_t vdm_ctrl_stream_base;
+   uint64_t tpu_border_colour_table;
+
+   uint32_t ppp_ctrl;
+   uint32_t te_psg;
+   uint32_t tpu;
+
+   uint32_t vdm_context_resume_task0_size;
+
+   /* FIXME: HIGH: FIX_HW_BRN_56279 changes the structure's layout, given we
+    * are supporting Features/ERNs/BRNs at runtime, we need to look into this
+    * and find a solution to keep layout intact.
+    */
+   /* Available if FIX_HW_BRN_56279 is present. */
+   uint32_t pds_ctrl;
+
+   /* NOTE(review): presumably the render-target-array / multiview index —
+    * confirm against the firmware interface documentation.
+    */
+   uint32_t view_idx;
+};
+
+/**
+ * Represents a TA command that can be used to tile a whole scene's objects as
+ * per TA behavior.
+ */
+struct rogue_fwif_cmd_ta {
+ /**
+ * rogue_fwif_cmd_ta_3d_shared field must always be at the beginning of the
+ * struct.
+ *
+ * The command struct (rogue_fwif_cmd_ta) is shared between Client and
+ * Firmware. Kernel is unable to perform read/write operations on the
+ * command struct, the SHARED region is the only exception from this rule.
+ * This region must be the first member so that Kernel can easily access it.
+ * For more info, see pvr_cmd_ta_3d definition.
+ */
+ struct rogue_fwif_cmd_ta_3d_shared cmd_shared;
+
+ struct rogue_fwif_ta_regs ALIGN(8) geom_regs;
+ uint32_t ALIGN(8) flags;
+ /**
+ * Holds the TA/3D fence value to allow the 3D partial render command
+ * to go through.
+ */
+ struct rogue_fwif_ufo partial_render_ta_3d_fence;
+};
+
+static_assert(
+ offsetof(struct rogue_fwif_cmd_ta, cmd_shared) == 0U,
+ "rogue_fwif_cmd_ta_3d_shared must be the first member of rogue_fwif_cmd_ta");
+
+static_assert(
+ sizeof(struct rogue_fwif_cmd_ta) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+ "kernel expects command size be increased to match current TA command size");
+
+/**
+ * Configuration registers which need to be loaded by the firmware before ISP
+ * can be started.
+ */
+struct rogue_fwif_3d_regs {
+ /**
+ * All 32 bit values should be added in the top section. This then requires
+ * only a single ALIGN(8) to align all the 64 bit values in the second
+ * section.
+ */
+ uint32_t usc_pixel_output_ctrl;
+ /* FIXME: HIGH: RGX_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL changes the
+ * structure's layout.
+ */
+#define ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL 8U
+ uint32_t usc_clear_register[ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL];
+
+ uint32_t isp_bgobjdepth;
+ uint32_t isp_bgobjvals;
+ uint32_t isp_aa;
+ uint32_t isp_ctl;
+
+ uint32_t tpu;
+
+ uint32_t event_pixel_pds_info;
+
+ /* FIXME: HIGH: RGX_FEATURE_CLUSTER_GROUPING changes the structure's
+ * layout.
+ */
+ uint32_t pixel_phantom;
+
+ uint32_t view_idx;
+
+ uint32_t event_pixel_pds_data;
+ /* FIXME: HIGH: MULTIBUFFER_OCLQRY changes the structure's layout.
+ * Commenting out for now as it's not supported by 4.V.2.51.
+ */
+ /* uint32_t isp_oclqry_stride; */
+
+ /* All values below the ALIGN(8) must be 64 bit. */
+ uint64_t ALIGN(8) isp_scissor_base;
+ uint64_t isp_dbias_base;
+ uint64_t isp_oclqry_base;
+ uint64_t isp_zlsctl;
+ uint64_t isp_zload_store_base;
+ uint64_t isp_stencil_load_store_base;
+ /* FIXME: HIGH: RGX_FEATURE_ZLS_SUBTILE changes the structure's layout. */
+ uint64_t isp_zls_pixels;
+
+ /* FIXME: HIGH: RGX_HW_REQUIRES_FB_CDC_ZLS_SETUP changes the structure's
+ * layout.
+ */
+ uint64_t deprecated;
+
+ /* FIXME: HIGH: RGX_PBE_WORDS_REQUIRED_FOR_RENDERS changes the structure's
+ * layout.
+ */
+#define ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS 2U
+ uint64_t pbe_word[8U][ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS];
+ uint64_t tpu_border_colour_table;
+ uint64_t pds_bgnd[3U];
+ uint64_t pds_pr_bgnd[3U];
+};
+
+struct rogue_fwif_cmd_3d {
+ /**
+ * This struct is shared between Client and Firmware.
+ * Kernel is unable to perform read/write operations on the command struct,
+ * the SHARED region is our only exception from that rule.
+ * This region must be the first member so Kernel can easily access it.
+ * For more info, see rogue_fwif_cmd_ta_3d_shared definition.
+ */
+ struct rogue_fwif_cmd_ta_3d_shared ALIGN(8) cmd_shared;
+
+ struct rogue_fwif_3d_regs ALIGN(8) regs;
+ /** command control flags. */
+ uint32_t flags;
+ /** Stride IN BYTES for Z-Buffer in case of RTAs. */
+ uint32_t zls_stride;
+ /** Stride IN BYTES for S-Buffer in case of RTAs. */
+ uint32_t sls_stride;
+};
+
+static_assert(
+ offsetof(struct rogue_fwif_cmd_3d, cmd_shared) == 0U,
+ "rogue_fwif_cmd_ta_3d_shared must be the first member of rogue_fwif_cmd_3d");
+
+static_assert(
+ sizeof(struct rogue_fwif_cmd_3d) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+ "kernel expects command size be increased to match current 3D command size");
+
+/** Register state loaded by the firmware for a transfer (TQ) command. */
+struct rogue_fwif_transfer_regs {
+   /**
+    * All 32 bit values should be added in the top section. This then requires
+    * only a single ALIGN(8) to align all the 8 byte values in the second
+    * section.
+    */
+   uint32_t isp_bgobjvals;
+
+   uint32_t usc_pixel_output_ctrl;
+   uint32_t usc_clear_register0;
+   uint32_t usc_clear_register1;
+   uint32_t usc_clear_register2;
+   uint32_t usc_clear_register3;
+
+   uint32_t isp_mtile_size;
+   uint32_t isp_render_origin;
+   uint32_t isp_ctl;
+
+   uint32_t isp_aa;
+
+   uint32_t event_pixel_pds_info;
+
+   uint32_t event_pixel_pds_code;
+   uint32_t event_pixel_pds_data;
+
+   uint32_t isp_render;
+   uint32_t isp_rgn;
+   /* FIXME: HIGH: RGX_FEATURE_GPU_MULTICORE_SUPPORT changes the structure's
+    * layout. Commenting out for now as it's not supported by 4.V.2.51.
+    */
+   /* uint32_t frag_screen; */
+   /** All values below the RGXFW_ALIGN must be 64 bit. */
+   uint64_t ALIGN(8) pds_bgnd0_base;
+   uint64_t pds_bgnd1_base;
+   uint64_t pds_bgnd3_sizeinfo;
+
+   uint64_t isp_mtile_base;
+   /* FIXME: HIGH: RGX_PBE_WORDS_REQUIRED_FOR_TQS changes the structure's
+    * layout.
+    */
+#define ROGUE_PBE_WORDS_REQUIRED_FOR_TRANSFER 3
+   /* TQ_MAX_RENDER_TARGETS * PBE_STATE_SIZE */
+   uint64_t pbe_wordx_mrty[3 * ROGUE_PBE_WORDS_REQUIRED_FOR_TRANSFER];
+};
+
+struct rogue_fwif_cmd_transfer {
+   /** Common command attributes; must remain the first member (see assert). */
+   struct rogue_fwif_cmd_common ALIGN(8) cmn;
+   /** Transfer register state loaded by the firmware. */
+   struct rogue_fwif_transfer_regs ALIGN(8) regs;
+
+   /** Command control flags. */
+   uint32_t flags;
+};
+
+static_assert(
+   offsetof(struct rogue_fwif_cmd_transfer, cmn) == 0U,
+   "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_transfer");
+
+static_assert(
+   sizeof(struct rogue_fwif_cmd_transfer) <=
+      ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+   "kernel expects command size be increased to match current TRANSFER command size");
+
+/** 2D (TLA) register state loaded by the firmware for a 2D command. */
+struct rogue_fwif_2d_regs {
+   uint64_t tla_cmd_stream;
+   uint64_t deprecated_0;
+   uint64_t deprecated_1;
+   uint64_t deprecated_2;
+   uint64_t deprecated_3;
+   /* FIXME: HIGH: FIX_HW_BRN_57193 changes the structure's layout. */
+   uint64_t brn57193_tla_cmd_stream;
+};
+
+struct rogue_fwif_cmd_2d {
+   /** Common command attributes; must remain the first member (see assert). */
+   struct rogue_fwif_cmd_common ALIGN(8) cmn;
+   struct rogue_fwif_2d_regs ALIGN(8) regs;
+
+   /** Command control flags. */
+   uint32_t flags;
+};
+
+static_assert(
+   offsetof(struct rogue_fwif_cmd_2d, cmn) == 0U,
+   "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_2d");
+
+static_assert(
+   sizeof(struct rogue_fwif_cmd_2d) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+   "kernel expects command size be increased to match current 2D command size");
+
+/***********************************************
+ Host interface structures.
+ ***********************************************/
+
+/**
+ * Configuration registers which need to be loaded by the firmware before CDM
+ * can be started.
+ */
+struct rogue_fwif_cdm_regs {
+ uint64_t tpu_border_colour_table;
+
+ /* FIXME: HIGH: RGX_FEATURE_COMPUTE_MORTON_CAPABLE changes the structure's
+ * layout.
+ */
+ uint64_t cdm_item;
+ /* FIXME: HIGH: RGX_FEATURE_CLUSTER_GROUPING changes the structure's layout.
+ */
+ uint64_t compute_cluster;
+
+ /* FIXME: HIGH: RGX_FEATURE_TPU_DM_GLOBAL_REGISTERS changes the structure's
+ * layout. Commenting out for now as it's not supported by 4.V.2.51.
+ */
+ /* uint64_t tpu_tag_cdm_ctrl; */
+ uint64_t cdm_ctrl_stream_base;
+
+ uint32_t tpu;
+
+ uint32_t cdm_resume_pds1;
+};
+
+struct rogue_fwif_cmd_compute {
+   /** Common command attributes; must remain the first member (see assert). */
+   struct rogue_fwif_cmd_common ALIGN(8) cmn;
+   /** CDM register state loaded by the firmware before the kick. */
+   struct rogue_fwif_cdm_regs ALIGN(8) regs;
+   /** Command control flags. */
+   uint32_t ALIGN(8) flags;
+};
+
+static_assert(
+   offsetof(struct rogue_fwif_cmd_compute, cmn) == 0U,
+   "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_compute");
+
+static_assert(
+   sizeof(struct rogue_fwif_cmd_compute) <=
+      ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+   "kernel expects command size be increased to match current COMPUTE command size");
+
+/* TODO: Rename the RGX_* macros in the comments once they are imported. */
+/* Applied to RGX_CR_VDM_SYNC_PDS_DATA_BASE. */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_VDM_SYNC_OFFSET_BYTES 0U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_VDM_SYNC_MAX_SIZE_BYTES 128U
+
+/** Applied to RGX_CR_EVENT_PIXEL_PDS_CODE. */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_EOT_OFFSET_BYTES 128U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_EOT_MAX_SIZE_BYTES 128U
+
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_TOTAL_BYTES 4096U
+
+/** Pointed to by PDS code at RGX_CR_VDM_SYNC_PDS_DATA_BASE. */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_VDM_SYNC_OFFSET_BYTES 0U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_VDM_SYNC_MAX_SIZE_BYTES 128U
+
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_TOTAL_BYTES 4096U
+
+/**
+ * Applied to RGX_CR_MCU_FENCE, and RGX_CR_PM_MTILE_ARRAY
+ * (defined(RGX_FEATURE_SIMPLE_INTERNAL_PARAMETER_FORMAT)).
+ */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_FENCE_OFFSET_BYTES 0U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_FENCE_MAX_SIZE_BYTES 128U
+
+/** Applied to RGX_CR_TPU_YUV_CSC_COEFFICIENTS. */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES 128U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_YUV_CSC_MAX_SIZE_BYTES 1024U
+
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_TOTAL_BYTES 4096U
+
+#endif /* PVR_ROGUE_FWIF_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_FWIF_RF_H
+#define PVR_ROGUE_FWIF_RF_H
+
+#include <stdint.h>
+
+#include "pvr_rogue_fwif_shared.h"
+
+/** Registers loaded by the firmware for a reset-framework (RF) command. */
+struct rogue_fwif_rf_regs {
+   uint64_t cdm_ctrl_stream_base;
+};
+
+/* Enables the reset framework in the firmware. */
+#define ROGUE_FWIF_RF_FLAG_ENABLE 0x00000001U
+
+struct rogue_fwif_rf_cmd {
+   /** Combination of ROGUE_FWIF_RF_FLAG_* values. */
+   uint32_t flags;
+
+   /* THIS MUST BE THE LAST MEMBER OF THE CONTAINING STRUCTURE */
+   struct rogue_fwif_rf_regs ALIGN(8) regs;
+};
+
+#endif /* PVR_ROGUE_FWIF_RF_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_FWIF_SHARED_H
+#define PVR_ROGUE_FWIF_SHARED_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define ALIGN(x) __attribute__((aligned(x)))
+
+/**
+ * Maximum number of UFOs in a CCB command.
+ * The number is based on having 32 sync prims (as originally), plus 32 sync
+ * checkpoints.
+ * Once the use of sync prims is no longer supported, we will retain
+ * the same total (64) as the number of sync checkpoints which may be
+ * supporting a fence is not visible to the client driver and has to
+ * allow for the number of different timelines involved in fence merges.
+ */
+#define ROGUE_FWIF_CCB_CMD_MAX_UFOS (32U + 32U)
+
+/**
+ * This is a generic limit imposed on any DM (TA,3D,CDM,TDM,2D,TRANSFER)
+ * command passed through the bridge.
+ * Just across the bridge in the server, any incoming kick command size is
+ * checked against this maximum limit.
+ * In case the incoming command size is larger than the specified limit,
+ * the bridge call is retired with error.
+ */
+#define ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE (1024U)
+
+#define ROGUE_FWIF_PRBUFFER_START (0)
+#define ROGUE_FWIF_PRBUFFER_ZSBUFFER (0)
+#define ROGUE_FWIF_PRBUFFER_MSAABUFFER (1)
+#define ROGUE_FWIF_PRBUFFER_MAXSUPPORTED (2)
+
+/** 32-bit firmware-visible address container. */
+struct rogue_fwif_dev_addr {
+   uint32_t addr;
+};
+
+/** Pairs a 64-bit device virtual address with its 32-bit FW address. */
+struct rogue_fwif_dma_addr {
+   uint64_t ALIGN(8) dev_vaddr;
+   struct rogue_fwif_dev_addr fw_addr;
+} ALIGN(8);
+
+/* Sync address/value pair (UFO) used for fence checks/updates. */
+struct rogue_fwif_ufo {
+   struct rogue_fwif_dev_addr ufo_addr;
+   uint32_t value;
+};
+
+struct rogue_fwif_cleanup_ctl {
+   /** Number of commands received by the FW. */
+   uint32_t submitted_cmds;
+
+   /** Number of commands executed by the FW. */
+   uint32_t executed_cmds;
+} ALIGN(8);
+
+/**
+ * Used to share frame numbers across UM-KM-FW,
+ * frame number is set in UM,
+ * frame number is required in both KM for HTB and FW for FW trace.
+ *
+ * May be used to house Kick flags in the future.
+ */
+struct rogue_fwif_cmd_common {
+ /** Associated frame number. */
+ uint32_t frame_num;
+};
+
+/**
+ * TA and 3D commands require set of firmware addresses that are stored in the
+ * Kernel. Client has handle(s) to Kernel containers storing these addresses,
+ * instead of raw addresses. We have to patch/write these addresses in KM to
+ * prevent UM from controlling FW addresses directly.
+ * Structures for TA and 3D commands are shared between Client and Firmware
+ * (both single-BVNC). Kernel is implemented in a multi-BVNC manner, so it can't
+ * use TA|3D CMD type definitions directly. Therefore we have a SHARED block
+ * that is shared between UM-KM-FW across all BVNC configurations.
+ */
+struct rogue_fwif_cmd_ta_3d_shared {
+ /** Common command attributes. */
+ struct rogue_fwif_cmd_common cmn;
+
+ /**
+ * RTData associated with this command, this is used for context
+ * selection and for storing out HW-context, when TA is switched out for
+ * continuing later.
+ */
+ struct rogue_fwif_dev_addr hw_rt_data;
+
+ /** Supported PR Buffers like Z/S/MSAA Scratch. */
+ struct rogue_fwif_dev_addr pr_buffers[ROGUE_FWIF_PRBUFFER_MAXSUPPORTED];
+};
+
+/**
+ * Client Circular Command Buffer (CCCB) control structure.
+ * This is shared between the KM driver and the Firmware and holds byte offsets
+ * into the CCCB as well as the wrapping mask to aid wrap around. A given
+ * snapshot of this queue with Cmd 1 running on the GPU might be:
+ *
+ * Roff Doff Woff
+ * [..........|-1----------|=2===|=3===|=4===|~5~~~~|~6~~~~|~7~~~~|..........]
+ * < runnable commands >< !ready to run >
+ *
+ * Cmd 1 : Currently executing on the GPU data master.
+ * Cmd 2,3,4: Fence dependencies met, commands runnable.
+ * Cmd 5... : Fence dependency not met yet.
+ */
+struct rogue_fwif_cccb_ctl {
+ /** Host write offset into CCB. This must be aligned to 16 bytes. */
+ uint32_t write_offset;
+
+ /**
+ * Firmware read offset into CCB. Points to the command that is runnable
+ * on GPU, if R!=W.
+ */
+ uint32_t read_offset;
+
+ /**
+ * Firmware fence dependency offset. Points to commands not ready, i.e.
+ * fence dependencies are not met.
+ */
+ uint32_t dep_offset;
+
+ /** Offset wrapping mask, total capacity in bytes of the CCB-1. */
+ uint32_t wrap_mask;
+} ALIGN(8);
+
+#define ROGUE_FW_LOCAL_FREELIST 0U
+#define ROGUE_FW_GLOBAL_FREELIST 1U
+#define ROGUE_FW_FREELIST_TYPE_LAST ROGUE_FW_GLOBAL_FREELIST
+#define ROGUE_FW_MAX_FREELISTS (ROGUE_FW_FREELIST_TYPE_LAST + 1U)
+
+/** TA/VDM registers saved/restored by the firmware on a geom ctx switch. */
+struct rogue_fwif_ta_regs_cswitch {
+   uint64_t vdm_context_state_base_addr;
+   uint64_t vdm_context_state_resume_addr;
+   uint64_t ta_context_state_base_addr;
+
+   struct {
+      uint64_t vdm_context_store_task0;
+      uint64_t vdm_context_store_task1;
+      uint64_t vdm_context_store_task2;
+
+      /* VDM resume state update controls. */
+      uint64_t vdm_context_resume_task0;
+      uint64_t vdm_context_resume_task1;
+      uint64_t vdm_context_resume_task2;
+
+      uint64_t vdm_context_store_task3;
+      uint64_t vdm_context_store_task4;
+
+      uint64_t vdm_context_resume_task3;
+      uint64_t vdm_context_resume_task4;
+   } ta_state[2];
+};
+
+/* NOTE(review): this previously expanded to
+ * sizeof(struct rogue_fwif_taregisters_cswitch), a struct tag that does not
+ * exist; any use of the macro would fail to compile with an incomplete-type
+ * error. Point it at the actual tag declared above.
+ */
+#define ROGUE_FWIF_TAREGISTERS_CSWITCH_SIZE \
+   sizeof(struct rogue_fwif_ta_regs_cswitch)
+
+/** CDM registers saved/restored by the firmware on a compute ctx switch. */
+struct rogue_fwif_cdm_regs_cswitch {
+   uint64_t cdm_context_state_base_addr;
+   uint64_t cdm_context_pds0;
+   uint64_t cdm_context_pds1;
+   uint64_t cdm_terminate_pds;
+   uint64_t cdm_terminate_pds1;
+
+   /* CDM resume controls. */
+   uint64_t cdm_resume_pds0;
+   uint64_t cdm_context_pds0_b;
+   uint64_t cdm_resume_pds0_b;
+};
+
+/** Static (load-once) context-switch state for a render context. */
+struct rogue_fwif_static_rendercontext_state {
+   /** Geom registers for ctx switch. */
+   struct rogue_fwif_ta_regs_cswitch ALIGN(8) ctx_switch_regs;
+};
+
+#define ROGUE_FWIF_STATIC_RENDERCONTEXT_SIZE \
+   sizeof(struct rogue_fwif_static_rendercontext_state)
+
+/** Static (load-once) context-switch state for a compute context. */
+struct rogue_fwif_static_computecontext_state {
+   /** CDM registers for ctx switch. */
+   struct rogue_fwif_cdm_regs_cswitch ALIGN(8) ctx_switch_regs;
+};
+
+#define ROGUE_FWIF_STATIC_COMPUTECONTEXT_SIZE \
+   sizeof(struct rogue_fwif_static_computecontext_state)
+
+enum rogue_fwif_prbuffer_state {
+ ROGUE_FWIF_PRBUFFER_UNBACKED = 0,
+ ROGUE_FWIF_PRBUFFER_BACKED,
+ ROGUE_FWIF_PRBUFFER_BACKING_PENDING,
+ ROGUE_FWIF_PRBUFFER_UNBACKING_PENDING,
+};
+
+struct rogue_fwif_prbuffer {
+ /** Buffer ID. */
+ uint32_t buffer_id;
+ /** Needs On-demand Z/S/MSAA Buffer allocation. */
+ bool ALIGN(4) on_demand;
+ /** Z/S/MSAA -Buffer state. */
+ enum rogue_fwif_prbuffer_state state;
+ /** Cleanup state. */
+ struct rogue_fwif_cleanup_ctl cleanup_state;
+ /** Compatibility and other flags. */
+ uint32_t pr_buffer_flags;
+} ALIGN(8);
+
+/* Last reset reason for a context. */
+enum rogue_context_reset_reason {
+ /** No reset reason recorded. */
+ ROGUE_CONTEXT_RESET_REASON_NONE = 0,
+ /** Caused a reset due to locking up. */
+ ROGUE_CONTEXT_RESET_REASON_GUILTY_LOCKUP = 1,
+ /** Affected by another context locking up. */
+ ROGUE_CONTEXT_RESET_REASON_INNOCENT_LOCKUP = 2,
+ /** Overran the global deadline. */
+ ROGUE_CONTEXT_RESET_REASON_GUILTY_OVERRUNING = 3,
+ /** Affected by another context overrunning. */
+ ROGUE_CONTEXT_RESET_REASON_INNOCENT_OVERRUNING = 4,
+ /** Forced reset to ensure scheduling requirements. */
+ ROGUE_CONTEXT_RESET_REASON_HARD_CONTEXT_SWITCH = 5,
+};
+
+struct rogue_context_reset_reason_data {
+ enum rogue_context_reset_reason reset_reason;
+ uint32_t reset_ext_job_ref;
+};
+
+#endif /* PVR_ROGUE_FWIF_SHARED_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <xf86drm.h>
+
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bo.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_srv_job_compute.h"
+#include "pvr_srv_job_render.h"
+#include "pvr_srv_public.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "pvr_winsys_helper.h"
+#include "util/log.h"
+#include "util/os_misc.h"
+#include "vk_log.h"
+
+/* Amount of space used to hold sync prim values (in bytes). */
+#define PVR_SRV_SYNC_PRIM_VALUE_SIZE 4U
+
+/**
+ * Initialize one winsys heap from the services (srv) kernel driver.
+ *
+ * Queries the parameters of heap \p heap_idx (base address, size, reserved
+ * size, page size), initializes the common winsys heap state, then creates
+ * the server-side counterpart of the heap in the server memory context.
+ * On failure, any state set up so far is torn down again; nothing is leaked.
+ */
+static VkResult pvr_srv_heap_init(
+   struct pvr_srv_winsys *srv_ws,
+   struct pvr_srv_winsys_heap *srv_heap,
+   uint32_t heap_idx,
+   const struct pvr_winsys_static_data_offsets *const static_data_offsets)
+{
+   pvr_dev_addr_t base_address;
+   uint32_t log2_page_size;
+   uint64_t reserved_size;
+   VkResult result;
+   uint64_t size;
+
+   result = pvr_srv_get_heap_details(srv_ws->render_fd,
+                                     heap_idx,
+                                     0,
+                                     NULL,
+                                     &base_address,
+                                     &size,
+                                     &reserved_size,
+                                     &log2_page_size);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* NOTE(review): base_address is passed both as the heap base and as the
+    * reserved-area address — confirm against the helper's signature.
+    */
+   result = pvr_winsys_helper_winsys_heap_init(&srv_ws->base,
+                                               base_address,
+                                               size,
+                                               base_address,
+                                               reserved_size,
+                                               log2_page_size,
+                                               static_data_offsets,
+                                               &srv_heap->base);
+   if (result != VK_SUCCESS)
+      return result;
+
+   assert(srv_heap->base.page_size == srv_ws->base.page_size);
+   assert(srv_heap->base.log2_page_size == srv_ws->base.log2_page_size);
+   assert(srv_heap->base.reserved_size % PVR_SRV_RESERVED_SIZE_GRANULARITY ==
+          0);
+
+   /* Create server-side counterpart of Device Memory heap */
+   result = pvr_srv_int_heap_create(srv_ws->render_fd,
+                                    srv_heap->base.base_addr,
+                                    srv_heap->base.size,
+                                    srv_heap->base.log2_page_size,
+                                    srv_ws->server_memctx,
+                                    &srv_heap->server_heap);
+   if (result != VK_SUCCESS) {
+      pvr_winsys_helper_winsys_heap_finish(&srv_heap->base);
+      return result;
+   }
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Tear down a winsys heap and its server-side counterpart.
+ *
+ * \return false (leaving the heap untouched) when the common heap state
+ *         cannot be finished (heap still in use), true on success.
+ */
+static bool pvr_srv_heap_finish(struct pvr_srv_winsys *srv_ws,
+                                struct pvr_srv_winsys_heap *srv_heap)
+{
+   if (!pvr_winsys_helper_winsys_heap_finish(&srv_heap->base))
+      return false;
+
+   pvr_srv_int_heap_destroy(srv_ws->render_fd, srv_heap->server_heap);
+
+   return true;
+}
+
+/**
+ * Create the server memory context and set up all winsys heaps.
+ *
+ * Discovers the kernel-advertised heaps by name, initializes the required
+ * ones (general, PDS, USC) and the optional region-header heap, then
+ * allocates and fills the static (reserved) memory carved out of the
+ * general/PDS/USC heaps. All failures unwind through the goto chain in
+ * strict reverse order of construction.
+ */
+static VkResult pvr_srv_memctx_init(struct pvr_srv_winsys *srv_ws)
+{
+   const struct pvr_winsys_static_data_offsets
+      general_heap_static_data_offsets = {
+         .yuv_csc = FWIF_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES,
+      };
+   const struct pvr_winsys_static_data_offsets pds_heap_static_data_offsets = {
+      .eot = FWIF_PDS_HEAP_EOT_OFFSET_BYTES,
+      .vdm_sync = FWIF_PDS_HEAP_VDM_SYNC_OFFSET_BYTES,
+   };
+   const struct pvr_winsys_static_data_offsets usc_heap_static_data_offsets = {
+      .vdm_sync = FWIF_USC_HEAP_VDM_SYNC_OFFSET_BYTES,
+   };
+   const struct pvr_winsys_static_data_offsets
+      rgn_hdr_heap_static_data_offsets = { 0 };
+
+   char heap_name[PVR_SRV_DEVMEM_HEAPNAME_MAXLENGTH];
+   int general_heap_idx = -1;
+   int rgn_hdr_heap_idx = -1;
+   int pds_heap_idx = -1;
+   int usc_heap_idx = -1;
+   uint32_t heap_count;
+   VkResult result;
+
+   result = pvr_srv_int_ctx_create(srv_ws->render_fd,
+                                   &srv_ws->server_memctx,
+                                   &srv_ws->server_memctx_data);
+   if (result != VK_SUCCESS)
+      return result;
+
+   os_get_page_size(&srv_ws->base.page_size);
+   srv_ws->base.log2_page_size = util_logbase2(srv_ws->base.page_size);
+
+   result = pvr_srv_get_heap_count(srv_ws->render_fd, &heap_count);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_int_ctx_destroy;
+
+   assert(heap_count > 0);
+
+   /* Map heap names to indices. Using sizeof(IDENT) in strncmp() includes
+    * the terminating NUL, so each comparison is an exact match, not a
+    * prefix match.
+    */
+   for (uint32_t i = 0; i < heap_count; i++) {
+      result = pvr_srv_get_heap_details(srv_ws->render_fd,
+                                        i,
+                                        sizeof(heap_name),
+                                        heap_name,
+                                        NULL,
+                                        NULL,
+                                        NULL,
+                                        NULL);
+      if (result != VK_SUCCESS)
+         goto err_pvr_srv_int_ctx_destroy;
+
+      if (general_heap_idx == -1 &&
+          strncmp(heap_name,
+                  PVR_SRV_GENERAL_HEAP_IDENT,
+                  sizeof(PVR_SRV_GENERAL_HEAP_IDENT)) == 0) {
+         general_heap_idx = i;
+      } else if (pds_heap_idx == -1 &&
+                 strncmp(heap_name,
+                         PVR_SRV_PDSCODEDATA_HEAP_IDENT,
+                         sizeof(PVR_SRV_PDSCODEDATA_HEAP_IDENT)) == 0) {
+         pds_heap_idx = i;
+      } else if (rgn_hdr_heap_idx == -1 &&
+                 strncmp(heap_name,
+                         PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT,
+                         sizeof(PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT)) == 0) {
+         rgn_hdr_heap_idx = i;
+      } else if (usc_heap_idx == -1 &&
+                 strncmp(heap_name,
+                         PVR_SRV_USCCODE_HEAP_IDENT,
+                         sizeof(PVR_SRV_USCCODE_HEAP_IDENT)) == 0) {
+         usc_heap_idx = i;
+      }
+   }
+
+   /* Check for and initialize required heaps. */
+   if (general_heap_idx == -1 || pds_heap_idx == -1 || usc_heap_idx == -1) {
+      result = vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+      goto err_pvr_srv_int_ctx_destroy;
+   }
+
+   result = pvr_srv_heap_init(srv_ws,
+                              &srv_ws->general_heap,
+                              general_heap_idx,
+                              &general_heap_static_data_offsets);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_int_ctx_destroy;
+
+   result = pvr_srv_heap_init(srv_ws,
+                              &srv_ws->pds_heap,
+                              pds_heap_idx,
+                              &pds_heap_static_data_offsets);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_heap_finish_general;
+
+   result = pvr_srv_heap_init(srv_ws,
+                              &srv_ws->usc_heap,
+                              usc_heap_idx,
+                              &usc_heap_static_data_offsets);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_heap_finish_pds;
+
+   /* Check for and set up optional heaps. */
+   if (rgn_hdr_heap_idx != -1) {
+      result = pvr_srv_heap_init(srv_ws,
+                                 &srv_ws->rgn_hdr_heap,
+                                 rgn_hdr_heap_idx,
+                                 &rgn_hdr_heap_static_data_offsets);
+      if (result != VK_SUCCESS)
+         goto err_pvr_srv_heap_finish_usc;
+      srv_ws->rgn_hdr_heap_present = true;
+   } else {
+      srv_ws->rgn_hdr_heap_present = false;
+   }
+
+   /* Carve the static data out of the reserved areas and populate it. */
+   result =
+      pvr_winsys_helper_allocate_static_memory(&srv_ws->base,
+                                               pvr_srv_heap_alloc_reserved,
+                                               &srv_ws->general_heap.base,
+                                               &srv_ws->pds_heap.base,
+                                               &srv_ws->usc_heap.base,
+                                               &srv_ws->general_vma,
+                                               &srv_ws->pds_vma,
+                                               &srv_ws->usc_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_heap_finish_rgn_hdr;
+
+   result = pvr_winsys_helper_fill_static_memory(&srv_ws->base,
+                                                 srv_ws->general_vma,
+                                                 srv_ws->pds_vma,
+                                                 srv_ws->usc_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_free_static_memory;
+
+   return VK_SUCCESS;
+
+err_pvr_srv_free_static_memory:
+   pvr_winsys_helper_free_static_memory(srv_ws->general_vma,
+                                        srv_ws->pds_vma,
+                                        srv_ws->usc_vma);
+
+err_pvr_srv_heap_finish_rgn_hdr:
+   if (srv_ws->rgn_hdr_heap_present)
+      pvr_srv_heap_finish(srv_ws, &srv_ws->rgn_hdr_heap);
+
+err_pvr_srv_heap_finish_usc:
+   pvr_srv_heap_finish(srv_ws, &srv_ws->usc_heap);
+
+err_pvr_srv_heap_finish_pds:
+   pvr_srv_heap_finish(srv_ws, &srv_ws->pds_heap);
+
+err_pvr_srv_heap_finish_general:
+   pvr_srv_heap_finish(srv_ws, &srv_ws->general_heap);
+
+err_pvr_srv_int_ctx_destroy:
+   pvr_srv_int_ctx_destroy(srv_ws->render_fd, srv_ws->server_memctx);
+
+   return result;
+}
+
+/**
+ * Tear down everything created by pvr_srv_memctx_init(), in reverse order.
+ *
+ * A heap that still has live allocations cannot be finished; that is
+ * reported (VK_ERROR_UNKNOWN) but teardown of the remaining state continues.
+ */
+static void pvr_srv_memctx_finish(struct pvr_srv_winsys *srv_ws)
+{
+   pvr_winsys_helper_free_static_memory(srv_ws->general_vma,
+                                        srv_ws->pds_vma,
+                                        srv_ws->usc_vma);
+
+   if (srv_ws->rgn_hdr_heap_present) {
+      if (!pvr_srv_heap_finish(srv_ws, &srv_ws->rgn_hdr_heap)) {
+         vk_errorf(NULL,
+                   VK_ERROR_UNKNOWN,
+                   "Region header heap in use, can not deinit");
+      }
+   }
+
+   if (!pvr_srv_heap_finish(srv_ws, &srv_ws->usc_heap))
+      vk_errorf(NULL, VK_ERROR_UNKNOWN, "USC heap in use, can not deinit");
+
+   if (!pvr_srv_heap_finish(srv_ws, &srv_ws->pds_heap))
+      vk_errorf(NULL, VK_ERROR_UNKNOWN, "PDS heap in use, can not deinit");
+
+   if (!pvr_srv_heap_finish(srv_ws, &srv_ws->general_heap)) {
+      vk_errorf(NULL, VK_ERROR_UNKNOWN, "General heap in use, can not deinit");
+   }
+
+   pvr_srv_int_ctx_destroy(srv_ws->render_fd, srv_ws->server_memctx);
+}
+
+/**
+ * Allocate the sync primitive block that pvr_srv_sync_prim_alloc()
+ * sub-allocates sync prims from.
+ */
+static VkResult pvr_srv_sync_prim_block_init(struct pvr_srv_winsys *srv_ws)
+{
+   /* We don't currently make use of this value, but we're required to provide
+    * a valid pointer to pvr_srv_alloc_sync_primitive_block.
+    */
+   void *sync_block_pmr;
+
+   return pvr_srv_alloc_sync_primitive_block(srv_ws->render_fd,
+                                             &srv_ws->sync_block_handle,
+                                             &sync_block_pmr,
+                                             &srv_ws->sync_block_size,
+                                             &srv_ws->sync_block_fw_addr);
+}
+
+/** Release the sync primitive block and clear the stale handle. */
+static void pvr_srv_sync_prim_block_finish(struct pvr_srv_winsys *srv_ws)
+{
+   pvr_srv_free_sync_primitive_block(srv_ws->render_fd,
+                                     srv_ws->sync_block_handle);
+   srv_ws->sync_block_handle = NULL;
+}
+
+/** Destroy the winsys: teardown is the exact reverse of creation. */
+static void pvr_srv_winsys_destroy(struct pvr_winsys *ws)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   /* Cache the fd: srv_ws is freed before the connection is destroyed. */
+   int fd = srv_ws->render_fd;
+
+   pvr_srv_sync_prim_block_finish(srv_ws);
+   pvr_srv_memctx_finish(srv_ws);
+   vk_free(srv_ws->alloc, srv_ws);
+   pvr_srv_connection_destroy(fd);
+}
+
+/**
+ * Initialize \p dev_info from the winsys' packed BVNC.
+ *
+ * \return 0 on success, or the non-zero error from pvr_device_info_init()
+ *         when the device's BVNC is not supported (logged as a warning).
+ */
+static int pvr_srv_winsys_device_info_init(struct pvr_winsys *ws,
+                                           struct pvr_device_info *dev_info)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   int ret;
+
+   ret = pvr_device_info_init(dev_info, srv_ws->bvnc);
+   if (ret) {
+      mesa_logw("Unsupported BVNC: %u.%u.%u.%u\n",
+                PVR_BVNC_UNPACK_B(srv_ws->bvnc),
+                PVR_BVNC_UNPACK_V(srv_ws->bvnc),
+                PVR_BVNC_UNPACK_N(srv_ws->bvnc),
+                PVR_BVNC_UNPACK_C(srv_ws->bvnc));
+      return ret;
+   }
+
+   return 0;
+}
+
+/**
+ * Report the winsys heaps to the driver.
+ *
+ * When the optional region-header heap is absent, the general heap is
+ * reported in its place so callers always get a usable pointer.
+ */
+static void pvr_srv_winsys_get_heaps_info(struct pvr_winsys *ws,
+                                          struct pvr_winsys_heaps *heaps)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+
+   heaps->general_heap = &srv_ws->general_heap.base;
+   heaps->pds_heap = &srv_ws->pds_heap.base;
+   heaps->usc_heap = &srv_ws->usc_heap.base;
+
+   if (srv_ws->rgn_hdr_heap_present)
+      heaps->rgn_hdr_heap = &srv_ws->rgn_hdr_heap.base;
+   else
+      heaps->rgn_hdr_heap = &srv_ws->general_heap.base;
+}
+
+/** Services-backend implementation of the winsys function table. */
+static const struct pvr_winsys_ops srv_winsys_ops = {
+   .destroy = pvr_srv_winsys_destroy,
+   .device_info_init = pvr_srv_winsys_device_info_init,
+   .get_heaps_info = pvr_srv_winsys_get_heaps_info,
+   .buffer_create = pvr_srv_winsys_buffer_create,
+   .buffer_create_from_fd = pvr_srv_winsys_buffer_create_from_fd,
+   .buffer_destroy = pvr_srv_winsys_buffer_destroy,
+   .buffer_get_fd = pvr_srv_winsys_buffer_get_fd,
+   .buffer_map = pvr_srv_winsys_buffer_map,
+   .buffer_unmap = pvr_srv_winsys_buffer_unmap,
+   .heap_alloc = pvr_srv_winsys_heap_alloc,
+   .heap_free = pvr_srv_winsys_heap_free,
+   .vma_map = pvr_srv_winsys_vma_map,
+   .vma_unmap = pvr_srv_winsys_vma_unmap,
+   .syncobj_create = pvr_srv_winsys_syncobj_create,
+   .syncobj_destroy = pvr_srv_winsys_syncobj_destroy,
+   .syncobjs_reset = pvr_srv_winsys_syncobjs_reset,
+   .syncobjs_signal = pvr_srv_winsys_syncobjs_signal,
+   .syncobjs_wait = pvr_srv_winsys_syncobjs_wait,
+   .syncobjs_merge = pvr_srv_winsys_syncobjs_merge,
+   .free_list_create = pvr_srv_winsys_free_list_create,
+   .free_list_destroy = pvr_srv_winsys_free_list_destroy,
+   .render_target_dataset_create = pvr_srv_render_target_dataset_create,
+   .render_target_dataset_destroy = pvr_srv_render_target_dataset_destroy,
+   .render_ctx_create = pvr_srv_winsys_render_ctx_create,
+   .render_ctx_destroy = pvr_srv_winsys_render_ctx_destroy,
+   .render_submit = pvr_srv_winsys_render_submit,
+   .compute_ctx_create = pvr_srv_winsys_compute_ctx_create,
+   .compute_ctx_destroy = pvr_srv_winsys_compute_ctx_destroy,
+   .compute_submit = pvr_srv_winsys_compute_submit,
+};
+
+/**
+ * Check that the kernel-side (downstream services) driver is a version we
+ * can talk to.
+ *
+ * \return true only when the DRM driver reports exactly
+ *         PVR_SRV_VERSION_MAJ.PVR_SRV_VERSION_MIN.
+ */
+static bool pvr_is_driver_compatible(int render_fd)
+{
+   drmVersionPtr version;
+
+   version = drmGetVersion(render_fd);
+   if (!version)
+      return false;
+
+   /* The caller is expected to have already matched this fd to a "pvr"
+    * node; this check only fires in debug builds.
+    */
+   assert(strcmp(version->name, "pvr") == 0);
+
+   /* Only the 1.14 driver is supported for now. */
+   if (version->version_major != PVR_SRV_VERSION_MAJ ||
+       version->version_minor != PVR_SRV_VERSION_MIN) {
+      vk_errorf(NULL,
+                VK_ERROR_INCOMPATIBLE_DRIVER,
+                "Unsupported downstream driver version (%u.%u)",
+                version->version_major,
+                version->version_minor);
+      drmFreeVersion(version);
+
+      return false;
+   }
+
+   drmFreeVersion(version);
+
+   return true;
+}
+
+/**
+ * Create the services-backend winsys.
+ *
+ * Verifies kernel driver compatibility, opens a services connection, then
+ * sets up the memory context and the sync primitive block.
+ *
+ * \return the new winsys, or NULL on failure (all partially-created state
+ *         is unwound; Vulkan errors are reported via vk_error).
+ */
+struct pvr_winsys *pvr_srv_winsys_create(int master_fd,
+                                         int render_fd,
+                                         const VkAllocationCallbacks *alloc)
+{
+   struct pvr_srv_winsys *srv_ws;
+   VkResult result;
+   uint64_t bvnc;
+
+   if (!pvr_is_driver_compatible(render_fd))
+      return NULL;
+
+   result = pvr_srv_connection_create(render_fd, &bvnc);
+   if (result != VK_SUCCESS)
+      return NULL;
+
+   srv_ws =
+      vk_zalloc(alloc, sizeof(*srv_ws), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_ws) {
+      vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_pvr_srv_connection_destroy;
+   }
+
+   srv_ws->base.ops = &srv_winsys_ops;
+   srv_ws->bvnc = bvnc;
+   srv_ws->master_fd = master_fd;
+   srv_ws->render_fd = render_fd;
+   srv_ws->alloc = alloc;
+
+   result = pvr_srv_memctx_init(srv_ws);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_ws;
+
+   result = pvr_srv_sync_prim_block_init(srv_ws);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_memctx_finish;
+
+   return &srv_ws->base;
+
+err_pvr_srv_memctx_finish:
+   pvr_srv_memctx_finish(srv_ws);
+
+err_vk_free_srv_ws:
+   vk_free(alloc, srv_ws);
+
+err_pvr_srv_connection_destroy:
+   pvr_srv_connection_destroy(render_fd);
+
+   return NULL;
+}
+
+/**
+ * Sub-allocate one sync primitive slot from the winsys sync block.
+ *
+ * Slots are handed out by atomically bumping sync_block_offset in
+ * PVR_SRV_SYNC_PRIM_VALUE_SIZE steps. Freed prims do not currently return
+ * their slot (see pvr_srv_sync_prim_free).
+ *
+ * \return the new prim, or NULL when the block is exhausted or host memory
+ *         allocation fails.
+ */
+struct pvr_srv_sync_prim *pvr_srv_sync_prim_alloc(struct pvr_srv_winsys *srv_ws)
+{
+   struct pvr_srv_sync_prim *sync_prim;
+
+   /* Cheap early-out. This check on its own is racy (another thread may bump
+    * the offset between this read and the add below); the post-add check is
+    * the authoritative bound.
+    */
+   if (p_atomic_read(&srv_ws->sync_block_offset) >= srv_ws->sync_block_size) {
+      vk_error(NULL, VK_ERROR_UNKNOWN);
+      return NULL;
+   }
+
+   sync_prim = vk_alloc(srv_ws->alloc,
+                        sizeof(*sync_prim),
+                        8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!sync_prim) {
+      vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return NULL;
+   }
+
+   /* p_atomic_add_return() returns the new value rather than the old one, so
+    * we have to subtract PVR_SRV_SYNC_PRIM_VALUE_SIZE to get the old value.
+    */
+   sync_prim->offset = p_atomic_add_return(&srv_ws->sync_block_offset,
+                                           PVR_SRV_SYNC_PRIM_VALUE_SIZE);
+   sync_prim->offset -= PVR_SRV_SYNC_PRIM_VALUE_SIZE;
+
+   /* ">=" rather than "==": if several threads race past the early-out
+    * above, the shared offset can overshoot the block size by more than one
+    * slot, and an "==" test would let an out-of-bounds offset through.
+    */
+   if (sync_prim->offset >= srv_ws->sync_block_size) {
+      /* FIXME: need to free offset back to srv_ws->sync_block_offset. */
+      vk_free(srv_ws->alloc, sync_prim);
+
+      vk_error(NULL, VK_ERROR_UNKNOWN);
+
+      return NULL;
+   }
+
+   sync_prim->srv_ws = srv_ws;
+
+   return sync_prim;
+}
+
+/* FIXME: Add support for freeing offsets back to the sync block. */
+/**
+ * Free a sync primitive container.
+ *
+ * Only the host-side allocation is released; the slot in the sync block is
+ * currently leaked (see FIXME above). NULL is accepted and ignored.
+ */
+void pvr_srv_sync_prim_free(struct pvr_srv_sync_prim *sync_prim)
+{
+   if (sync_prim) {
+      struct pvr_srv_winsys *srv_ws = sync_prim->srv_ws;
+
+      vk_free(srv_ws->alloc, sync_prim);
+   }
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_H
+#define PVR_SRV_H
+
+#include <stdint.h>
+#include <pthread.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+#include "util/macros.h"
+#include "util/vma.h"
+
+/*******************************************
+ Misc defines
+ *******************************************/
+
+/* 64KB is MAX anticipated OS page size */
+#define PVR_SRV_RESERVED_SIZE_GRANULARITY 0x10000
+
+#define PVR_SRV_DEVMEM_HEAPNAME_MAXLENGTH 160
+
+#define PVR_SRV_GENERAL_HEAP_IDENT "General"
+#define PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT "RgnHdr BRN63142"
+#define PVR_SRV_PDSCODEDATA_HEAP_IDENT "PDS Code and Data"
+#define PVR_SRV_USCCODE_HEAP_IDENT "USC Code"
+
+#define FWIF_PDS_HEAP_TOTAL_BYTES 4096
+#define FWIF_PDS_HEAP_VDM_SYNC_OFFSET_BYTES 0
+#define FWIF_PDS_HEAP_EOT_OFFSET_BYTES 128
+#define FWIF_GENERAL_HEAP_TOTAL_BYTES 4096
+#define FWIF_USC_HEAP_TOTAL_BYTES 4096
+#define FWIF_USC_HEAP_VDM_SYNC_OFFSET_BYTES 0
+#define FWIF_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES 128U
+
+/*******************************************
+ structure definitions
+ *******************************************/
+/* Services-backend heap: wraps the common winsys heap together with the
+ * opaque server-side heap handle used in bridge calls.
+ */
+struct pvr_srv_winsys_heap {
+   struct pvr_winsys_heap base;
+
+   /* Opaque server heap handle (see pvr_srv_int_heap_create). */
+   void *server_heap;
+};
+
+/* Services-backend implementation of struct pvr_winsys. */
+struct pvr_srv_winsys {
+   struct pvr_winsys base;
+
+   /* master_fd is used for display buffer creation/PRIME export; render_fd
+    * is used for Services bridge calls and PMR mappings.
+    */
+   int master_fd;
+   int render_fd;
+
+   /* Allocation callbacks used for all winsys-internal host allocations. */
+   const VkAllocationCallbacks *alloc;
+
+   /* Packed bvnc */
+   uint64_t bvnc;
+
+   void *server_memctx;
+   void *server_memctx_data;
+
+   /* Required heaps */
+   struct pvr_srv_winsys_heap general_heap;
+   struct pvr_srv_winsys_heap pds_heap;
+   struct pvr_srv_winsys_heap usc_heap;
+
+   /* Optional heaps */
+   bool rgn_hdr_heap_present;
+   struct pvr_srv_winsys_heap rgn_hdr_heap;
+
+   /* vma's for reserved memory regions */
+   struct pvr_winsys_vma *pds_vma;
+   struct pvr_winsys_vma *usc_vma;
+   struct pvr_winsys_vma *general_vma;
+
+   /* Sync block used for allocating sync primitives. */
+   void *sync_block_handle;
+   uint32_t sync_block_size;
+   uint32_t sync_block_fw_addr;
+   uint16_t sync_block_offset;
+};
+
+/* A sync primitive sub-allocated from the winsys' shared sync block. */
+struct pvr_srv_sync_prim {
+   /* Winsys that owns the sync block this primitive lives in. */
+   struct pvr_srv_winsys *srv_ws;
+   /* Offset of this primitive within the sync block. */
+   uint32_t offset;
+   uint32_t value;
+};
+
+/*******************************************
+ helper macros
+ *******************************************/
+
+#define to_pvr_srv_winsys(ws) container_of((ws), struct pvr_srv_winsys, base)
+#define to_pvr_srv_winsys_heap(heap) \
+ container_of((heap), struct pvr_srv_winsys_heap, base)
+
+/*******************************************
+ functions
+ *******************************************/
+
+struct pvr_srv_sync_prim *
+pvr_srv_sync_prim_alloc(struct pvr_srv_winsys *srv_ws);
+void pvr_srv_sync_prim_free(struct pvr_srv_sync_prim *sync_prim);
+
+/* Return the firmware address of a sync primitive: the owning sync block's
+ * base firmware address plus the primitive's offset within the block.
+ */
+static inline uint32_t
+pvr_srv_sync_prim_get_fw_addr(const struct pvr_srv_sync_prim *const sync_prim)
+{
+   return sync_prim->srv_ws->sync_block_fw_addr + sync_prim->offset;
+}
+
+#endif /* PVR_SRV_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <xf86drm.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bo.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_winsys_helper.h"
+#include "util/u_atomic.h"
+#include "util/bitscan.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_log.h"
+
+/* Allocate a display buffer and import it as a PMR via dma-buf.
+ *
+ * On success *pmr_out and *handle_out are set; on failure the display buffer
+ * is destroyed and an error is returned.
+ *
+ * Note: This function does not have an associated pvr_srv_free_display_pmr
+ * function, use pvr_srv_free_pmr instead.
+ */
+static VkResult pvr_srv_alloc_display_pmr(struct pvr_srv_winsys *srv_ws,
+                                          uint64_t size,
+                                          uint64_t srv_flags,
+                                          void **const pmr_out,
+                                          uint32_t *const handle_out)
+{
+   uint64_t alignment_out;
+   uint64_t size_out;
+   VkResult result;
+   uint32_t handle;
+   int ret;
+   int fd;
+
+   ret =
+      pvr_winsys_helper_display_buffer_create(srv_ws->master_fd, size, &handle);
+   if (ret)
+      return vk_error(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   ret = drmPrimeHandleToFD(srv_ws->master_fd, handle, O_CLOEXEC, &fd);
+   if (ret) {
+      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_display_buffer_destroy;
+   }
+
+   result = pvr_srv_physmem_import_dmabuf(srv_ws->render_fd,
+                                          fd,
+                                          srv_flags,
+                                          pmr_out,
+                                          &size_out,
+                                          &alignment_out);
+
+   /* close fd, not needed anymore */
+   close(fd);
+
+   if (result != VK_SUCCESS)
+      goto err_display_buffer_destroy;
+
+   /* Only inspect the outputs after checking for success; on failure the
+    * import may have left them uninitialized.
+    */
+   assert(size_out >= size);
+   assert(alignment_out == srv_ws->base.page_size);
+
+   *handle_out = handle;
+
+   return VK_SUCCESS;
+
+err_display_buffer_destroy:
+   pvr_winsys_helper_display_buffer_destroy(srv_ws->master_fd, handle);
+
+   return result;
+}
+
+/* Take a reference on the buffer. */
+static void buffer_acquire(struct pvr_srv_winsys_bo *srv_bo)
+{
+   p_atomic_inc(&srv_bo->ref_count);
+}
+
+/* Drop a reference on the buffer. When the last reference goes away the PMR
+ * is freed, the underlying display buffer (if any) is destroyed, and the
+ * buffer object itself is freed.
+ */
+static void buffer_release(struct pvr_srv_winsys_bo *srv_bo)
+{
+   struct pvr_srv_winsys *srv_ws;
+
+   /* If all references were dropped the pmr can be freed and unlocked */
+   if (p_atomic_dec_return(&srv_bo->ref_count) == 0) {
+      srv_ws = to_pvr_srv_winsys(srv_bo->base.ws);
+      pvr_srv_free_pmr(srv_ws->render_fd, srv_bo->pmr);
+
+      if (srv_bo->is_display_buffer) {
+         pvr_winsys_helper_display_buffer_destroy(srv_ws->master_fd,
+                                                  srv_bo->handle);
+      }
+
+      vk_free(srv_ws->alloc, srv_bo);
+   }
+}
+
+/* Translate winsys PVR_WINSYS_BO_FLAG_* flags into Services
+ * PVR_SRV_MEMALLOCFLAG_* allocation flags.
+ */
+static uint64_t pvr_srv_get_alloc_flags(uint32_t ws_flags)
+{
+   /* TODO: For now we assume that buffers should always be accessible to the
+    * kernel and that the PVR_WINSYS_BO_FLAG_CPU_ACCESS flag only applies to
+    * userspace mappings. Check to see if there's any situations where we
+    * wouldn't want this to be the case.
+    */
+   uint64_t srv_flags = PVR_SRV_MEMALLOCFLAG_GPU_READABLE |
+                        PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE |
+                        PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE |
+                        PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC;
+
+   if (ws_flags & PVR_WINSYS_BO_FLAG_CPU_ACCESS) {
+      srv_flags |= PVR_SRV_MEMALLOCFLAG_CPU_READABLE |
+                   PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE;
+   }
+
+   if (ws_flags & PVR_WINSYS_BO_FLAG_GPU_UNCACHED)
+      srv_flags |= PVR_SRV_MEMALLOCFLAG_GPU_UNCACHED;
+   else
+      srv_flags |= PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT;
+
+   if (ws_flags & PVR_WINSYS_BO_FLAG_PM_FW_PROTECT)
+      srv_flags |= PVR_SRV_MEMALLOCFLAG_DEVICE_FLAG(PM_FW_PROTECT);
+
+   if (ws_flags & PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC)
+      srv_flags |= PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC;
+
+   return srv_flags;
+}
+
+/* Allocate a new GPU buffer object.
+ *
+ * For PVR_WINSYS_BO_TYPE_DISPLAY the buffer is allocated as a display buffer
+ * and imported as a PMR through dma-buf; otherwise a PMR is allocated
+ * directly through the Services bridge. The size is rounded up to the
+ * (page-aligned) alignment before allocation.
+ */
+VkResult pvr_srv_winsys_buffer_create(struct pvr_winsys *ws,
+                                      uint64_t size,
+                                      uint64_t alignment,
+                                      enum pvr_winsys_bo_type type,
+                                      uint32_t ws_flags,
+                                      struct pvr_winsys_bo **const bo_out)
+{
+   const uint64_t srv_flags = pvr_srv_get_alloc_flags(ws_flags);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_bo *srv_bo;
+   VkResult result;
+
+   assert(util_is_power_of_two_nonzero(alignment));
+
+   /* Kernel will page align the size, we do the same here so we have access to
+    * all the allocated memory.
+    */
+   alignment = MAX2(alignment, ws->page_size);
+   size = ALIGN_POT(size, alignment);
+
+   srv_bo = vk_zalloc(srv_ws->alloc,
+                      sizeof(*srv_bo),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_bo)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   srv_bo->is_display_buffer = (type == PVR_WINSYS_BO_TYPE_DISPLAY);
+   if (srv_bo->is_display_buffer) {
+      result = pvr_srv_alloc_display_pmr(srv_ws,
+                                         size,
+                                         srv_flags &
+                                            PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK,
+                                         &srv_bo->pmr,
+                                         &srv_bo->handle);
+
+      /* Display buffers come in through dma-buf import, so mark them as
+       * imported memory.
+       */
+      srv_bo->base.is_imported = true;
+   } else {
+      result =
+         pvr_srv_alloc_pmr(srv_ws->render_fd,
+                           size,
+                           size,
+                           1,
+                           1,
+                           srv_ws->base.log2_page_size,
+                           (srv_flags & PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK),
+                           getpid(),
+                           &srv_bo->pmr);
+   }
+
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_bo;
+
+   srv_bo->base.size = size;
+   srv_bo->base.ws = ws;
+   srv_bo->flags = srv_flags;
+
+   p_atomic_set(&srv_bo->ref_count, 1);
+
+   *bo_out = &srv_bo->base;
+
+   return VK_SUCCESS;
+
+err_vk_free_srv_bo:
+   vk_free(srv_ws->alloc, srv_bo);
+
+   return result;
+}
+
+/* Create a buffer object by importing an existing dma-buf fd as a PMR.
+ *
+ * The resulting buffer takes its size from the import; the caller retains
+ * ownership of fd.
+ */
+VkResult
+pvr_srv_winsys_buffer_create_from_fd(struct pvr_winsys *ws,
+                                     int fd,
+                                     struct pvr_winsys_bo **const bo_out)
+{
+   /* FIXME: PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC should be changed to
+    * PVR_SRV_MEMALLOCFLAG_CPU_CACHE_INCOHERENT, as dma-buf is always mapped
+    * as cacheable by the exporter. Flags are not passed to the exporter and it
+    * doesn't really change the behavior, but these can be used for internal
+    * checking so it should reflect the correct cachability of the buffer.
+    * Ref: pvr_GetMemoryFdPropertiesKHR
+    * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
+    */
+   static const uint64_t srv_flags =
+      PVR_SRV_MEMALLOCFLAG_CPU_READABLE | PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE |
+      PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC | PVR_SRV_MEMALLOCFLAG_GPU_READABLE |
+      PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE |
+      PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT;
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_bo *srv_bo;
+   uint64_t aligment_out;
+   uint64_t size_out;
+   VkResult result;
+
+   srv_bo = vk_zalloc(srv_ws->alloc,
+                      sizeof(*srv_bo),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_bo)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = pvr_srv_physmem_import_dmabuf(srv_ws->render_fd,
+                                          fd,
+                                          srv_flags,
+                                          &srv_bo->pmr,
+                                          &size_out,
+                                          &aligment_out);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_bo;
+
+   assert(aligment_out == srv_ws->base.page_size);
+
+   srv_bo->base.ws = ws;
+   srv_bo->base.size = size_out;
+   srv_bo->base.is_imported = true;
+   srv_bo->flags = srv_flags;
+
+   p_atomic_set(&srv_bo->ref_count, 1);
+
+   *bo_out = &srv_bo->base;
+
+   return VK_SUCCESS;
+
+err_vk_free_srv_bo:
+   vk_free(srv_ws->alloc, srv_bo);
+
+   return result;
+}
+
+/* Drop the caller's reference on the buffer; the buffer is actually freed
+ * once all outstanding references (e.g. CPU maps) are released.
+ */
+void pvr_srv_winsys_buffer_destroy(struct pvr_winsys_bo *bo)
+{
+   struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+
+   buffer_release(srv_bo);
+}
+
+/* Export a buffer as a dma-buf fd.
+ *
+ * Regular buffers are exported from their PMR through the Services bridge;
+ * display buffers are exported from the saved buffer handle via PRIME.
+ */
+VkResult pvr_srv_winsys_buffer_get_fd(struct pvr_winsys_bo *bo,
+                                      int *const fd_out)
+{
+   struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+   int ret;
+
+   if (!srv_bo->is_display_buffer)
+      return pvr_srv_physmem_export_dmabuf(srv_ws->render_fd,
+                                           srv_bo->pmr,
+                                           fd_out);
+
+   /* For display buffers, export using saved buffer handle */
+   ret =
+      drmPrimeHandleToFD(srv_ws->master_fd, srv_bo->handle, O_CLOEXEC, fd_out);
+   if (ret)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   return VK_SUCCESS;
+}
+
+/* Map the whole buffer into CPU address space.
+ *
+ * Returns the CPU mapping, or NULL after reporting
+ * VK_ERROR_MEMORY_MAP_FAILED. Takes a buffer reference which is dropped
+ * again by pvr_srv_winsys_buffer_unmap().
+ */
+void *pvr_srv_winsys_buffer_map(struct pvr_winsys_bo *bo)
+{
+   struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+   /* Mapping protection mirrors the CPU access flags the buffer was
+    * allocated with.
+    */
+   const int prot =
+      (srv_bo->flags & PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE ? PROT_WRITE : 0) |
+      (srv_bo->flags & PVR_SRV_MEMALLOCFLAG_CPU_READABLE ? PROT_READ : 0);
+
+   /* assert if memory is already mapped */
+   assert(!bo->map);
+
+   /* Map the full PMR to CPU space. NOTE(review): the mmap offset appears to
+    * encode the PMR handle shifted by the page size — confirm against the
+    * srvkm mmap UAPI.
+    */
+   bo->map = mmap(NULL,
+                  bo->size,
+                  prot,
+                  MAP_SHARED,
+                  srv_ws->render_fd,
+                  (off_t)srv_bo->pmr << srv_ws->base.log2_page_size);
+   if (bo->map == MAP_FAILED) {
+      bo->map = NULL;
+      vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+      return NULL;
+   }
+
+   VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map,
+                                bo->size,
+                                0,
+                                srv_bo->flags &
+                                   PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC));
+
+   buffer_acquire(srv_bo);
+
+   return bo->map;
+}
+
+/* Unmap a previously-mapped buffer from CPU address space and drop the
+ * reference taken by pvr_srv_winsys_buffer_map().
+ */
+void pvr_srv_winsys_buffer_unmap(struct pvr_winsys_bo *bo)
+{
+   struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+
+   /* output error if trying to unmap memory that is not previously mapped */
+   assert(bo->map);
+
+   /* Unmap the whole PMR from CPU space */
+   if (munmap(bo->map, bo->size))
+      vk_error(NULL, VK_ERROR_UNKNOWN);
+
+   VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
+
+   bo->map = NULL;
+
+   buffer_release(srv_bo);
+}
+
+/* Allocate a vma at a fixed address inside a heap's reserved region.
+ *
+ * Returns the vma, or NULL on failure (out of host memory, invalid/unaligned
+ * reserved address, or MMU reservation failure).
+ *
+ * This function must be used to allocate inside reserved region and must be
+ * used internally only. This also means whoever is using it, must know what
+ * they are doing.
+ */
+struct pvr_winsys_vma *
+pvr_srv_heap_alloc_reserved(struct pvr_winsys_heap *heap,
+                            const pvr_dev_addr_t reserved_dev_addr,
+                            uint64_t size,
+                            uint64_t alignment)
+{
+   struct pvr_srv_winsys_heap *srv_heap = to_pvr_srv_winsys_heap(heap);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(heap->ws);
+   struct pvr_srv_winsys_vma *srv_vma;
+   VkResult result;
+   uint64_t addr;
+
+   assert(util_is_power_of_two_nonzero(alignment));
+
+   /* pvr_srv_winsys_buffer_create() page aligns the size. We must do the same
+    * here to ensure enough heap space is allocated to be able to map the
+    * buffer to the GPU.
+    */
+   alignment = MAX2(alignment, heap->ws->page_size);
+   size = ALIGN_POT(size, alignment);
+
+   srv_vma = vk_alloc(srv_ws->alloc,
+                      sizeof(*srv_vma),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_vma) {
+      vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return NULL;
+   }
+
+   /* Just check address is correct and aligned, locking is not required as
+    * user is responsible to provide a distinct address.
+    */
+   if (reserved_dev_addr.addr < heap->base_addr.addr ||
+       reserved_dev_addr.addr + size >
+          heap->base_addr.addr + heap->reserved_size ||
+       reserved_dev_addr.addr & ((srv_ws->base.page_size) - 1))
+      goto err_vk_free_srv_vma;
+
+   addr = reserved_dev_addr.addr;
+
+   /* Reserve the virtual range in the MMU and create a mapping structure */
+   result = pvr_srv_int_reserve_addr(srv_ws->render_fd,
+                                     srv_heap->server_heap,
+                                     (pvr_dev_addr_t){ .addr = addr },
+                                     size,
+                                     &srv_vma->reservation);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_vma;
+
+   srv_vma->base.dev_addr.addr = addr;
+   srv_vma->base.bo = NULL;
+   srv_vma->base.heap = heap;
+   srv_vma->base.size = size;
+
+   /* The heap ref count is dropped again in pvr_srv_winsys_heap_free(). */
+   p_atomic_inc(&srv_heap->base.ref_count);
+
+   return &srv_vma->base;
+
+err_vk_free_srv_vma:
+   vk_free(srv_ws->alloc, srv_vma);
+
+   return NULL;
+}
+
+/* Allocate a vma from a heap's non-reserved space.
+ *
+ * Virtual space is carved out by the common heap helper, then the range is
+ * reserved in the MMU through the Services bridge. Returns NULL on failure.
+ */
+struct pvr_winsys_vma *pvr_srv_winsys_heap_alloc(struct pvr_winsys_heap *heap,
+                                                 uint64_t size,
+                                                 uint64_t alignment)
+{
+   struct pvr_srv_winsys_heap *const srv_heap = to_pvr_srv_winsys_heap(heap);
+   struct pvr_srv_winsys *const srv_ws = to_pvr_srv_winsys(heap->ws);
+   struct pvr_srv_winsys_vma *srv_vma;
+   VkResult result;
+   bool ret;
+
+   srv_vma = vk_alloc(srv_ws->alloc,
+                      sizeof(*srv_vma),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_vma) {
+      vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return NULL;
+   }
+
+   ret = pvr_winsys_helper_heap_alloc(heap, size, alignment, &srv_vma->base);
+   if (!ret)
+      goto err_pvr_srv_free_vma;
+
+   /* Reserve the virtual range in the MMU and create a mapping structure. */
+   result = pvr_srv_int_reserve_addr(srv_ws->render_fd,
+                                     srv_heap->server_heap,
+                                     srv_vma->base.dev_addr,
+                                     srv_vma->base.size,
+                                     &srv_vma->reservation);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_free_allocation;
+
+   return &srv_vma->base;
+
+err_pvr_srv_free_allocation:
+   pvr_winsys_helper_heap_free(&srv_vma->base);
+
+err_pvr_srv_free_vma:
+   vk_free(srv_ws->alloc, srv_vma);
+
+   return NULL;
+}
+
+/* Free a vma previously returned by pvr_srv_winsys_heap_alloc() or
+ * pvr_srv_heap_alloc_reserved(). The vma must not have a live device mapping
+ * (unmap first).
+ */
+void pvr_srv_winsys_heap_free(struct pvr_winsys_vma *vma)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(vma->heap->ws);
+   struct pvr_srv_winsys_vma *srv_vma = to_pvr_srv_winsys_vma(vma);
+
+   /* A vma with an existing device mapping should not be freed. */
+   assert(!srv_vma->base.bo);
+
+   /* Remove mapping handle and underlying reservation. */
+   pvr_srv_int_unreserve_addr(srv_ws->render_fd, srv_vma->reservation);
+
+   /* Check if we are dealing with reserved address range. */
+   if (vma->dev_addr.addr <
+       (vma->heap->base_addr.addr + vma->heap->reserved_size)) {
+      /* For the reserved addresses just decrement the reference count. */
+      p_atomic_dec(&vma->heap->ref_count);
+   } else {
+      /* Free allocated virtual space. */
+      pvr_winsys_helper_heap_free(vma);
+   }
+
+   vk_free(srv_ws->alloc, srv_vma);
+}
+
+/* Map a buffer (or a sub-range of it) into a vma's device-virtual range.
+ *
+ * Returns the device address of the mapped data (vma base plus the
+ * intra-page offset), or a zero address after reporting
+ * VK_ERROR_MEMORY_MAP_FAILED on failure. Takes a buffer reference which is
+ * dropped by pvr_srv_winsys_vma_unmap().
+ *
+ * * We assume the vma has been allocated with extra space to accommodate the
+ *   offset.
+ * * The offset passed in is unchanged and can be used to calculate the extra
+ *   size that needs to be mapped and final device virtual address.
+ */
+pvr_dev_addr_t pvr_srv_winsys_vma_map(struct pvr_winsys_vma *vma,
+                                      struct pvr_winsys_bo *bo,
+                                      uint64_t offset,
+                                      uint64_t size)
+{
+   struct pvr_srv_winsys_vma *srv_vma = to_pvr_srv_winsys_vma(vma);
+   struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+   const uint64_t srv_flags = srv_bo->flags &
+                              PVR_SRV_MEMALLOCFLAGS_VIRTUAL_MASK;
+   /* Offset of the data within its first page. */
+   const uint32_t virt_offset = offset & (vma->heap->page_size - 1);
+   /* Page-aligned span that must be mapped to cover [offset, offset+size). */
+   const uint64_t aligned_virt_size =
+      ALIGN_POT(virt_offset + size, vma->heap->page_size);
+   VkResult result;
+
+   /* Address should not be mapped already */
+   assert(!srv_vma->base.bo);
+
+   if (srv_bo->is_display_buffer) {
+      struct pvr_srv_winsys_heap *srv_heap = to_pvr_srv_winsys_heap(vma->heap);
+
+      /* In case of display buffers, we only support to map whole PMR */
+      if (offset != 0 || bo->size != ALIGN_POT(size, srv_ws->base.page_size) ||
+          vma->size != bo->size) {
+         vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+         return (pvr_dev_addr_t){ .addr = 0UL };
+      }
+
+      /* Map the requested pmr */
+      result = pvr_srv_int_map_pmr(srv_ws->render_fd,
+                                   srv_heap->server_heap,
+                                   srv_vma->reservation,
+                                   srv_bo->pmr,
+                                   srv_flags,
+                                   &srv_vma->mapping);
+
+   } else {
+      const uint32_t phys_page_offset = (offset - virt_offset) >>
+                                        srv_ws->base.log2_page_size;
+      const uint32_t phys_page_count = aligned_virt_size >>
+                                       srv_ws->base.log2_page_size;
+
+      /* Check if bo and vma can accommodate the given size and offset */
+      if (ALIGN_POT(offset + size, vma->heap->page_size) > bo->size ||
+          aligned_virt_size > vma->size) {
+         vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+         return (pvr_dev_addr_t){ .addr = 0UL };
+      }
+
+      /* Map the requested pages */
+      result = pvr_srv_int_map_pages(srv_ws->render_fd,
+                                     srv_vma->reservation,
+                                     srv_bo->pmr,
+                                     phys_page_count,
+                                     phys_page_offset,
+                                     srv_flags,
+                                     vma->dev_addr);
+   }
+
+   if (result != VK_SUCCESS)
+      return (pvr_dev_addr_t){ .addr = 0UL };
+
+   buffer_acquire(srv_bo);
+
+   vma->bo = &srv_bo->base;
+   vma->bo_offset = offset;
+   vma->mapped_size = aligned_virt_size;
+
+   return (pvr_dev_addr_t){ .addr = vma->dev_addr.addr + virt_offset };
+}
+
+/* Remove the device mapping established by pvr_srv_winsys_vma_map() and drop
+ * the buffer reference it took. The vma must currently be mapped.
+ */
+void pvr_srv_winsys_vma_unmap(struct pvr_winsys_vma *vma)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(vma->heap->ws);
+   struct pvr_srv_winsys_vma *srv_vma = to_pvr_srv_winsys_vma(vma);
+   struct pvr_srv_winsys_bo *srv_bo;
+
+   /* Address should be mapped */
+   assert(srv_vma->base.bo);
+
+   srv_bo = to_pvr_srv_winsys_bo(srv_vma->base.bo);
+
+   if (srv_bo->is_display_buffer) {
+      /* Unmap the requested pmr */
+      pvr_srv_int_unmap_pmr(srv_ws->render_fd, srv_vma->mapping);
+   } else {
+      /* Unmap requested pages */
+      pvr_srv_int_unmap_pages(srv_ws->render_fd,
+                              srv_vma->reservation,
+                              vma->dev_addr,
+                              vma->mapped_size >> srv_ws->base.log2_page_size);
+   }
+
+   buffer_release(srv_bo);
+
+   srv_vma->base.bo = NULL;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_BO_H
+#define PVR_SRV_BO_H
+
+#include <stdint.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/*******************************************
+ MemAlloc flags
+ *******************************************/
+
+/* TODO: remove unused and redundant flags */
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET 26U
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK \
+ (0x3ULL << PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN BITFIELD_BIT(19U)
+#define PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE BITFIELD_BIT(14U)
+#define PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC BITFIELD_BIT(31U)
+#define PVR_SRV_MEMALLOCFLAG_SVM_ALLOC BITFIELD_BIT(17U)
+#define PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC BITFIELD_BIT(30U)
+#define PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE BITFIELD_BIT(29U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_READABLE BITFIELD_BIT(0U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE BITFIELD_BIT(1U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK (7ULL << 8U)
+#define PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK \
+ (PVR_SRV_MEMALLOCFLAG_GPU_READABLE | PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE | \
+ PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK)
+#define PVR_SRV_MEMALLOCFLAG_CPU_READABLE BITFIELD_BIT(4U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE BITFIELD_BIT(5U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_MODE_MASK (7ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_INCOHERENT (3ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK \
+ (PVR_SRV_MEMALLOCFLAG_CPU_READABLE | PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE | \
+ PVR_SRV_MEMALLOCFLAG_CPU_CACHE_MODE_MASK)
+#define PVR_SRV_MEMALLOCFLAG_NO_OSPAGES_ON_ALLOC BITFIELD_BIT(15U)
+#define PVR_SRV_MEMALLOCFLAG_SPARSE_NO_DUMMY_BACKING BITFIELD_BIT(18U)
+#define PVR_SRV_MEMALLOCFLAG_SPARSE_ZERO_BACKING BITFIELD_BIT(20U)
+#define PVR_SRV_MEMALLOCFLAG_FW_ALLOC_OSID_MASK (7ULL << 23U)
+#define PVR_SRV_MEMALLOCFLAG_VAL_SECURE_BUFFER BITFIELD64_BIT(34U)
+#define PVR_SRV_MEMALLOCFLAG_VAL_SHARED_BUFFER BITFIELD64_BIT(35U)
+#define PVR_SRV_PHYS_HEAP_HINT_SHIFT (60U)
+#define PVR_SRV_PHYS_HEAP_HINT_MASK (0xFULL << PVR_SRV_PHYS_HEAP_HINT_SHIFT)
+#define PVR_SRV_MEMALLOCFLAG_GPU_UNCACHED BITFIELD_BIT(8U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT (3ULL << 8U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC (0ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_READ_PERMITTED BITFIELD_BIT(2U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_WRITE_PERMITTED BITFIELD_BIT(3U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_READ_PERMITTED BITFIELD_BIT(6U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_WRITE_PERMITTED BITFIELD_BIT(7U)
+
+#define PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK \
+ (PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK | \
+ PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN | \
+ PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE | \
+ PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC | PVR_SRV_MEMALLOCFLAG_SVM_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE | \
+ PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK | \
+ PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK | \
+ PVR_SRV_MEMALLOCFLAG_NO_OSPAGES_ON_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_SPARSE_NO_DUMMY_BACKING | \
+ PVR_SRV_MEMALLOCFLAG_SPARSE_ZERO_BACKING | \
+ PVR_SRV_MEMALLOCFLAG_FW_ALLOC_OSID_MASK | \
+ PVR_SRV_MEMALLOCFLAG_VAL_SECURE_BUFFER | \
+ PVR_SRV_MEMALLOCFLAG_VAL_SHARED_BUFFER | PVR_SRV_PHYS_HEAP_HINT_MASK)
+
+#define PVR_SRV_MEMALLOCFLAGS_PHYSICAL_MASK \
+ (PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK | \
+ PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK | \
+ PVR_SRV_MEMALLOCFLAG_CPU_READ_PERMITTED | \
+ PVR_SRV_MEMALLOCFLAG_CPU_WRITE_PERMITTED | \
+ PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN | \
+ PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC | \
+ PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE | PVR_SRV_PHYS_HEAP_HINT_MASK)
+
+#define PVR_SRV_MEMALLOCFLAGS_VIRTUAL_MASK \
+ (PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK | \
+ PVR_SRV_MEMALLOCFLAG_GPU_READ_PERMITTED | \
+ PVR_SRV_MEMALLOCFLAG_GPU_WRITE_PERMITTED)
+
+/* Device specific MMU flags. */
+/*!< Memory that only the PM and Firmware can access */
+#define PM_FW_PROTECT BITFIELD_BIT(0U)
+
+/* Helper macro for setting device specific MMU flags. */
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAG(n) \
+ (((n) << PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET) & \
+ PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK)
+
+/*******************************************
+ struct definitions
+ *******************************************/
+
+/* Services-backend buffer object. */
+struct pvr_srv_winsys_bo {
+   struct pvr_winsys_bo base;
+   /* Atomic reference count; the buffer is freed when it drops to zero. */
+   uint32_t ref_count;
+   /* Opaque PMR handle used in Services bridge calls. */
+   void *pmr;
+
+   bool is_display_buffer;
+   /* Display buffer handle; only valid when is_display_buffer is set. */
+   uint32_t handle;
+   /* PVR_SRV_MEMALLOCFLAG_* flags the buffer was allocated/imported with. */
+   uint64_t flags;
+};
+
+/* Services-backend device-virtual allocation. */
+struct pvr_srv_winsys_vma {
+   struct pvr_winsys_vma base;
+   /* MMU reservation handle for the virtual range. */
+   void *reservation;
+
+   /* Required when mapping whole PMR, used for display buffers mapping. */
+   void *mapping;
+};
+
+/*******************************************
+ function prototypes
+ *******************************************/
+
+VkResult pvr_srv_winsys_buffer_create(struct pvr_winsys *ws,
+ uint64_t size,
+ uint64_t alignment,
+ enum pvr_winsys_bo_type type,
+ uint32_t ws_flags,
+ struct pvr_winsys_bo **const bo_out);
+VkResult
+pvr_srv_winsys_buffer_create_from_fd(struct pvr_winsys *ws,
+ int fd,
+ struct pvr_winsys_bo **const bo_out);
+void pvr_srv_winsys_buffer_destroy(struct pvr_winsys_bo *bo);
+
+VkResult pvr_srv_winsys_buffer_get_fd(struct pvr_winsys_bo *bo,
+ int *const fd_out);
+
+void *pvr_srv_winsys_buffer_map(struct pvr_winsys_bo *bo);
+void pvr_srv_winsys_buffer_unmap(struct pvr_winsys_bo *bo);
+
+struct pvr_winsys_vma *
+pvr_srv_heap_alloc_reserved(struct pvr_winsys_heap *heap,
+ const pvr_dev_addr_t reserved_dev_addr,
+ uint64_t size,
+ uint64_t alignment);
+struct pvr_winsys_vma *pvr_srv_winsys_heap_alloc(struct pvr_winsys_heap *heap,
+ uint64_t size,
+ uint64_t alignment);
+void pvr_srv_winsys_heap_free(struct pvr_winsys_vma *vma);
+
+pvr_dev_addr_t pvr_srv_winsys_vma_map(struct pvr_winsys_vma *vma,
+ struct pvr_winsys_bo *bo,
+ uint64_t offset,
+ uint64_t size);
+void pvr_srv_winsys_vma_unmap(struct pvr_winsys_vma *vma);
+
+/*******************************************
+ helper macros
+ *******************************************/
+
+#define to_pvr_srv_winsys_bo(bo) \
+ container_of((bo), struct pvr_srv_winsys_bo, base)
+#define to_pvr_srv_winsys_vma(vma) \
+ container_of((vma), struct pvr_srv_winsys_vma, base)
+
+#endif /* PVR_SRV_BO_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <xf86drm.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bridge.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_log.h"
+
+/* Report a failed bridge call: logs the bridge function name, the Services
+ * error code from the return structure, and errno, then evaluates to the
+ * given VkResult.
+ */
+#define vk_bridge_err(vk_err, bridge_func, bridge_ret) \
+   vk_errorf(NULL, \
+             vk_err, \
+             "%s failed, PVR_SRV_ERROR: %d, Errno: %s", \
+             bridge_func, \
+             (bridge_ret).error, \
+             strerror(errno))
+
+/* Issue a single Services bridge call through the srvkm DRM ioctl.
+ *
+ * Returns 0 on success or the drmIoctl() error. Note that bridge-level
+ * failure is reported separately via the error field of the output
+ * structure, which callers must check themselves.
+ */
+static int pvr_srv_bridge_call(int fd,
+                               uint8_t bridge_id,
+                               uint32_t function_id,
+                               void *input,
+                               uint32_t input_buffer_size,
+                               void *output,
+                               uint32_t output_buffer_size)
+{
+   struct drm_srvkm_cmd cmd = {
+      .bridge_id = bridge_id,
+      .bridge_func_id = function_id,
+      .in_data_ptr = (uint64_t)(uintptr_t)input,
+      .out_data_ptr = (uint64_t)(uintptr_t)output,
+      .in_data_size = input_buffer_size,
+      .out_data_size = output_buffer_size,
+   };
+
+   int ret = drmIoctl(fd, DRM_IOCTL_SRVKM_CMD, &cmd);
+   if (unlikely(ret))
+      return ret;
+
+   /* Tell Valgrind the kernel wrote the output buffer. */
+   VG(VALGRIND_MAKE_MEM_DEFINED(output, output_buffer_size));
+
+   return 0U;
+}
+
+/* Open a Services connection on fd, advertising our DDK version and build
+ * options, and return the device's packed BVNC in *bvnc_out.
+ */
+VkResult pvr_srv_connection_create(int fd, uint64_t *const bvnc_out)
+{
+   struct pvr_srv_bridge_connect_cmd cmd = {
+      .flags = PVR_SRV_FLAGS_CLIENT_64BIT_COMPAT,
+      .build_options = RGX_BUILD_OPTIONS,
+      .DDK_version = PVR_SRV_VERSION,
+      .DDK_build = PVR_SRV_VERSION_BUILD,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_bridge_connect_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_SRVCORE,
+                                PVR_SRV_BRIDGE_SRVCORE_CONNECT,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_SRVCORE_CONNECT",
+                           ret);
+   }
+
+   *bvnc_out = ret.bvnc;
+
+   return VK_SUCCESS;
+}
+
+/* Close the Services connection on fd. Failure is only logged since there is
+ * nothing useful the caller can do about it.
+ */
+void pvr_srv_connection_destroy(int fd)
+{
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_bridge_disconnect_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_SRVCORE,
+                                PVR_SRV_BRIDGE_SRVCORE_DISCONNECT,
+                                NULL,
+                                0,
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN, "PVR_SRV_BRIDGE_SRVCORE_DISCONNECT", ret);
+   }
+}
+
+/* Allocate a sync primitive block, returning its handle, backing PMR, size
+ * and firmware address.
+ */
+VkResult pvr_srv_alloc_sync_primitive_block(int fd,
+                                            void **const handle_out,
+                                            void **const pmr_out,
+                                            uint32_t *const size_out,
+                                            uint32_t *const addr_out)
+{
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_bridge_alloc_sync_primitive_block_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_SYNC,
+                                PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK,
+                                NULL,
+                                0,
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK",
+                           ret);
+   }
+
+   *handle_out = ret.handle;
+   *pmr_out = ret.pmr;
+   *size_out = ret.size;
+   *addr_out = ret.addr;
+
+   return VK_SUCCESS;
+}
+
+/* Free a sync primitive block previously allocated with
+ * pvr_srv_alloc_sync_primitive_block(). Failure is only logged.
+ */
+void pvr_srv_free_sync_primitive_block(int fd, void *handle)
+{
+   struct pvr_srv_bridge_free_sync_primitive_block_cmd cmd = {
+      .handle = handle,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_bridge_free_sync_primitive_block_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_SYNC,
+                                PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK",
+                    ret);
+   }
+}
+
+/* Query the number of heaps available in heap config 0. */
+VkResult pvr_srv_get_heap_count(int fd, uint32_t *const heap_count_out)
+{
+   struct pvr_srv_heap_count_cmd cmd = {
+      .heap_config_index = 0,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_heap_count_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_MM,
+                                PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT",
+                           ret);
+   }
+
+   *heap_count_out = ret.heap_count;
+
+   return VK_SUCCESS;
+}
+
+/* Creates a server-side device-memory heap within server_memctx.
+ *
+ * The heap spans [base_address, base_address + size) with the given page
+ * size; the kernel heap handle is returned through server_heap_out.
+ */
+VkResult pvr_srv_int_heap_create(int fd,
+                                 pvr_dev_addr_t base_address,
+                                 uint64_t size,
+                                 uint32_t log2_page_size,
+                                 void *server_memctx,
+                                 void **const server_heap_out)
+{
+   struct pvr_srv_devmem_int_heap_create_cmd cmd = {
+      .server_memctx = server_memctx,
+      .base_addr = base_address,
+      .size = size,
+      .log2_page_size = log2_page_size,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_heap_create_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE",
+                           ret);
+   }
+
+   *server_heap_out = ret.server_heap;
+
+   return VK_SUCCESS;
+}
+
+/* Destroys a heap created with pvr_srv_int_heap_create(). Failure is
+ * logged via vk_bridge_err() and otherwise ignored.
+ */
+void pvr_srv_int_heap_destroy(int fd, void *server_heap)
+{
+   struct pvr_srv_devmem_int_heap_destroy_cmd cmd = {
+      .server_heap = server_heap,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_heap_destroy_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY",
+                    ret);
+   }
+}
+
+/* Queries the name and/or details of a single heap in heap config 0.
+ *
+ * The name and detail queries are independent: buffer_out (paired with
+ * buffer_size) and each of the base_address/size/reserved_size/
+ * log2_page_size out-pointers may be NULL when not needed.
+ */
+VkResult pvr_srv_get_heap_details(int fd,
+                                  uint32_t heap_index,
+                                  uint32_t buffer_size,
+                                  char *const buffer_out,
+                                  pvr_dev_addr_t *const base_address_out,
+                                  uint64_t *const size_out,
+                                  uint64_t *const reserved_size_out,
+                                  uint32_t *const log2_page_size_out)
+{
+   struct pvr_srv_heap_cfg_details_cmd cmd = {
+      .heap_config_index = 0,
+      .heap_index = heap_index,
+      .buffer_size = buffer_size,
+      .buffer = buffer_out,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure.
+    * The name buffer pointer is carried in the return struct as well.
+    */
+   struct pvr_srv_heap_cfg_details_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+      .buffer = buffer_out,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS",
+                           ret);
+   }
+
+   /* Tell Valgrind the kernel wrote into the caller's name buffer. */
+   VG(VALGRIND_MAKE_MEM_DEFINED(buffer_out, buffer_size));
+
+   if (base_address_out)
+      *base_address_out = ret.base_addr;
+
+   if (size_out)
+      *size_out = ret.size;
+
+   if (reserved_size_out)
+      *reserved_size_out = ret.reserved_size;
+
+   if (log2_page_size_out)
+      *log2_page_size_out = ret.log2_page_size;
+
+   return VK_SUCCESS;
+}
+
+/* Destroys a device-memory context created with pvr_srv_int_ctx_create().
+ * Failure is logged via vk_bridge_err() and otherwise ignored.
+ */
+void pvr_srv_int_ctx_destroy(int fd, void *server_memctx)
+{
+   struct pvr_srv_devmem_int_ctx_destroy_cmd cmd = {
+      .server_memctx = server_memctx,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_ctx_destroy_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY",
+                    ret);
+   }
+}
+
+/* Creates a server-side device-memory context (a userspace one, i.e.
+ * kernel_memory_ctx = false). The kernel handle and its private data are
+ * returned through the out parameters.
+ */
+VkResult pvr_srv_int_ctx_create(int fd,
+                                void **const server_memctx_out,
+                                void **const server_memctx_data_out)
+{
+   struct pvr_srv_devmem_int_ctx_create_cmd cmd = {
+      .kernel_memory_ctx = false,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_ctx_create_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE",
+                           ret);
+   }
+
+   *server_memctx_out = ret.server_memctx;
+   *server_memctx_data_out = ret.server_memctx_data;
+
+   return VK_SUCCESS;
+}
+
+/* Reserves a device-virtual address range [addr, addr + size) in the given
+ * server heap. The reservation handle is returned through reservation_out.
+ */
+VkResult pvr_srv_int_reserve_addr(int fd,
+                                  void *server_heap,
+                                  pvr_dev_addr_t addr,
+                                  uint64_t size,
+                                  void **const reservation_out)
+{
+   struct pvr_srv_devmem_int_reserve_range_cmd cmd = {
+      .server_heap = server_heap,
+      .addr = addr,
+      .size = size,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_reserve_range_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE",
+                           ret);
+   }
+
+   *reservation_out = ret.reservation;
+
+   return VK_SUCCESS;
+}
+
+/* Releases an address-range reservation made with
+ * pvr_srv_int_reserve_addr(). Failure is logged via vk_bridge_err() and
+ * otherwise ignored.
+ */
+void pvr_srv_int_unreserve_addr(int fd, void *reservation)
+{
+   struct pvr_srv_bridge_in_devmem_int_unreserve_range_cmd cmd = {
+      .reservation = reservation,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_bridge_in_devmem_int_unreserve_range_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE",
+                    ret);
+   }
+}
+
+/* Allocates RAM-backed, locked physical memory (a PMR) on the server.
+ *
+ * The allocation is described as virt_blocks blocks of block_size bytes, of
+ * which phy_blocks are physically backed; the PMR handle is returned
+ * through pmr_out.
+ */
+VkResult pvr_srv_alloc_pmr(int fd,
+                           uint64_t size,
+                           uint64_t block_size,
+                           uint32_t phy_blocks,
+                           uint32_t virt_blocks,
+                           uint32_t log2_page_size,
+                           uint64_t flags,
+                           uint32_t pid,
+                           void **const pmr_out)
+{
+   /* Annotation shown by the kernel's allocation accounting; its size
+    * includes the NUL terminator and is clamped to the bridge maximum.
+    */
+   const char *name = "VK PHYSICAL ALLOCATION";
+   const uint32_t name_size = strnlen(name, DEVMEM_ANNOTATION_MAX_LEN - 1) + 1;
+   uint32_t mapping_table = 0;
+
+   struct pvr_srv_physmem_new_ram_backed_locked_pmr_cmd cmd = {
+      .size = size,
+      .block_size = block_size,
+      .phy_blocks = phy_blocks,
+      .virt_blocks = virt_blocks,
+      .mapping_table = &mapping_table,
+      .log2_page_size = log2_page_size,
+      .flags = flags,
+      .annotation_size = name_size,
+      .annotation = name,
+      .pid = pid,
+      .pdump_flags = 0x00000000U,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_physmem_new_ram_backed_locked_pmr_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED,
+                           "PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR",
+                           ret);
+   }
+
+   *pmr_out = ret.pmr;
+
+   return VK_SUCCESS;
+}
+
+/* Drops the reference/lock on a PMR obtained from pvr_srv_alloc_pmr() or
+ * pvr_srv_physmem_import_dmabuf(). Failure is logged via vk_bridge_err()
+ * and otherwise ignored.
+ */
+void pvr_srv_free_pmr(int fd, void *pmr)
+{
+   struct pvr_srv_pmr_unref_unlock_pmr_cmd cmd = {
+      .pmr = pmr,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_pmr_unref_unlock_pmr_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR",
+                    ret);
+   }
+}
+
+/* Maps page_count pages of a PMR, starting at page_offset, into a reserved
+ * address range at device address addr.
+ */
+VkResult pvr_srv_int_map_pages(int fd,
+                               void *reservation,
+                               void *pmr,
+                               uint32_t page_count,
+                               uint32_t page_offset,
+                               uint64_t flags,
+                               pvr_dev_addr_t addr)
+{
+   struct pvr_srv_devmem_int_map_pages_cmd cmd = {
+      .reservation = reservation,
+      .pmr = pmr,
+      .page_count = page_count,
+      .page_offset = page_offset,
+      .flags = flags,
+      .addr = addr,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_map_pages_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES",
+                           ret);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Unmaps page_count pages previously mapped with pvr_srv_int_map_pages().
+ * Failure is logged via vk_bridge_err() and otherwise ignored.
+ */
+void pvr_srv_int_unmap_pages(int fd,
+                             void *reservation,
+                             pvr_dev_addr_t dev_addr,
+                             uint32_t page_count)
+{
+   struct pvr_srv_devmem_int_unmap_pages_cmd cmd = {
+      .reservation = reservation,
+      .dev_addr = dev_addr,
+      .page_count = page_count,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_unmap_pages_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES",
+                    ret);
+   }
+}
+
+/* Maps a whole PMR into a reserved range of the given server heap. The
+ * mapping handle is returned through mapping_out.
+ */
+VkResult pvr_srv_int_map_pmr(int fd,
+                             void *server_heap,
+                             void *reservation,
+                             void *pmr,
+                             uint64_t flags,
+                             void **const mapping_out)
+{
+   struct pvr_srv_devmem_int_map_pmr_cmd cmd = {
+      .server_heap = server_heap,
+      .reservation = reservation,
+      .pmr = pmr,
+      .flags = flags,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_map_pmr_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR",
+                           ret);
+   }
+
+   *mapping_out = ret.mapping;
+
+   return VK_SUCCESS;
+}
+
+/* Removes a PMR mapping created with pvr_srv_int_map_pmr(). Failure is
+ * logged via vk_bridge_err() and otherwise ignored.
+ */
+void pvr_srv_int_unmap_pmr(int fd, void *mapping)
+{
+   struct pvr_srv_devmem_int_unmap_pmr_cmd cmd = {
+      .mapping = mapping,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_devmem_int_unmap_pmr_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err = pvr_srv_bridge_call(fd,
+                                       PVR_SRV_BRIDGE_MM,
+                                       PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR,
+                                       &cmd,
+                                       sizeof(cmd),
+                                       &ret,
+                                       sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR",
+                    ret);
+   }
+}
+
+/* Imports an external dma-buf (buffer_fd) as a PMR. On success returns the
+ * PMR handle plus the buffer's size and alignment through the out
+ * parameters. No name is attached to the import (name_size = 0).
+ */
+VkResult pvr_srv_physmem_import_dmabuf(int fd,
+                                       int buffer_fd,
+                                       uint64_t flags,
+                                       void **const pmr_out,
+                                       uint64_t *const size_out,
+                                       uint64_t *const align_out)
+{
+   struct pvr_srv_phys_mem_import_dmabuf_cmd cmd = {
+      .buffer_fd = buffer_fd,
+      .flags = flags,
+      .name_size = 0,
+      .name = NULL,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_phys_mem_import_dmabuf_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_DMABUF,
+                          PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                           "PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF",
+                           ret);
+   }
+
+   *pmr_out = ret.pmr;
+   *size_out = ret.size;
+   *align_out = ret.align;
+
+   return VK_SUCCESS;
+}
+
+/* Exports a PMR as a dma-buf; the new file descriptor is returned through
+ * fd_out. The caller owns the returned fd.
+ */
+VkResult pvr_srv_physmem_export_dmabuf(int fd, void *pmr, int *const fd_out)
+{
+   struct pvr_srv_phys_mem_export_dmabuf_cmd cmd = {
+      .pmr = pmr,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_phys_mem_export_dmabuf_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_DMABUF,
+                          PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_OUT_OF_HOST_MEMORY,
+                           "PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF",
+                           ret);
+   }
+
+   *fd_out = ret.fd;
+
+   return VK_SUCCESS;
+}
+
+/* Creates a server-side compute (CDM) context.
+ *
+ * reset_framework_cmd and static_compute_context_state are opaque buffers
+ * handed to the kernel together with their sizes. On success the kernel
+ * context handle is returned through compute_context_out.
+ */
+VkResult
+pvr_srv_rgx_create_compute_context(int fd,
+                                   uint32_t priority,
+                                   uint32_t reset_framework_cmd_size,
+                                   uint8_t *reset_framework_cmd,
+                                   void *priv_data,
+                                   uint32_t static_compute_context_state_size,
+                                   uint8_t *static_compute_context_state,
+                                   uint32_t packed_ccb_size,
+                                   uint32_t context_flags,
+                                   uint64_t robustness_address,
+                                   uint32_t max_deadline_ms,
+                                   void **const compute_context_out)
+{
+   struct pvr_srv_rgx_create_compute_context_cmd cmd = {
+      .priority = priority,
+      .reset_framework_cmd_size = reset_framework_cmd_size,
+      .reset_framework_cmd = reset_framework_cmd,
+      .priv_data = priv_data,
+      .static_compute_context_state_size = static_compute_context_state_size,
+      .static_compute_context_state = static_compute_context_state,
+      .packed_ccb_size = packed_ccb_size,
+      .context_flags = context_flags,
+      .robustness_address = robustness_address,
+      .max_deadline_ms = max_deadline_ms,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_rgx_create_compute_context_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXCMP,
+                                PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT",
+                           ret);
+   }
+
+   *compute_context_out = ret.compute_context;
+
+   return VK_SUCCESS;
+}
+
+/* Destroys a compute context created with
+ * pvr_srv_rgx_create_compute_context(). Failure is logged via
+ * vk_bridge_err() and otherwise ignored.
+ */
+void pvr_srv_rgx_destroy_compute_context(int fd, void *compute_context)
+{
+   struct pvr_srv_rgx_destroy_compute_context_cmd cmd = {
+      .compute_context = compute_context,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_rgx_destroy_compute_context_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_RGXCMP,
+                          PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT",
+                    ret);
+   }
+}
+
+/* Submits a compute (CDM) command buffer to the given compute context.
+ *
+ * The client_update_* arrays describe sync-prim UFO updates to apply on
+ * completion; check_fence/update_timeline carry the external fence/timeline
+ * fds. On success the new completion fence fd is returned through
+ * update_fence_out.
+ */
+VkResult pvr_srv_rgx_kick_compute2(int fd,
+                                   void *compute_context,
+                                   uint32_t client_cache_op_seq_num,
+                                   uint32_t client_update_count,
+                                   void **client_update_ufo_sync_prim_block,
+                                   uint32_t *client_update_offset,
+                                   uint32_t *client_update_value,
+                                   int32_t check_fence,
+                                   int32_t update_timeline,
+                                   uint32_t cmd_size,
+                                   uint8_t *cdm_cmd,
+                                   uint32_t ext_job_ref,
+                                   uint32_t num_work_groups,
+                                   uint32_t num_work_items,
+                                   uint32_t pdump_flags,
+                                   uint64_t max_deadline_us,
+                                   char *update_fence_name,
+                                   int32_t *const update_fence_out)
+{
+   /* Note: the packed command struct's field order is kernel ABI; the
+    * designated initializers below may appear out of order but fill the
+    * same layout.
+    */
+   struct pvr_srv_rgx_kick_cdm2_cmd cmd = {
+      .max_deadline_us = max_deadline_us,
+      .compute_context = compute_context,
+      .client_update_offset = client_update_offset,
+      .client_update_value = client_update_value,
+      .cdm_cmd = cdm_cmd,
+      .update_fence_name = update_fence_name,
+      .client_update_ufo_sync_prim_block = client_update_ufo_sync_prim_block,
+      .check_fence = check_fence,
+      .update_timeline = update_timeline,
+      .client_cache_op_seq_num = client_cache_op_seq_num,
+      .client_update_count = client_update_count,
+      .cmd_size = cmd_size,
+      .ext_job_ref = ext_job_ref,
+      .num_work_groups = num_work_groups,
+      .num_work_items = num_work_items,
+      .pdump_flags = pdump_flags,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_rgx_kick_cdm2_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXCMP,
+                                PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                           "PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2",
+                           ret);
+   }
+
+   *update_fence_out = ret.update_fence;
+
+   return VK_SUCCESS;
+}
+
+/* Creates a pair of HW render-target datasets for geometry/fragment work.
+ *
+ * The *_dev_addr0/*_dev_addr1 parameter pairs supply per-dataset device
+ * addresses (one for each of the two datasets returned). free_lists is the
+ * array of free-list handles the datasets reference. On success the two
+ * kernel dataset handles are returned through hwrt_dataset0_out and
+ * hwrt_dataset1_out.
+ */
+VkResult
+pvr_srv_rgx_create_hwrt_dataset(int fd,
+                                pvr_dev_addr_t pm_mlist_dev_addr0,
+                                pvr_dev_addr_t pm_mlist_dev_addr1,
+                                pvr_dev_addr_t tail_ptrs_dev_addr,
+                                pvr_dev_addr_t macrotile_array_dev_addr0,
+                                pvr_dev_addr_t macrotile_array_dev_addr1,
+                                pvr_dev_addr_t rtc_dev_addr,
+                                pvr_dev_addr_t rgn_header_dev_addr0,
+                                pvr_dev_addr_t rgn_header_dev_addr1,
+                                pvr_dev_addr_t vheap_table_dev_add,
+                                uint64_t flipped_multi_sample_ctl,
+                                uint64_t multi_sample_ctl,
+                                uint64_t rgn_header_size,
+                                void **free_lists,
+                                uint32_t mtile_stride,
+                                uint32_t ppp_screen,
+                                uint32_t te_aa,
+                                uint32_t te_mtile1,
+                                uint32_t te_mtile2,
+                                uint32_t te_screen,
+                                uint32_t tpc_size,
+                                uint32_t tpc_stride,
+                                uint32_t isp_merge_lower_x,
+                                uint32_t isp_merge_lower_y,
+                                uint32_t isp_merge_scale_x,
+                                uint32_t isp_merge_scale_y,
+                                uint32_t isp_merge_upper_x,
+                                uint32_t isp_merge_upper_y,
+                                uint32_t isp_mtile_size,
+                                uint16_t max_rts,
+                                void **const hwrt_dataset0_out,
+                                void **const hwrt_dataset1_out)
+{
+   struct pvr_srv_rgx_create_hwrt_dataset_cmd cmd = {
+      .pm_mlist_dev_addr0 = pm_mlist_dev_addr0,
+      .pm_mlist_dev_addr1 = pm_mlist_dev_addr1,
+      .tail_ptrs_dev_addr = tail_ptrs_dev_addr,
+      .macrotile_array_dev_addr0 = macrotile_array_dev_addr0,
+      .macrotile_array_dev_addr1 = macrotile_array_dev_addr1,
+      .rtc_dev_addr = rtc_dev_addr,
+      .rgn_header_dev_addr0 = rgn_header_dev_addr0,
+      .rgn_header_dev_addr1 = rgn_header_dev_addr1,
+      .vheap_table_dev_add = vheap_table_dev_add,
+      .flipped_multi_sample_ctl = flipped_multi_sample_ctl,
+      .multi_sample_ctl = multi_sample_ctl,
+      .rgn_header_size = rgn_header_size,
+      .free_lists = free_lists,
+      .mtile_stride = mtile_stride,
+      .ppp_screen = ppp_screen,
+      .te_aa = te_aa,
+      .te_mtile1 = te_mtile1,
+      .te_mtile2 = te_mtile2,
+      .te_screen = te_screen,
+      .tpc_size = tpc_size,
+      .tpc_stride = tpc_stride,
+      .isp_merge_lower_x = isp_merge_lower_x,
+      .isp_merge_lower_y = isp_merge_lower_y,
+      .isp_merge_scale_x = isp_merge_scale_x,
+      .isp_merge_scale_y = isp_merge_scale_y,
+      .isp_merge_upper_x = isp_merge_upper_x,
+      .isp_merge_upper_y = isp_merge_upper_y,
+      .isp_mtile_size = isp_mtile_size,
+      .max_rts = max_rts,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_rgx_create_hwrt_dataset_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXTA3D,
+                                PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET",
+                           ret);
+   }
+
+   *hwrt_dataset0_out = ret.hwrt_dataset0;
+   *hwrt_dataset1_out = ret.hwrt_dataset1;
+
+   return VK_SUCCESS;
+}
+
+/* Destroys a HWRT dataset created with pvr_srv_rgx_create_hwrt_dataset().
+ * Failure is logged via vk_bridge_err() and otherwise ignored.
+ */
+void pvr_srv_rgx_destroy_hwrt_dataset(int fd, void *hwrt_dataset)
+{
+   struct pvr_srv_rgx_destroy_hwrt_dataset_cmd cmd = {
+      .hwrt_dataset = hwrt_dataset,
+   };
+
+   /* Pre-seed the error so a failed ioctl still reads as a bridge failure. */
+   struct pvr_srv_rgx_destroy_hwrt_dataset_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   const int err =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_RGXTA3D,
+                          PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+   if (err != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                    "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET",
+                    ret);
+   }
+}
+
+/* Creates a parameter-memory free list backed by free_list_pmr at
+ * pmr_offset, with initial/grow/max page counts as given. The kernel
+ * cleanup cookie (used by pvr_srv_rgx_destroy_free_list()) is returned
+ * through cleanup_cookie_out.
+ */
+VkResult pvr_srv_rgx_create_free_list(int fd,
+                                      void *mem_ctx_priv_data,
+                                      uint32_t max_free_list_pages,
+                                      uint32_t init_free_list_pages,
+                                      uint32_t grow_free_list_pages,
+                                      uint32_t grow_param_threshold,
+                                      void *global_free_list,
+                                      enum pvr_srv_bool free_list_check,
+                                      pvr_dev_addr_t free_list_dev_addr,
+                                      void *free_list_pmr,
+                                      uint64_t pmr_offset,
+                                      void **const cleanup_cookie_out)
+{
+   struct pvr_srv_rgx_create_free_list_cmd cmd = {
+      .free_list_dev_addr = free_list_dev_addr,
+      .pmr_offset = pmr_offset,
+      .mem_ctx_priv_data = mem_ctx_priv_data,
+      .free_list_pmr = free_list_pmr,
+      .global_free_list = global_free_list,
+      .free_list_check = free_list_check,
+      .grow_free_list_pages = grow_free_list_pages,
+      .grow_param_threshold = grow_param_threshold,
+      .init_free_list_pages = init_free_list_pages,
+      .max_free_list_pages = max_free_list_pages,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_rgx_create_free_list_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXTA3D,
+                                PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST",
+                           ret);
+   }
+
+   *cleanup_cookie_out = ret.cleanup_cookie;
+
+   return VK_SUCCESS;
+}
+
+/* Destroys a free list created with pvr_srv_rgx_create_free_list().
+ *
+ * The server reports PVR_SRV_ERROR_RETRY (through ret.error) while the
+ * free list is still in use, so we busy-wait until the destroy goes
+ * through. Final failure is logged via vk_bridge_err() and otherwise
+ * ignored.
+ */
+void pvr_srv_rgx_destroy_free_list(int fd, void *cleanup_cookie)
+{
+   struct pvr_srv_rgx_destroy_free_list_cmd cmd = {
+      .cleanup_cookie = cleanup_cookie,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_rgx_destroy_free_list_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   /* FIXME: Do we want to propagate the retry error up the call chain so that
+    * we can do something better than busy wait or is the expectation that we
+    * should never get into this situation because the driver doesn't attempt
+    * to free any resources while they're in use?
+    */
+   do {
+      result = pvr_srv_bridge_call(fd,
+                                   PVR_SRV_BRIDGE_RGXTA3D,
+                                   PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST,
+                                   &cmd,
+                                   sizeof(cmd),
+                                   &ret,
+                                   sizeof(ret));
+      /* Retry is signalled via ret.error (a pvr_srv_error); 'result' is the
+       * bridge call's own return code and lives in a different value domain,
+       * so the previous check against 'result' could never trigger a retry.
+       * Only retry when the call itself succeeded.
+       */
+   } while (result == 0 && ret.error == PVR_SRV_ERROR_RETRY);
+
+   if (result || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST",
+                    ret);
+   }
+}
+
+/* Creates a server-side render (TA/3D) context.
+ *
+ * Geometry maps onto the kernel's "TA" fields and fragment onto "3D"
+ * (see the max_ta/max_3d deadline assignments below). The kernel context
+ * handle is returned through render_context_out.
+ */
+VkResult
+pvr_srv_rgx_create_render_context(int fd,
+                                  uint32_t priority,
+                                  pvr_dev_addr_t vdm_callstack_addr,
+                                  uint32_t reset_framework_cmd_size,
+                                  uint8_t *reset_framework_cmd,
+                                  void *priv_data,
+                                  uint32_t static_render_context_state_size,
+                                  uint8_t *static_render_context_state,
+                                  uint32_t packed_ccb_size,
+                                  uint32_t context_flags,
+                                  uint64_t robustness_address,
+                                  uint32_t max_geom_deadline_ms,
+                                  uint32_t max_frag_deadline_ms,
+                                  void **const render_context_out)
+{
+   struct pvr_srv_rgx_create_render_context_cmd cmd = {
+      .priority = priority,
+      .vdm_callstack_addr = vdm_callstack_addr,
+      .reset_framework_cmd_size = reset_framework_cmd_size,
+      .reset_framework_cmd = reset_framework_cmd,
+      .priv_data = priv_data,
+      .static_render_context_state_size = static_render_context_state_size,
+      .static_render_context_state = static_render_context_state,
+      .packed_ccb_size = packed_ccb_size,
+      .context_flags = context_flags,
+      .robustness_address = robustness_address,
+      .max_ta_deadline_ms = max_geom_deadline_ms,
+      .max_3d_deadline_ms = max_frag_deadline_ms,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_rgx_create_render_context_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXTA3D,
+                                PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT",
+                           ret);
+   }
+
+   *render_context_out = ret.render_context;
+
+   return VK_SUCCESS;
+}
+
+/* Destroys a render context created with
+ * pvr_srv_rgx_create_render_context(). Failure is logged via
+ * vk_bridge_err() and otherwise ignored.
+ */
+void pvr_srv_rgx_destroy_render_context(int fd, void *render_context)
+{
+   struct pvr_srv_rgx_destroy_render_context_cmd cmd = {
+      .render_context = render_context,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_rgx_destroy_render_context_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXTA3D,
+                                PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      /* Log message fixed: it previously read "RGXDESTORY..." (typo),
+       * mismatching the bridge define's name.
+       */
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT",
+                    ret);
+   }
+}
+
+/* Submits geometry (TA), partial-render and/or fragment (3D) command
+ * buffers to a render context.
+ *
+ * Geometry parameters map onto the kernel's "ta" fields and fragment onto
+ * "3d". The client_*_{fence,update}_* arrays describe sync-prim UFO
+ * checks/updates; check_fence*/update_timeline* carry external fence and
+ * timeline fds. On success the geometry and fragment completion fence fds
+ * are returned through update_fence_out and update_fence_frag_out.
+ *
+ * Returns VK_NOT_READY when the server asks us to retry the kick.
+ */
+VkResult pvr_srv_rgx_kick_render2(int fd,
+                                  void *render_ctx,
+                                  uint32_t client_cache_op_seq_num,
+                                  uint32_t client_geom_fence_count,
+                                  void **client_geom_fence_sync_prim_block,
+                                  uint32_t *client_geom_fence_sync_offset,
+                                  uint32_t *client_geom_fence_value,
+                                  uint32_t client_geom_update_count,
+                                  void **client_geom_update_sync_prim_block,
+                                  uint32_t *client_geom_update_sync_offset,
+                                  uint32_t *client_geom_update_value,
+                                  uint32_t client_frag_update_count,
+                                  void **client_frag_update_sync_prim_block,
+                                  uint32_t *client_frag_update_sync_offset,
+                                  uint32_t *client_frag_update_value,
+                                  void *pr_fence_ufo_sync_prim_block,
+                                  uint32_t client_pr_fence_ufo_sync_offset,
+                                  uint32_t client_pr_fence_value,
+                                  int32_t check_fence,
+                                  int32_t update_timeline,
+                                  int32_t *const update_fence_out,
+                                  char *update_fence_name,
+                                  int32_t check_fence_frag,
+                                  int32_t update_timeline_frag,
+                                  int32_t *const update_fence_frag_out,
+                                  char *update_fence_name_frag,
+                                  uint32_t cmd_geom_size,
+                                  uint8_t *cmd_geom,
+                                  uint32_t cmd_frag_pr_size,
+                                  uint8_t *cmd_frag_pr,
+                                  uint32_t cmd_frag_size,
+                                  uint8_t *cmd_frag,
+                                  uint32_t ext_job_ref,
+                                  bool kick_geom,
+                                  bool kick_pr,
+                                  bool kick_frag,
+                                  bool abort,
+                                  uint32_t pdump_flags,
+                                  void *hw_rt_dataset,
+                                  void *zs_buffer,
+                                  void *msaa_scratch_buffer,
+                                  uint32_t sync_pmr_count,
+                                  uint32_t *sync_pmr_flags,
+                                  void **sync_pmrs,
+                                  uint32_t render_target_size,
+                                  uint32_t num_draw_calls,
+                                  uint32_t num_indices,
+                                  uint32_t num_mrts,
+                                  uint64_t deadline)
+{
+   struct pvr_srv_rgx_kick_ta3d2_cmd cmd = {
+      .deadline = deadline,
+      .hw_rt_dataset = hw_rt_dataset,
+      .msaa_scratch_buffer = msaa_scratch_buffer,
+      .pr_fence_ufo_sync_prim_block = pr_fence_ufo_sync_prim_block,
+      .render_ctx = render_ctx,
+      .zs_buffer = zs_buffer,
+      .client_3d_update_sync_offset = client_frag_update_sync_offset,
+      .client_3d_update_value = client_frag_update_value,
+      .client_ta_fence_sync_offset = client_geom_fence_sync_offset,
+      .client_ta_fence_value = client_geom_fence_value,
+      .client_ta_update_sync_offset = client_geom_update_sync_offset,
+      .client_ta_update_value = client_geom_update_value,
+      .sync_pmr_flags = sync_pmr_flags,
+      .cmd_3d = cmd_frag,
+      .cmd_3d_pr = cmd_frag_pr,
+      .cmd_ta = cmd_geom,
+      .update_fence_name = update_fence_name,
+      .update_fence_name_3d = update_fence_name_frag,
+      .client_3d_update_sync_prim_block = client_frag_update_sync_prim_block,
+      .client_ta_fence_sync_prim_block = client_geom_fence_sync_prim_block,
+      .client_ta_update_sync_prim_block = client_geom_update_sync_prim_block,
+      .sync_pmrs = sync_pmrs,
+      .abort = abort,
+      .kick_3d = kick_frag,
+      .kick_pr = kick_pr,
+      .kick_ta = kick_geom,
+      .check_fence = check_fence,
+      .check_fence_3d = check_fence_frag,
+      .update_timeline = update_timeline,
+      .update_timeline_3d = update_timeline_frag,
+      .cmd_3d_size = cmd_frag_size,
+      .cmd_3d_pr_size = cmd_frag_pr_size,
+      .client_3d_update_count = client_frag_update_count,
+      .client_cache_op_seq_num = client_cache_op_seq_num,
+      .client_ta_fence_count = client_geom_fence_count,
+      .client_ta_update_count = client_geom_update_count,
+      .ext_job_ref = ext_job_ref,
+      .client_pr_fence_ufo_sync_offset = client_pr_fence_ufo_sync_offset,
+      .client_pr_fence_value = client_pr_fence_value,
+      .num_draw_calls = num_draw_calls,
+      .num_indices = num_indices,
+      .num_mrts = num_mrts,
+      .pdump_flags = pdump_flags,
+      .render_target_size = render_target_size,
+      .sync_pmr_count = sync_pmr_count,
+      .cmd_ta_size = cmd_geom_size,
+   };
+
+   /* Initialize ret.error to a default error */
+   struct pvr_srv_rgx_kick_ta3d2_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+      .update_fence = -1,
+      .update_fence_3d = -1,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXTA3D,
+                                PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      /* There is no 'retry' VkResult, so treat it as VK_NOT_READY instead.
+       * The retry status is reported in ret.error; the previous check
+       * compared 'result' (the bridge call's return code) against
+       * PVR_SRV_ERROR_RETRY, which could never match.
+       */
+      if (ret.error == PVR_SRV_ERROR_RETRY)
+         return VK_NOT_READY;
+
+      return vk_bridge_err(VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                           "PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2",
+                           ret);
+   }
+
+   *update_fence_out = ret.update_fence;
+   *update_fence_frag_out = ret.update_fence_3d;
+
+   return VK_SUCCESS;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_BRIDGE_H
+#define PVR_SRV_BRIDGE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/******************************************************************************
+ Services bridges
+ ******************************************************************************/
+
+/* NOTE(review): the bridge group IDs and per-group function indices below
+ * appear to be fixed by the kernel Services bridge dispatch tables for the
+ * DDK version given by PVR_SRV_VERSION — confirm against the kernel driver
+ * before changing any value.
+ */
+#define PVR_SRV_BRIDGE_SRVCORE 1UL
+
+#define PVR_SRV_BRIDGE_SRVCORE_CONNECT 0UL
+#define PVR_SRV_BRIDGE_SRVCORE_DISCONNECT 1UL
+
+#define PVR_SRV_BRIDGE_SYNC 2UL
+
+#define PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK 0UL
+#define PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK 1UL
+
+#define PVR_SRV_BRIDGE_MM 6UL
+
+/* Note: the MM function indices are sparse; only the entries this winsys
+ * uses are defined here.
+ */
+#define PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR 8UL
+#define PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR 10UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE 15UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY 16UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE 17UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY 18UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR 19UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR 20UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE 21UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE 22UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES 24UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES 25UL
+#define PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT 30UL
+#define PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS 32UL
+
+#define PVR_SRV_BRIDGE_DMABUF 11UL
+
+#define PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF 0UL
+#define PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF 1UL
+
+#define PVR_SRV_BRIDGE_RGXCMP 129UL
+
+#define PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT 0UL
+#define PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT 1UL
+#define PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2 5UL
+
+#define PVR_SRV_BRIDGE_RGXTA3D 130UL
+
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET 0UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET 1UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST 6UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST 7UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT 8UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT 9UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2 12UL
+
+/******************************************************************************
+ DRM Services specific defines
+ ******************************************************************************/
+/* DRM command numbers, relative to DRM_COMMAND_BASE.
+ * These defines must be prefixed with "DRM_".
+ */
+#define DRM_SRVKM_CMD 0U /* Used for Services ioctls */
+
+/* These defines must be prefixed with "DRM_IOCTL_". */
+#define DRM_IOCTL_SRVKM_CMD \
+ DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_CMD, struct drm_srvkm_cmd)
+
+/******************************************************************************
+ Misc defines
+ ******************************************************************************/
+
+/* Build-option bits advertised to the kernel at connect time; these must
+ * agree with the kernel module's build options for the connect to be
+ * accepted. NOTE(review): bit meanings are presumably defined by the
+ * kernel DDK headers — confirm there before changing.
+ */
+#define SUPPORT_RGX_SET_OFFSET BITFIELD_BIT(4U)
+#define DEBUG_SET_OFFSET BITFIELD_BIT(10U)
+#define SUPPORT_BUFFER_SYNC_SET_OFFSET BITFIELD_BIT(11U)
+#define OPTIONS_BIT31 BITFIELD_BIT(31U)
+
+#define RGX_BUILD_OPTIONS \
+   (SUPPORT_RGX_SET_OFFSET | DEBUG_SET_OFFSET | \
+    SUPPORT_BUFFER_SYNC_SET_OFFSET | OPTIONS_BIT31)
+
+/* Targeted DDK version: packed as major in the high 16 bits, minor in the
+ * low 16 bits.
+ */
+#define PVR_SRV_VERSION_MAJ 1U
+#define PVR_SRV_VERSION_MIN 14U
+
+#define PVR_SRV_VERSION \
+   (((uint32_t)((uint32_t)(PVR_SRV_VERSION_MAJ)&0xFFFFU) << 16U) | \
+    (((PVR_SRV_VERSION_MIN)&0xFFFFU) << 0U))
+
+#define PVR_SRV_VERSION_BUILD 5843584
+
+/*! This flags gets set if the client is 64 Bit compatible. */
+#define PVR_SRV_FLAGS_CLIENT_64BIT_COMPAT BITFIELD_BIT(5U)
+
+/* Maximum length (including NUL) of a PMR allocation annotation string. */
+#define DEVMEM_ANNOTATION_MAX_LEN 64U
+
+#define PVR_SRV_SYNC_MAX 12U
+
+#define PVR_BUFFER_FLAG_READ BITFIELD_BIT(0U)
+#define PVR_BUFFER_FLAG_WRITE BITFIELD_BIT(1U)
+
+/******************************************************************************
+ Services Boolean
+ ******************************************************************************/
+
+/* Boolean type used in bridge structs. The FORCE_ALIGN value pins the enum's
+ * storage to 32 bits so the packed struct layouts match the kernel.
+ */
+enum pvr_srv_bool {
+   PVR_SRV_FALSE = 0,
+   PVR_SRV_TRUE = 1,
+   PVR_SRV_FORCE_ALIGN = 0x7fffffff
+};
+
+/******************************************************************************
+ Service Error codes
+ ******************************************************************************/
+
+/* Subset of the kernel Services error codes this driver inspects.
+ * NOTE(review): numeric values (25, 37) must match the DDK's PVRSRV_ERROR
+ * enumeration — verify against the 1.14 DDK. FORCE_I32 pins 32-bit storage.
+ */
+enum pvr_srv_error {
+   PVR_SRV_OK,
+   PVR_SRV_ERROR_RETRY = 25,
+   PVR_SRV_ERROR_BRIDGE_CALL_FAILED = 37,
+   PVR_SRV_ERROR_FORCE_I32 = 0x7fffffff
+};
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_SRVCORE_CONNECT structs
+ ******************************************************************************/
+
+/* All bridge cmd/ret structs below are PACKED and their field order forms
+ * the kernel ABI — do not reorder or resize fields.
+ */
+struct pvr_srv_bridge_connect_cmd {
+   uint32_t build_options;
+   uint32_t DDK_build;
+   uint32_t DDK_version;
+   uint32_t flags;
+} PACKED;
+
+struct pvr_srv_bridge_connect_ret {
+   /* Packed GPU BVNC (branch/version/number/config) identifier. */
+   uint64_t bvnc;
+   enum pvr_srv_error error;
+   uint32_t capability_flags;
+   uint8_t kernel_arch;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_SRVCORE_DISCONNECT struct
+ ******************************************************************************/
+
+struct pvr_srv_bridge_disconnect_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK struct
+ ******************************************************************************/
+
+struct pvr_srv_bridge_alloc_sync_primitive_block_ret {
+   /* Opaque kernel handles; passed back verbatim in later bridge calls. */
+   void *handle;
+   void *pmr;
+   enum pvr_srv_error error;
+   uint32_t size;
+   uint32_t addr;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK structs
+ ******************************************************************************/
+
+struct pvr_srv_bridge_free_sync_primitive_block_cmd {
+   void *handle;
+} PACKED;
+
+struct pvr_srv_bridge_free_sync_primitive_block_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_ctx_create_cmd {
+   uint32_t kernel_memory_ctx;
+} PACKED;
+
+struct pvr_srv_devmem_int_ctx_create_ret {
+   /* Opaque kernel handles for the created device memory context. */
+   void *server_memctx;
+   void *server_memctx_data;
+   enum pvr_srv_error error;
+   uint32_t cpu_cache_line_size;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_ctx_destroy_cmd {
+   void *server_memctx;
+} PACKED;
+
+struct pvr_srv_devmem_int_ctx_destroy_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT structs
+ ******************************************************************************/
+
+struct pvr_srv_heap_count_cmd {
+   uint32_t heap_config_index;
+} PACKED;
+
+struct pvr_srv_heap_count_ret {
+   enum pvr_srv_error error;
+   uint32_t heap_count;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS structs
+ ******************************************************************************/
+
+struct pvr_srv_heap_cfg_details_cmd {
+   /* User buffer the kernel fills with the heap name (buffer_size bytes). */
+   char *buffer;
+   uint32_t heap_config_index;
+   uint32_t heap_index;
+   uint32_t buffer_size;
+} PACKED;
+
+struct pvr_srv_heap_cfg_details_ret {
+   pvr_dev_addr_t base_addr;
+   uint64_t size;
+   uint64_t reserved_size;
+   char *buffer;
+   enum pvr_srv_error error;
+   uint32_t log2_page_size;
+   uint32_t log2_alignment;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_heap_create_cmd {
+   /* Device-virtual base address and size of the heap being instantiated. */
+   pvr_dev_addr_t base_addr;
+   uint64_t size;
+   void *server_memctx;
+   uint32_t log2_page_size;
+} PACKED;
+
+struct pvr_srv_devmem_int_heap_create_ret {
+   void *server_heap;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_heap_destroy_cmd {
+   void *server_heap;
+} PACKED;
+
+struct pvr_srv_devmem_int_heap_destroy_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_reserve_range_cmd {
+   /* Device-virtual range to reserve within server_heap. */
+   pvr_dev_addr_t addr;
+   uint64_t size;
+   void *server_heap;
+} PACKED;
+
+struct pvr_srv_devmem_int_reserve_range_ret {
+   void *reservation;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE structs
+ ******************************************************************************/
+
+struct pvr_srv_bridge_in_devmem_int_unreserve_range_cmd {
+   void *reservation;
+} PACKED;
+
+struct pvr_srv_bridge_in_devmem_int_unreserve_range_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR structs
+ ******************************************************************************/
+
+struct pvr_srv_physmem_new_ram_backed_locked_pmr_cmd {
+   uint64_t block_size;
+   uint64_t size;
+   /* Array of virt_blocks entries mapping virtual to physical blocks. */
+   uint32_t *mapping_table;
+   /* Debug annotation string, at most DEVMEM_ANNOTATION_MAX_LEN bytes. */
+   const char *annotation;
+   uint32_t annotation_size;
+   uint32_t log2_page_size;
+   uint32_t phy_blocks;
+   uint32_t virt_blocks;
+   uint32_t pdump_flags;
+   uint32_t pid;
+   uint64_t flags;
+} PACKED;
+
+struct pvr_srv_physmem_new_ram_backed_locked_pmr_ret {
+   void *pmr;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR structs
+ ******************************************************************************/
+
+struct pvr_srv_pmr_unref_unlock_pmr_cmd {
+   void *pmr;
+} PACKED;
+
+struct pvr_srv_pmr_unref_unlock_pmr_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_map_pages_cmd {
+   pvr_dev_addr_t addr;
+   void *pmr;
+   void *reservation;
+   uint32_t page_count;
+   uint32_t page_offset;
+   uint64_t flags;
+} PACKED;
+
+struct pvr_srv_devmem_int_map_pages_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_unmap_pages_cmd {
+   pvr_dev_addr_t dev_addr;
+   void *reservation;
+   uint32_t page_count;
+} PACKED;
+
+struct pvr_srv_devmem_int_unmap_pages_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_map_pmr_cmd {
+   void *server_heap;
+   void *pmr;
+   void *reservation;
+   uint64_t flags;
+} PACKED;
+
+struct pvr_srv_devmem_int_map_pmr_ret {
+   void *mapping;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_unmap_pmr_cmd {
+   void *mapping;
+} PACKED;
+
+struct pvr_srv_devmem_int_unmap_pmr_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF structs
+ ******************************************************************************/
+
+struct pvr_srv_phys_mem_import_dmabuf_cmd {
+   /* Debug name for the import, name_size bytes. */
+   const char *name;
+   /* dma-buf file descriptor to import. */
+   int buffer_fd;
+   uint32_t name_size;
+   uint64_t flags;
+} PACKED;
+
+struct pvr_srv_phys_mem_import_dmabuf_ret {
+   uint64_t align;
+   uint64_t size;
+   void *pmr;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF structs
+ ******************************************************************************/
+
+struct pvr_srv_phys_mem_export_dmabuf_cmd {
+   void *pmr;
+} PACKED;
+
+struct pvr_srv_phys_mem_export_dmabuf_ret {
+   enum pvr_srv_error error;
+   /* Exported dma-buf file descriptor. */
+   int fd;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_create_compute_context_cmd {
+   uint64_t robustness_address;
+   void *priv_data;
+   uint8_t *reset_framework_cmd;
+   /* Firmware context-switch state blob for the CDM. */
+   uint8_t *static_compute_context_state;
+   uint32_t context_flags;
+   uint32_t reset_framework_cmd_size;
+   uint32_t max_deadline_ms;
+   uint32_t packed_ccb_size;
+   /* RGX_CONTEXT_PRIORITY_... flags. */
+   uint32_t priority;
+   uint32_t static_compute_context_state_size;
+} PACKED;
+
+struct pvr_srv_rgx_create_compute_context_ret {
+   void *compute_context;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_destroy_compute_context_cmd {
+   void *compute_context;
+} PACKED;
+
+struct pvr_srv_rgx_destroy_compute_context_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2 structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_kick_cdm2_cmd {
+   uint64_t max_deadline_us;
+   void *compute_context;
+   uint32_t *client_update_offset;
+   uint32_t *client_update_value;
+   /* Firmware CDM command blob, cmd_size bytes. */
+   uint8_t *cdm_cmd;
+   char *update_fence_name;
+   void **client_update_ufo_sync_prim_block;
+   /* Sync fence fd to wait on before running (-1 for none). */
+   int32_t check_fence;
+   /* Timeline fd on which the update fence is created. */
+   int32_t update_timeline;
+   uint32_t client_cache_op_seq_num;
+   uint32_t client_update_count;
+   uint32_t cmd_size;
+   uint32_t ext_job_ref;
+   uint32_t num_work_groups;
+   uint32_t num_work_items;
+   uint32_t pdump_flags;
+} PACKED;
+
+struct pvr_srv_rgx_kick_cdm2_ret {
+   enum pvr_srv_error error;
+   int32_t update_fence;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET structs
+ ******************************************************************************/
+
+/* NOTE(review): the ..._dev_addr0/1 pairs presumably correspond to the two
+ * HWRT datasets returned below — confirm against the DDK bridge definition.
+ */
+struct pvr_srv_rgx_create_hwrt_dataset_cmd {
+   pvr_dev_addr_t pm_mlist_dev_addr0;
+   pvr_dev_addr_t pm_mlist_dev_addr1;
+   pvr_dev_addr_t tail_ptrs_dev_addr;
+   pvr_dev_addr_t macrotile_array_dev_addr0;
+   pvr_dev_addr_t macrotile_array_dev_addr1;
+   pvr_dev_addr_t rtc_dev_addr;
+   pvr_dev_addr_t rgn_header_dev_addr0;
+   pvr_dev_addr_t rgn_header_dev_addr1;
+   pvr_dev_addr_t vheap_table_dev_add;
+   uint64_t flipped_multi_sample_ctl;
+   uint64_t multi_sample_ctl;
+   uint64_t rgn_header_size;
+   void **free_lists;
+   uint32_t mtile_stride;
+   uint32_t ppp_screen;
+   uint32_t te_aa;
+   uint32_t te_mtile1;
+   uint32_t te_mtile2;
+   uint32_t te_screen;
+   uint32_t tpc_size;
+   uint32_t tpc_stride;
+   uint32_t isp_merge_lower_x;
+   uint32_t isp_merge_lower_y;
+   uint32_t isp_merge_scale_x;
+   uint32_t isp_merge_scale_y;
+   uint32_t isp_merge_upper_x;
+   uint32_t isp_merge_upper_y;
+   uint32_t isp_mtile_size;
+   uint16_t max_rts;
+} PACKED;
+
+struct pvr_srv_rgx_create_hwrt_dataset_ret {
+   void *hwrt_dataset0;
+   void *hwrt_dataset1;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_destroy_hwrt_dataset_cmd {
+   void *hwrt_dataset;
+} PACKED;
+
+struct pvr_srv_rgx_destroy_hwrt_dataset_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_create_free_list_cmd {
+   pvr_dev_addr_t free_list_dev_addr;
+   uint64_t pmr_offset;
+   void *mem_ctx_priv_data;
+   void *free_list_pmr;
+   void *global_free_list;
+   enum pvr_srv_bool free_list_check;
+   /* Grow/initial/max sizes of the parameter-memory free list, in pages. */
+   uint32_t grow_free_list_pages;
+   uint32_t grow_param_threshold;
+   uint32_t init_free_list_pages;
+   uint32_t max_free_list_pages;
+} PACKED;
+
+struct pvr_srv_rgx_create_free_list_ret {
+   void *cleanup_cookie;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_destroy_free_list_cmd {
+   void *cleanup_cookie;
+} PACKED;
+
+struct pvr_srv_rgx_destroy_free_list_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_create_render_context_cmd {
+   pvr_dev_addr_t vdm_callstack_addr;
+   uint64_t robustness_address;
+   void *priv_data;
+   uint8_t *reset_framework_cmd;
+   uint8_t *static_render_context_state;
+/* Disable "sync lockup recovery" for this context. */
+#define RGX_CONTEXT_FLAG_DISABLESLR BITFIELD_BIT(0U)
+   uint32_t context_flags;
+   uint32_t reset_framework_cmd_size;
+   uint32_t max_3d_deadline_ms;
+   uint32_t max_ta_deadline_ms;
+   uint32_t packed_ccb_size;
+/* Priority values shared by all context-create bridge calls. */
+#define RGX_CONTEXT_PRIORITY_REALTIME UINT32_MAX
+#define RGX_CONTEXT_PRIORITY_HIGH 2U
+#define RGX_CONTEXT_PRIORITY_MEDIUM 1U
+#define RGX_CONTEXT_PRIORITY_LOW 0U
+   uint32_t priority;
+   uint32_t static_render_context_state_size;
+} PACKED;
+
+struct pvr_srv_rgx_create_render_context_ret {
+   void *render_context;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_destroy_render_context_cmd {
+   void *render_context;
+} PACKED;
+
+struct pvr_srv_rgx_destroy_render_context_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+ PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2 structs
+ ******************************************************************************/
+
+/* Combined geometry (TA) + fragment (3D) kick. The kick_* booleans select
+ * which phases actually run.
+ */
+struct pvr_srv_rgx_kick_ta3d2_cmd {
+   uint64_t deadline;
+   void *hw_rt_dataset;
+   void *msaa_scratch_buffer;
+   void *pr_fence_ufo_sync_prim_block;
+   void *render_ctx;
+   void *zs_buffer;
+   uint32_t *client_3d_update_sync_offset;
+   uint32_t *client_3d_update_value;
+   uint32_t *client_ta_fence_sync_offset;
+   uint32_t *client_ta_fence_value;
+   uint32_t *client_ta_update_sync_offset;
+   uint32_t *client_ta_update_value;
+   /* PVR_BUFFER_FLAG_READ/WRITE per entry of sync_pmrs. */
+   uint32_t *sync_pmr_flags;
+   uint8_t *cmd_3d;
+   uint8_t *cmd_3d_pr;
+   uint8_t *cmd_ta;
+   char *update_fence_name;
+   char *update_fence_name_3d;
+   void **client_3d_update_sync_prim_block;
+   void **client_ta_fence_sync_prim_block;
+   void **client_ta_update_sync_prim_block;
+   void **sync_pmrs;
+   enum pvr_srv_bool abort;
+   enum pvr_srv_bool kick_3d;
+   enum pvr_srv_bool kick_pr;
+   enum pvr_srv_bool kick_ta;
+   /* Fence fds to wait on (-1 for none) and timelines for update fences. */
+   int32_t check_fence;
+   int32_t check_fence_3d;
+   int32_t update_timeline;
+   int32_t update_timeline_3d;
+   uint32_t cmd_3d_size;
+   uint32_t cmd_3d_pr_size;
+   uint32_t client_3d_update_count;
+   uint32_t client_cache_op_seq_num;
+   uint32_t client_ta_fence_count;
+   uint32_t client_ta_update_count;
+   uint32_t ext_job_ref;
+   uint32_t client_pr_fence_ufo_sync_offset;
+   uint32_t client_pr_fence_value;
+   uint32_t num_draw_calls;
+   uint32_t num_indices;
+   uint32_t num_mrts;
+   uint32_t pdump_flags;
+   uint32_t render_target_size;
+   uint32_t sync_pmr_count;
+   uint32_t cmd_ta_size;
+} PACKED;
+
+struct pvr_srv_rgx_kick_ta3d2_ret {
+   enum pvr_srv_error error;
+   int32_t update_fence;
+   int32_t update_fence_3d;
+} PACKED;
+
+/******************************************************************************
+ Ioctl structure to pass cmd and ret structures
+ ******************************************************************************/
+
+/* Envelope passed through DRM_IOCTL_SRVKM_CMD: selects the bridge module
+ * (bridge_id) and function (bridge_func_id), and carries user pointers to
+ * the cmd (in) and ret (out) structs defined above.
+ */
+struct drm_srvkm_cmd {
+   uint32_t bridge_id;
+   uint32_t bridge_func_id;
+   uint64_t in_data_ptr;
+   uint64_t out_data_ptr;
+   uint32_t in_data_size;
+   uint32_t out_data_size;
+};
+
+/******************************************************************************
+ Bridge function prototype
+ ******************************************************************************/
+
+VkResult pvr_srv_connection_create(int fd, uint64_t *const bvnc_out);
+void pvr_srv_connection_destroy(int fd);
+
+VkResult pvr_srv_alloc_sync_primitive_block(int fd,
+ void **const handle_out,
+ void **const pmr_out,
+ uint32_t *const size_out,
+ uint32_t *const addr_out);
+void pvr_srv_free_sync_primitive_block(int fd, void *handle);
+
+VkResult pvr_srv_get_heap_count(int fd, uint32_t *const heap_count_out);
+VkResult pvr_srv_get_heap_details(int fd,
+ uint32_t heap_index,
+ uint32_t buffer_size,
+ char *const buffer_out,
+ pvr_dev_addr_t *const base_address_out,
+ uint64_t *const size_out,
+ uint64_t *const reserved_size_out,
+ uint32_t *const log2_page_size_out);
+
+VkResult pvr_srv_int_heap_create(int fd,
+ pvr_dev_addr_t base_address,
+ uint64_t size,
+ uint32_t log2_page_size,
+ void *server_memctx,
+ void **const server_heap_out);
+void pvr_srv_int_heap_destroy(int fd, void *server_heap);
+
+VkResult pvr_srv_int_ctx_create(int fd,
+ void **const server_memctx_out,
+ void **const server_memctx_data_out);
+void pvr_srv_int_ctx_destroy(int fd, void *server_memctx);
+
+VkResult pvr_srv_int_reserve_addr(int fd,
+ void *server_heap,
+ pvr_dev_addr_t addr,
+ uint64_t size,
+ void **const reservation_out);
+void pvr_srv_int_unreserve_addr(int fd, void *reservation);
+
+VkResult pvr_srv_alloc_pmr(int fd,
+ uint64_t size,
+ uint64_t block_size,
+ uint32_t phy_blocks,
+ uint32_t virt_blocks,
+ uint32_t log2_page_size,
+ uint64_t flags,
+ uint32_t pid,
+ void **const pmr_out);
+void pvr_srv_free_pmr(int fd, void *pmr);
+
+VkResult pvr_srv_int_map_pages(int fd,
+ void *reservation,
+ void *pmr,
+ uint32_t page_count,
+ uint32_t page_offset,
+ uint64_t flags,
+ pvr_dev_addr_t addr);
+void pvr_srv_int_unmap_pages(int fd,
+ void *reservation,
+ pvr_dev_addr_t dev_addr,
+ uint32_t page_count);
+
+VkResult pvr_srv_int_map_pmr(int fd,
+ void *server_heap,
+ void *reservation,
+ void *pmr,
+ uint64_t flags,
+ void **const mapping_out);
+void pvr_srv_int_unmap_pmr(int fd, void *mapping);
+
+VkResult pvr_srv_physmem_import_dmabuf(int fd,
+ int buffer_fd,
+ uint64_t flags,
+ void **const pmr_out,
+ uint64_t *const size_out,
+ uint64_t *const align_out);
+VkResult pvr_srv_physmem_export_dmabuf(int fd, void *pmr, int *const fd_out);
+
+VkResult
+pvr_srv_rgx_create_compute_context(int fd,
+ uint32_t priority,
+ uint32_t reset_framework_cmd_size,
+ uint8_t *reset_framework_cmd,
+ void *priv_data,
+ uint32_t static_compute_context_state_size,
+ uint8_t *static_compute_context_state,
+ uint32_t packed_ccb_size,
+ uint32_t context_flags,
+ uint64_t robustness_address,
+ uint32_t max_deadline_ms,
+ void **const compute_context_out);
+void pvr_srv_rgx_destroy_compute_context(int fd, void *compute_context);
+
+VkResult pvr_srv_rgx_kick_compute2(int fd,
+ void *compute_context,
+ uint32_t client_cache_op_seq_num,
+ uint32_t client_update_count,
+ void **client_update_ufo_sync_prim_block,
+ uint32_t *client_update_offset,
+ uint32_t *client_update_value,
+ int32_t check_fence,
+ int32_t update_timeline,
+ uint32_t cmd_size,
+ uint8_t *cdm_cmd,
+ uint32_t ext_job_ref,
+ uint32_t num_work_groups,
+ uint32_t num_work_items,
+ uint32_t pdump_flags,
+ uint64_t max_deadline_us,
+ char *update_fence_name,
+ int32_t *const update_fence_out);
+
+VkResult
+pvr_srv_rgx_create_hwrt_dataset(int fd,
+ pvr_dev_addr_t pm_mlist_dev_addr0,
+ pvr_dev_addr_t pm_mlist_dev_addr1,
+ pvr_dev_addr_t tail_ptrs_dev_addr,
+ pvr_dev_addr_t macrotile_array_dev_addr0,
+ pvr_dev_addr_t macrotile_array_dev_addr1,
+ pvr_dev_addr_t rtc_dev_addr,
+ pvr_dev_addr_t rgn_header_dev_addr0,
+ pvr_dev_addr_t rgn_header_dev_addr1,
+ pvr_dev_addr_t vheap_table_dev_add,
+ uint64_t flipped_multi_sample_ctl,
+ uint64_t multi_sample_ctl,
+ uint64_t rgn_header_size,
+ void **free_lists,
+ uint32_t mtile_stride,
+ uint32_t ppp_screen,
+ uint32_t te_aa,
+ uint32_t te_mtile1,
+ uint32_t te_mtile2,
+ uint32_t te_screen,
+ uint32_t tpc_size,
+ uint32_t tpc_stride,
+ uint32_t isp_merge_lower_x,
+ uint32_t isp_merge_lower_y,
+ uint32_t isp_merge_scale_x,
+ uint32_t isp_merge_scale_y,
+ uint32_t isp_merge_upper_x,
+ uint32_t isp_merge_upper_y,
+ uint32_t isp_mtile_size,
+ uint16_t max_rts,
+ void **const hwrt_dataset0_out,
+ void **const hwrt_dataset1_out);
+
+void pvr_srv_rgx_destroy_hwrt_dataset(int fd, void *hwrt_dataset);
+
+VkResult pvr_srv_rgx_create_free_list(int fd,
+ void *mem_ctx_priv_data,
+ uint32_t max_free_list_pages,
+ uint32_t init_free_list_pages,
+ uint32_t grow_free_list_pages,
+ uint32_t grow_param_threshold,
+ void *global_free_list,
+ enum pvr_srv_bool free_list_check,
+ pvr_dev_addr_t free_list_dev_addr,
+ void *free_list_pmr,
+ uint64_t pmr_offset,
+ void **const cleanup_cookie_out);
+
+void pvr_srv_rgx_destroy_free_list(int fd, void *cleanup_cookie);
+
+VkResult
+pvr_srv_rgx_create_render_context(int fd,
+ uint32_t priority,
+ pvr_dev_addr_t vdm_callstack_addr,
+ uint32_t reset_framework_cmd_size,
+ uint8_t *reset_framework_cmd,
+ void *priv_data,
+ uint32_t static_render_context_state_size,
+ uint8_t *static_render_context_state,
+ uint32_t packed_ccb_size,
+ uint32_t context_flags,
+ uint64_t robustness_address,
+ uint32_t max_geom_deadline_ms,
+ uint32_t max_frag_deadline_ms,
+ void **const render_context_out);
+
+void pvr_srv_rgx_destroy_render_context(int fd, void *render_context);
+
+VkResult pvr_srv_rgx_kick_render2(int fd,
+ void *render_ctx,
+ uint32_t client_cache_op_seq_num,
+ uint32_t client_geom_fence_count,
+ void **client_geom_fence_sync_prim_block,
+ uint32_t *client_geom_fence_sync_offset,
+ uint32_t *client_geom_fence_value,
+ uint32_t client_geom_update_count,
+ void **client_geom_update_sync_prim_block,
+ uint32_t *client_geom_update_sync_offset,
+ uint32_t *client_geom_update_value,
+ uint32_t client_frag_update_count,
+ void **client_frag_update_sync_prim_block,
+ uint32_t *client_frag_update_sync_offset,
+ uint32_t *client_frag_update_value,
+ void *client_pr_fence_ufo_sync_prim_block,
+ uint32_t client_pr_fence_ufo_sync_offset,
+ uint32_t client_pr_fence_value,
+ int32_t check_fence,
+ int32_t update_timeline,
+ int32_t *const update_fence_out,
+ char *update_fence_name,
+ int32_t check_fence_frag,
+ int32_t update_timeline_frag,
+ int32_t *const update_fence_frag_out,
+ char *update_fence_name_frag,
+ uint32_t cmd_geom_size,
+ uint8_t *cmd_geom,
+ uint32_t cmd_frag_pr_size,
+ uint8_t *cmd_frag_pr,
+ uint32_t cmd_frag_size,
+ uint8_t *cmd_frag,
+ uint32_t ext_job_ref,
+ bool kick_geom,
+ bool kick_pr,
+ bool kick_frag,
+ bool abort,
+ uint32_t pdump_flags,
+ void *hw_rt_dataset,
+ void *zs_buffer,
+ void *msaa_scratch_buffer,
+ uint32_t sync_pmr_count,
+ uint32_t *sync_pmr_flags,
+ void **sync_pmrs,
+ uint32_t render_target_size,
+ uint32_t num_draw_calls,
+ uint32_t num_indices,
+ uint32_t num_mrts,
+ uint64_t deadline);
+
+#endif /* PVR_SRV_BRIDGE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_COMMON_H
+#define PVR_SRV_JOB_COMMON_H
+
+#include <stdint.h>
+
+#include "pvr_srv_bridge.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+#define PVR_SRV_SYNC_DEV_PATH "/dev/pvr_sync"
+
+/* Translate a generic winsys context priority into the Services
+ * RGX_CONTEXT_PRIORITY_* value expected by the context-create bridge calls.
+ * Any value outside the known enumerators is a programming error.
+ */
+static inline uint32_t
+pvr_srv_from_winsys_priority(enum pvr_winsys_ctx_priority priority)
+{
+   if (priority == PVR_WINSYS_CTX_PRIORITY_HIGH)
+      return RGX_CONTEXT_PRIORITY_HIGH;
+
+   if (priority == PVR_WINSYS_CTX_PRIORITY_MEDIUM)
+      return RGX_CONTEXT_PRIORITY_MEDIUM;
+
+   if (priority == PVR_WINSYS_CTX_PRIORITY_LOW)
+      return RGX_CONTEXT_PRIORITY_LOW;
+
+   unreachable("Invalid winsys context priority.");
+}
+
+#endif /* PVR_SRV_JOB_COMMON_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <vulkan/vulkan.h>
+
+#include "fw-api/pvr_rogue_fwif.h"
+#include "fw-api/pvr_rogue_fwif_rf.h"
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_srv_job_compute.h"
+#include "pvr_srv_job_common.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+/* Services-specific wrapper around the common winsys compute context. */
+struct pvr_srv_winsys_compute_ctx {
+   struct pvr_winsys_compute_ctx base;
+
+   /* Opaque kernel handle returned by RGXCREATECOMPUTECONTEXT; passed to
+    * kick and destroy bridge calls.
+    */
+   void *handle;
+
+   /* fd opened from PVR_SRV_SYNC_DEV_PATH, used as the update timeline
+    * when kicking compute jobs on this context.
+    */
+   int timeline;
+};
+
+/* Downcast from the common base context to the Services implementation. */
+#define to_pvr_srv_winsys_compute_ctx(ctx) \
+   container_of(ctx, struct pvr_srv_winsys_compute_ctx, base)
+
+/* Create a Services-backed compute context.
+ *
+ * Allocates the host-side wrapper, opens the sync timeline device and asks
+ * the kernel to create a firmware compute context seeded with the static
+ * CDM context-switch state from create_info.
+ *
+ * On success stores the new context in *ctx_out and returns VK_SUCCESS.
+ * On failure returns the specific VkResult of the step that failed (fix:
+ * previously every failure was collapsed to VK_ERROR_INITIALIZATION_FAILED,
+ * masking e.g. the bridge call's own error).
+ */
+VkResult pvr_srv_winsys_compute_ctx_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_compute_ctx_create_info *create_info,
+   struct pvr_winsys_compute_ctx **const ctx_out)
+{
+   /* Static context-switch state handed to the firmware at context-create
+    * time; only the CDM register snapshot is populated.
+    */
+   struct rogue_fwif_static_computecontext_state static_state = {
+      .ctx_switch_regs = {
+         .cdm_context_state_base_addr =
+            create_info->static_state.cdm_ctx_state_base_addr,
+
+         .cdm_context_pds0 = create_info->static_state.cdm_ctx_store_pds0,
+         .cdm_context_pds0_b =
+            create_info->static_state.cdm_ctx_store_pds0_b,
+         .cdm_context_pds1 = create_info->static_state.cdm_ctx_store_pds1,
+
+         .cdm_terminate_pds = create_info->static_state.cdm_ctx_terminate_pds,
+         .cdm_terminate_pds1 =
+            create_info->static_state.cdm_ctx_terminate_pds1,
+
+         .cdm_resume_pds0 = create_info->static_state.cdm_ctx_resume_pds0,
+         .cdm_resume_pds0_b = create_info->static_state.cdm_ctx_resume_pds0_b,
+      },
+   };
+
+   /* Reset-framework command: unused, so only the flags header is sent. */
+   struct rogue_fwif_rf_cmd reset_cmd = {
+      .flags = 0U,
+   };
+
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_compute_ctx *srv_ctx;
+   VkResult result;
+
+   srv_ctx = vk_alloc(srv_ws->alloc,
+                      sizeof(*srv_ctx),
+                      8U,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_ctx)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   srv_ctx->timeline = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR);
+   if (srv_ctx->timeline < 0) {
+      result = vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+      goto err_free_srv_ctx;
+   }
+
+   /* Only the header of reset_cmd (everything before the regs snapshot) is
+    * passed — assumes the kernel treats a short reset-framework cmd as
+    * "no framework state"; TODO confirm against the DDK.
+    */
+   result = pvr_srv_rgx_create_compute_context(
+      srv_ws->render_fd,
+      pvr_srv_from_winsys_priority(create_info->priority),
+      sizeof(reset_cmd) - sizeof(reset_cmd.regs),
+      (uint8_t *)&reset_cmd,
+      srv_ws->server_memctx_data,
+      sizeof(static_state),
+      (uint8_t *)&static_state,
+      0U,
+      RGX_CONTEXT_FLAG_DISABLESLR,
+      0U,
+      UINT_MAX,
+      &srv_ctx->handle);
+   if (result != VK_SUCCESS)
+      goto err_close_timeline;
+
+   srv_ctx->base.ws = ws;
+
+   *ctx_out = &srv_ctx->base;
+
+   return VK_SUCCESS;
+
+err_close_timeline:
+   close(srv_ctx->timeline);
+
+err_free_srv_ctx:
+   vk_free(srv_ws->alloc, srv_ctx);
+
+   /* Propagate the specific failure instead of masking it. */
+   return result;
+}
+
+/* Tear down a compute context created by pvr_srv_winsys_compute_ctx_create():
+ * destroy the kernel-side firmware context, close the sync timeline fd and
+ * release the host allocation.
+ */
+void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx)
+{
+   struct pvr_srv_winsys_compute_ctx *srv_ctx =
+      to_pvr_srv_winsys_compute_ctx(ctx);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+
+   /* Release in reverse order of acquisition; srv_ctx is freed last since
+    * srv_ctx->handle and srv_ctx->timeline are read above.
+    */
+   pvr_srv_rgx_destroy_compute_context(srv_ws->render_fd, srv_ctx->handle);
+   close(srv_ctx->timeline);
+   vk_free(srv_ws->alloc, srv_ctx);
+}
+
+/* Populate a firmware compute command from the generic winsys submit info.
+ * The command is fully zeroed first; only the fields this driver uses are
+ * then filled in.
+ */
+static void pvr_srv_compute_cmd_init(
+   const struct pvr_winsys_compute_submit_info *submit_info,
+   struct rogue_fwif_cmd_compute *cmd)
+{
+   struct rogue_fwif_cdm_regs *regs;
+
+   memset(cmd, 0, sizeof(*cmd));
+
+   cmd->cmn.frame_num = submit_info->frame_num;
+
+   /* Copy the CDM register snapshot captured at submit time. */
+   regs = &cmd->regs;
+   regs->cdm_ctrl_stream_base = submit_info->regs.cdm_ctrl_stream_base;
+   regs->cdm_item = submit_info->regs.cdm_item;
+   regs->cdm_resume_pds1 = submit_info->regs.cdm_resume_pds1;
+   regs->compute_cluster = submit_info->regs.compute_cluster;
+   regs->tpu = submit_info->regs.tpu;
+   regs->tpu_border_colour_table = submit_info->regs.tpu_border_colour_table;
+
+   /* Translate winsys submit flags to firmware command flags. */
+   if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE)
+      cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_SINGLE_CORE;
+
+   if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP)
+      cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
+}
+
+/* Submit a compute job on the given compute context.
+ *
+ * All semaphore waits for the compute stage are merged into a single wait
+ * syncobj which is handed to the kernel as the job's in-fence. If the kernel
+ * returns an out-fence it is wrapped in a newly created syncobj and returned
+ * through syncobj_out; otherwise *syncobj_out is set to NULL.
+ */
+VkResult pvr_srv_winsys_compute_submit(
+   const struct pvr_winsys_compute_ctx *ctx,
+   const struct pvr_winsys_compute_submit_info *submit_info,
+   struct pvr_winsys_syncobj **const syncobj_out)
+{
+   const struct pvr_srv_winsys_compute_ctx *srv_ctx =
+      to_pvr_srv_winsys_compute_ctx(ctx);
+   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+
+   struct pvr_winsys_syncobj *signal_syncobj = NULL;
+   struct pvr_winsys_syncobj *wait_syncobj = NULL;
+   struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+   struct rogue_fwif_cmd_compute compute_cmd;
+   VkResult result;
+   int fence;
+
+   pvr_srv_compute_cmd_init(submit_info, &compute_cmd);
+
+   /* Accumulate the compute-stage wait of every semaphore into a single
+    * syncobj. A semaphore whose stage flags are fully consumed is destroyed
+    * here and its handle cleared.
+    */
+   for (uint32_t i = 0U; i < submit_info->semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, sem, submit_info->semaphores[i]);
+
+      if (!sem->syncobj)
+         continue;
+
+      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_COMPUTE_BIT) {
+         result = pvr_srv_winsys_syncobjs_merge(sem->syncobj,
+                                                wait_syncobj,
+                                                &wait_syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_wait_syncobj;
+
+         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_COMPUTE_BIT;
+      }
+
+      if (submit_info->stage_flags[i] == 0U) {
+         pvr_srv_winsys_syncobj_destroy(sem->syncobj);
+         sem->syncobj = NULL;
+      }
+   }
+
+   /* NULL if no waits were merged; only dereferenced when wait_syncobj is
+    * non-NULL below.
+    */
+   srv_syncobj = to_pvr_srv_winsys_syncobj(wait_syncobj);
+
+   /* Retry the kick for as long as the bridge reports VK_NOT_READY. */
+   do {
+      result = pvr_srv_rgx_kick_compute2(srv_ws->render_fd,
+                                         srv_ctx->handle,
+                                         /* No support cache operations. */
+                                         0U,
+                                         0U,
+                                         NULL,
+                                         NULL,
+                                         NULL,
+                                         wait_syncobj ? srv_syncobj->fd : -1,
+                                         srv_ctx->timeline,
+                                         sizeof(compute_cmd),
+                                         (uint8_t *)&compute_cmd,
+                                         submit_info->job_num,
+                                         0U,
+                                         0U,
+                                         0U,
+                                         0U,
+                                         "COMPUTE",
+                                         &fence);
+   } while (result == VK_NOT_READY);
+
+   if (result != VK_SUCCESS)
+      goto err_destroy_wait_syncobj;
+
+   /* Given job submission succeeded, we don't need to close wait fence and it
+    * should be consumed by the compute job itself.
+    */
+   if (wait_syncobj)
+      srv_syncobj->fd = -1;
+
+   if (fence != -1) {
+      result = pvr_srv_winsys_syncobj_create(ctx->ws, false, &signal_syncobj);
+      if (result != VK_SUCCESS)
+         goto err_destroy_wait_syncobj;
+
+      /* The syncobj takes ownership of the out-fence fd. */
+      pvr_srv_set_syncobj_payload(signal_syncobj, fence);
+   }
+
+   *syncobj_out = signal_syncobj;
+
+   /* The success path intentionally falls through: the wait syncobj wrapper
+    * is destroyed in all cases (its fd was cleared above on success).
+    */
+err_destroy_wait_syncobj:
+   if (wait_syncobj)
+      pvr_srv_winsys_syncobj_destroy(wait_syncobj);
+
+   return result;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_COMPUTE_H
+#define PVR_SRV_JOB_COMPUTE_H
+
+#include <vulkan/vulkan.h>
+
+struct pvr_winsys;
+struct pvr_winsys_compute_ctx;
+struct pvr_winsys_compute_ctx_create_info;
+struct pvr_winsys_compute_submit_info;
+struct pvr_winsys_syncobj;
+
+/*******************************************
+ Function prototypes
+ *******************************************/
+
+VkResult pvr_srv_winsys_compute_ctx_create(
+ struct pvr_winsys *ws,
+ const struct pvr_winsys_compute_ctx_create_info *create_info,
+ struct pvr_winsys_compute_ctx **const ctx_out);
+void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx);
+
+VkResult pvr_srv_winsys_compute_submit(
+ const struct pvr_winsys_compute_ctx *ctx,
+ const struct pvr_winsys_compute_submit_info *submit_info,
+ struct pvr_winsys_syncobj **const syncobj_out);
+
+#endif /* PVR_SRV_JOB_COMPUTE_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <vulkan/vulkan.h>
+
+#include "fw-api/pvr_rogue_fwif.h"
+#include "fw-api/pvr_rogue_fwif_rf.h"
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bo.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_srv_job_common.h"
+#include "pvr_srv_job_render.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+/* Services-backend wrapper for a winsys free list. */
+struct pvr_srv_winsys_free_list {
+   struct pvr_winsys_free_list base;
+
+   /* Kernel free list handle returned by pvr_srv_rgx_create_free_list(). */
+   void *handle;
+
+   /* Parent (global) free list, or NULL if this list has none. */
+   struct pvr_srv_winsys_free_list *parent;
+};
+
+#define to_pvr_srv_winsys_free_list(free_list) \
+   container_of(free_list, struct pvr_srv_winsys_free_list, base)
+
+/* Services-backend render target dataset: one kernel HWRT data handle plus
+ * one sync prim per firmware RT data.
+ */
+struct pvr_srv_winsys_rt_dataset {
+   struct pvr_winsys_rt_dataset base;
+
+   struct {
+      /* Kernel HWRT dataset handle. */
+      void *handle;
+      /* Sync prim used at submit time to fence the PR/fragment phase on the
+       * geometry phase (see pvr_srv_winsys_render_submit()).
+       */
+      struct pvr_srv_sync_prim *sync_prim;
+   } rt_datas[ROGUE_FWIF_NUM_RTDATAS];
+};
+
+#define to_pvr_srv_winsys_rt_dataset(rt_dataset) \
+   container_of(rt_dataset, struct pvr_srv_winsys_rt_dataset, base)
+
+/* Services-backend render context. */
+struct pvr_srv_winsys_render_ctx {
+   struct pvr_winsys_render_ctx base;
+
+   /* Handle to kernel context. */
+   void *handle;
+
+   /* Timeline fds used to produce the geometry and fragment out-fences at
+    * submit time.
+    */
+   int timeline_geom;
+   int timeline_frag;
+};
+
+#define to_pvr_srv_winsys_render_ctx(ctx) \
+   container_of(ctx, struct pvr_srv_winsys_render_ctx, base)
+
+/* Create a free list, optionally chained onto a parent (global) free list.
+ * The backing memory is described by free_list_vma.
+ */
+VkResult pvr_srv_winsys_free_list_create(
+   struct pvr_winsys *ws,
+   struct pvr_winsys_vma *free_list_vma,
+   uint32_t initial_num_pages,
+   uint32_t max_num_pages,
+   uint32_t grow_num_pages,
+   uint32_t grow_threshold,
+   struct pvr_winsys_free_list *parent_free_list,
+   struct pvr_winsys_free_list **const free_list_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_bo *srv_free_list_bo =
+      to_pvr_srv_winsys_bo(free_list_vma->bo);
+   struct pvr_srv_winsys_free_list *srv_free_list;
+   void *parent_handle;
+   VkResult result;
+
+   srv_free_list = vk_zalloc(srv_ws->alloc,
+                             sizeof(*srv_free_list),
+                             8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_free_list)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Resolve the parent, if any, so its kernel handle can be passed on. */
+   srv_free_list->parent =
+      parent_free_list ? to_pvr_srv_winsys_free_list(parent_free_list) : NULL;
+   parent_handle =
+      srv_free_list->parent ? srv_free_list->parent->handle : NULL;
+
+   result = pvr_srv_rgx_create_free_list(srv_ws->render_fd,
+                                         srv_ws->server_memctx_data,
+                                         max_num_pages,
+                                         initial_num_pages,
+                                         grow_num_pages,
+                                         grow_threshold,
+                                         parent_handle,
+#if defined(DEBUG)
+                                         PVR_SRV_TRUE /* free_list_check */,
+#else
+                                         PVR_SRV_FALSE /* free_list_check */,
+#endif
+                                         free_list_vma->dev_addr,
+                                         srv_free_list_bo->pmr,
+                                         0 /* pmr_offset */,
+                                         &srv_free_list->handle);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_free_list;
+
+   srv_free_list->base.ws = ws;
+
+   *free_list_out = &srv_free_list->base;
+
+   return VK_SUCCESS;
+
+err_vk_free_srv_free_list:
+   vk_free(srv_ws->alloc, srv_free_list);
+
+   return result;
+}
+
+/* Destroy a free list created by pvr_srv_winsys_free_list_create(). */
+void pvr_srv_winsys_free_list_destroy(struct pvr_winsys_free_list *free_list)
+{
+   struct pvr_srv_winsys_free_list *srv_free_list =
+      to_pvr_srv_winsys_free_list(free_list);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(free_list->ws);
+
+   /* Drop the kernel object first, then the host-side wrapper. */
+   pvr_srv_rgx_destroy_free_list(srv_ws->render_fd, srv_free_list->handle);
+   vk_free(srv_ws->alloc, srv_free_list);
+}
+
+/* Create a render target dataset.
+ *
+ * Creates the kernel HWRT dataset described by create_info and allocates one
+ * sync prim per RT data (used at submit time to fence the PR/fragment phase
+ * on the geometry phase).
+ *
+ * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure, or
+ * the error from the bridge call.
+ */
+VkResult pvr_srv_render_target_dataset_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_rt_dataset_create_info *create_info,
+   struct pvr_winsys_rt_dataset **const rt_dataset_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_free_list *srv_local_free_list =
+      to_pvr_srv_winsys_free_list(create_info->local_free_list);
+   void *free_lists[ROGUE_FW_MAX_FREELISTS] = { NULL };
+   struct pvr_srv_winsys_rt_dataset *srv_rt_dataset;
+   VkResult result;
+
+   free_lists[ROGUE_FW_LOCAL_FREELIST] = srv_local_free_list->handle;
+
+   if (srv_local_free_list->parent) {
+      free_lists[ROGUE_FW_GLOBAL_FREELIST] =
+         srv_local_free_list->parent->handle;
+   }
+
+   /* Zero-allocated so unassigned rt_datas handles/sync prims are NULL on
+    * the error paths below.
+    */
+   srv_rt_dataset = vk_zalloc(srv_ws->alloc,
+                              sizeof(*srv_rt_dataset),
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_rt_dataset)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = pvr_srv_rgx_create_hwrt_dataset(
+      srv_ws->render_fd,
+      create_info->rt_datas[0].pm_mlist_dev_addr,
+      create_info->rt_datas[1].pm_mlist_dev_addr,
+      create_info->tpc_dev_addr,
+      create_info->rt_datas[0].macrotile_array_dev_addr,
+      create_info->rt_datas[1].macrotile_array_dev_addr,
+      create_info->rtc_dev_addr,
+      create_info->rt_datas[0].rgn_header_dev_addr,
+      create_info->rt_datas[1].rgn_header_dev_addr,
+      create_info->vheap_table_dev_addr,
+      create_info->ppp_multi_sample_ctl_y_flipped,
+      create_info->ppp_multi_sample_ctl,
+      create_info->rgn_header_size,
+      free_lists,
+      create_info->mtile_stride,
+      create_info->ppp_screen,
+      create_info->te_aa,
+      create_info->te_mtile1,
+      create_info->te_mtile2,
+      create_info->te_screen,
+      create_info->tpc_size,
+      create_info->tpc_stride,
+      create_info->isp_merge_lower_x,
+      create_info->isp_merge_lower_y,
+      create_info->isp_merge_scale_x,
+      create_info->isp_merge_scale_y,
+      create_info->isp_merge_upper_x,
+      create_info->isp_merge_upper_y,
+      create_info->isp_mtile_size,
+      create_info->max_rts,
+      &srv_rt_dataset->rt_datas[0].handle,
+      &srv_rt_dataset->rt_datas[1].handle);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_rt_dataset;
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) {
+      srv_rt_dataset->rt_datas[i].sync_prim = pvr_srv_sync_prim_alloc(srv_ws);
+      if (!srv_rt_dataset->rt_datas[i].sync_prim) {
+         /* Report the failure explicitly; without this, the stale VK_SUCCESS
+          * from the bridge call above would be returned to the caller along
+          * with a freed dataset.
+          */
+         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+         goto err_srv_sync_prim_free;
+      }
+   }
+
+   srv_rt_dataset->base.ws = ws;
+
+   *rt_dataset_out = &srv_rt_dataset->base;
+
+   return VK_SUCCESS;
+
+err_srv_sync_prim_free:
+   for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) {
+      pvr_srv_sync_prim_free(srv_rt_dataset->rt_datas[i].sync_prim);
+
+      if (srv_rt_dataset->rt_datas[i].handle) {
+         pvr_srv_rgx_destroy_hwrt_dataset(srv_ws->render_fd,
+                                          srv_rt_dataset->rt_datas[i].handle);
+      }
+   }
+
+err_vk_free_srv_rt_dataset:
+   vk_free(srv_ws->alloc, srv_rt_dataset);
+
+   return result;
+}
+
+/* Destroy a render target dataset and its per-RT-data resources. */
+void pvr_srv_render_target_dataset_destroy(
+   struct pvr_winsys_rt_dataset *rt_dataset)
+{
+   struct pvr_srv_winsys_rt_dataset *srv_rt_dataset =
+      to_pvr_srv_winsys_rt_dataset(rt_dataset);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(rt_dataset->ws);
+
+   /* Release the sync prim and, where a kernel HWRT dataset handle exists,
+    * the kernel object for each RT data.
+    */
+   for (uint32_t idx = 0; idx < ARRAY_SIZE(srv_rt_dataset->rt_datas); idx++) {
+      pvr_srv_sync_prim_free(srv_rt_dataset->rt_datas[idx].sync_prim);
+
+      if (srv_rt_dataset->rt_datas[idx].handle) {
+         pvr_srv_rgx_destroy_hwrt_dataset(srv_ws->render_fd,
+                                          srv_rt_dataset->rt_datas[idx].handle);
+      }
+   }
+
+   vk_free(srv_ws->alloc, srv_rt_dataset);
+}
+
+/* Fill the firmware static render context state from the winsys-level
+ * create info.
+ */
+static void pvr_srv_render_ctx_fw_static_state_init(
+   struct pvr_winsys_render_ctx_create_info *create_info,
+   struct rogue_fwif_static_rendercontext_state *static_state)
+{
+   struct pvr_winsys_render_ctx_static_state *ss =
+      &create_info->static_state;
+   struct rogue_fwif_ta_regs_cswitch *cswitch_regs =
+      &static_state->ctx_switch_regs;
+
+   /* Start from all-zero firmware state; only the fields below are set. */
+   memset(static_state, 0, sizeof(*static_state));
+
+   cswitch_regs->vdm_context_state_base_addr = ss->vdm_ctx_state_base_addr;
+   cswitch_regs->ta_context_state_base_addr = ss->geom_ctx_state_base_addr;
+
+   STATIC_ASSERT(ARRAY_SIZE(cswitch_regs->ta_state) ==
+                 ARRAY_SIZE(ss->geom_state));
+   for (uint32_t i = 0; i < ARRAY_SIZE(ss->geom_state); i++) {
+      /* Copy the per-state VDM context store/resume task values pairwise. */
+      cswitch_regs->ta_state[i].vdm_context_store_task0 =
+         ss->geom_state[i].vdm_ctx_store_task0;
+      cswitch_regs->ta_state[i].vdm_context_resume_task0 =
+         ss->geom_state[i].vdm_ctx_resume_task0;
+
+      cswitch_regs->ta_state[i].vdm_context_store_task1 =
+         ss->geom_state[i].vdm_ctx_store_task1;
+      cswitch_regs->ta_state[i].vdm_context_resume_task1 =
+         ss->geom_state[i].vdm_ctx_resume_task1;
+
+      cswitch_regs->ta_state[i].vdm_context_store_task2 =
+         ss->geom_state[i].vdm_ctx_store_task2;
+      cswitch_regs->ta_state[i].vdm_context_resume_task2 =
+         ss->geom_state[i].vdm_ctx_resume_task2;
+   }
+}
+
+/* Create a render context.
+ *
+ * Opens one sync timeline fd per phase (geometry and fragment), builds the
+ * firmware static render context state and creates the kernel render
+ * context.
+ *
+ * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the context allocation fails; every
+ * other failure is reported as VK_ERROR_INITIALIZATION_FAILED (the specific
+ * bridge error is not propagated).
+ */
+VkResult pvr_srv_winsys_render_ctx_create(
+   struct pvr_winsys *ws,
+   struct pvr_winsys_render_ctx_create_info *create_info,
+   struct pvr_winsys_render_ctx **const ctx_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct rogue_fwif_rf_cmd reset_cmd = {
+      .flags = 0,
+   };
+
+   struct rogue_fwif_static_rendercontext_state static_state;
+   struct pvr_srv_winsys_render_ctx *srv_ctx;
+   VkResult result;
+
+   srv_ctx = vk_zalloc(srv_ws->alloc,
+                       sizeof(*srv_ctx),
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_ctx)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Separate timelines so the geometry and fragment phases each get their
+    * own out-fence at submit time.
+    */
+   srv_ctx->timeline_geom = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR);
+   if (srv_ctx->timeline_geom < 0)
+      goto err_free_srv_ctx;
+
+   srv_ctx->timeline_frag = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR);
+   if (srv_ctx->timeline_frag < 0)
+      goto err_close_timeline_geom;
+
+   pvr_srv_render_ctx_fw_static_state_init(create_info, &static_state);
+
+   /* The reset command is passed without its trailing regs member (hence the
+    * sizeof subtraction).
+    */
+   result = pvr_srv_rgx_create_render_context(
+      srv_ws->render_fd,
+      pvr_srv_from_winsys_priority(create_info->priority),
+      create_info->vdm_callstack_addr,
+      sizeof(reset_cmd) - sizeof(reset_cmd.regs),
+      (uint8_t *)&reset_cmd,
+      srv_ws->server_memctx_data,
+      sizeof(static_state),
+      (uint8_t *)&static_state,
+      0,
+      RGX_CONTEXT_FLAG_DISABLESLR,
+      0,
+      UINT_MAX,
+      UINT_MAX,
+      &srv_ctx->handle);
+   if (result != VK_SUCCESS)
+      goto err_close_timeline_frag;
+
+   srv_ctx->base.ws = ws;
+
+   *ctx_out = &srv_ctx->base;
+
+   return VK_SUCCESS;
+
+err_close_timeline_frag:
+   close(srv_ctx->timeline_frag);
+
+err_close_timeline_geom:
+   close(srv_ctx->timeline_geom);
+
+err_free_srv_ctx:
+   vk_free(srv_ws->alloc, srv_ctx);
+
+   return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+}
+
+/* Destroy a render context created by pvr_srv_winsys_render_ctx_create(). */
+void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx)
+{
+   struct pvr_srv_winsys_render_ctx *srv_ctx =
+      to_pvr_srv_winsys_render_ctx(ctx);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+
+   /* Reverse of creation: kernel context, then both timeline fds, then the
+    * host-side wrapper.
+    */
+   pvr_srv_rgx_destroy_render_context(srv_ws->render_fd, srv_ctx->handle);
+   close(srv_ctx->timeline_frag);
+   close(srv_ctx->timeline_geom);
+   vk_free(srv_ws->alloc, srv_ctx);
+}
+
+/* Build the firmware geometry (TA) command from the winsys submit info. */
+static void pvr_srv_geometry_cmd_init(
+   const struct pvr_winsys_render_submit_info *submit_info,
+   const struct pvr_srv_sync_prim *sync_prim,
+   struct rogue_fwif_cmd_ta *cmd)
+{
+   const struct pvr_winsys_geometry_state *state = &submit_info->geometry;
+   struct rogue_fwif_ta_regs *geom_regs = &cmd->geom_regs;
+   uint32_t flags = 0U;
+
+   /* memset (rather than an initializer) so padding bytes are zeroed too. */
+   memset(cmd, 0, sizeof(*cmd));
+
+   cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
+
+   /* Mirror the winsys-level register values into the firmware command. */
+   geom_regs->vdm_ctrl_stream_base = state->regs.vdm_ctrl_stream_base;
+   geom_regs->vdm_context_resume_task0_size =
+      state->regs.vdm_ctx_resume_task0_size;
+   geom_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
+   geom_regs->tpu = state->regs.tpu;
+   geom_regs->ppp_ctrl = state->regs.ppp_ctrl;
+   geom_regs->te_psg = state->regs.te_psg;
+   geom_regs->pds_ctrl = state->regs.pds_ctrl;
+
+   /* Translate the winsys geometry flags into firmware kick flags. */
+   if (state->flags & PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY)
+      flags |= ROGUE_FWIF_TAFLAGS_FIRSTKICK;
+
+   if (state->flags & PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY)
+      flags |= ROGUE_FWIF_TAFLAGS_LASTKICK;
+
+   if (state->flags & PVR_WINSYS_GEOM_FLAG_SINGLE_CORE)
+      flags |= ROGUE_FWIF_TAFLAGS_SINGLE_CORE;
+
+   cmd->flags = flags;
+
+   /* Fence any partial render on the sync prim that the geometry phase
+    * update signals (see pvr_srv_winsys_render_submit()).
+    */
+   cmd->partial_render_ta_3d_fence.ufo_addr.addr =
+      pvr_srv_sync_prim_get_fw_addr(sync_prim);
+   cmd->partial_render_ta_3d_fence.value = sync_prim->value;
+}
+
+/* Build the firmware fragment (3D) command from the winsys submit info.
+ *
+ * Mirrors the winsys-level register values into the firmware command and
+ * translates the winsys fragment flags into firmware render flags.
+ */
+static void pvr_srv_fragment_cmd_init(
+   const struct pvr_winsys_render_submit_info *submit_info,
+   struct rogue_fwif_cmd_3d *cmd)
+{
+   const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
+   struct rogue_fwif_3d_regs *fw_regs = &cmd->regs;
+
+   memset(cmd, 0, sizeof(*cmd));
+
+   cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
+
+   fw_regs->usc_pixel_output_ctrl = state->regs.usc_pixel_output_ctrl;
+   fw_regs->isp_bgobjdepth = state->regs.isp_bgobjdepth;
+   fw_regs->isp_bgobjvals = state->regs.isp_bgobjvals;
+   fw_regs->isp_aa = state->regs.isp_aa;
+   fw_regs->isp_ctl = state->regs.isp_ctl;
+   fw_regs->tpu = state->regs.tpu;
+   fw_regs->event_pixel_pds_info = state->regs.event_pixel_pds_info;
+   fw_regs->pixel_phantom = state->regs.pixel_phantom;
+   fw_regs->event_pixel_pds_data = state->regs.event_pixel_pds_data;
+   fw_regs->isp_scissor_base = state->regs.isp_scissor_base;
+   fw_regs->isp_dbias_base = state->regs.isp_dbias_base;
+   fw_regs->isp_oclqry_base = state->regs.isp_oclqry_base;
+   fw_regs->isp_zlsctl = state->regs.isp_zlsctl;
+   fw_regs->isp_zload_store_base = state->regs.isp_zload_store_base;
+   fw_regs->isp_stencil_load_store_base =
+      state->regs.isp_stencil_load_store_base;
+   fw_regs->isp_zls_pixels = state->regs.isp_zls_pixels;
+
+   /* The firmware may define fewer PBE words per attachment than the winsys
+    * provides; the memcpy below copies only as many as the firmware holds.
+    */
+   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word) ==
+                 ARRAY_SIZE(state->regs.pbe_word));
+
+   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word[0]) <=
+                 ARRAY_SIZE(state->regs.pbe_word[0]));
+
+#if !defined(NDEBUG)
+   /* Depending on the hardware we might have more PBE words than the firmware
+    * accepts so check that the extra words are 0.
+    */
+   if (ARRAY_SIZE(fw_regs->pbe_word[0]) < ARRAY_SIZE(state->regs.pbe_word[0])) {
+      /* For each color attachment. */
+      for (uint32_t i = 0; i < ARRAY_SIZE(state->regs.pbe_word); i++) {
+         /* For each extra PBE word not used by the firmware. */
+         for (uint32_t j = ARRAY_SIZE(fw_regs->pbe_word[0]);
+              j < ARRAY_SIZE(state->regs.pbe_word[0]);
+              j++) {
+            assert(state->regs.pbe_word[i][j] == 0);
+         }
+      }
+   }
+#endif
+
+   memcpy(fw_regs->pbe_word, state->regs.pbe_word, sizeof(fw_regs->pbe_word));
+
+   fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
+
+   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_bgnd) ==
+                 ARRAY_SIZE(state->regs.pds_bgnd));
+   typed_memcpy(fw_regs->pds_bgnd,
+                state->regs.pds_bgnd,
+                ARRAY_SIZE(fw_regs->pds_bgnd));
+
+   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_pr_bgnd) ==
+                 ARRAY_SIZE(state->regs.pds_pr_bgnd));
+   typed_memcpy(fw_regs->pds_pr_bgnd,
+                state->regs.pds_pr_bgnd,
+                ARRAY_SIZE(fw_regs->pds_pr_bgnd));
+
+   /* Translate the winsys fragment flags into firmware render flags. */
+   if (state->flags & PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT)
+      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER;
+
+   if (state->flags & PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT)
+      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_STENCILBUFFER;
+
+   if (state->flags & PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP)
+      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_PREVENT_CDM_OVERLAP;
+
+   if (state->flags & PVR_WINSYS_FRAG_FLAG_SINGLE_CORE)
+      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE;
+
+   cmd->zls_stride = state->zls_stride;
+   cmd->sls_stride = state->sls_stride;
+}
+
+/* Submit a render (geometry + fragment) job on the given render context.
+ *
+ * Per-stage semaphore waits are merged into one wait syncobj each for the
+ * geometry and fragment phases. Out-fences returned by the kernel are
+ * wrapped in new syncobjs returned through syncobj_geom_out and
+ * syncobj_frag_out (NULL when no fence was produced).
+ */
+VkResult pvr_srv_winsys_render_submit(
+   const struct pvr_winsys_render_ctx *ctx,
+   const struct pvr_winsys_render_submit_info *submit_info,
+   struct pvr_winsys_syncobj **const syncobj_geom_out,
+   struct pvr_winsys_syncobj **const syncobj_frag_out)
+{
+   const struct pvr_srv_winsys_rt_dataset *srv_rt_dataset =
+      to_pvr_srv_winsys_rt_dataset(submit_info->rt_dataset);
+   struct pvr_srv_sync_prim *sync_prim =
+      srv_rt_dataset->rt_datas[submit_info->rt_data_idx].sync_prim;
+   void *rt_data_handle =
+      srv_rt_dataset->rt_datas[submit_info->rt_data_idx].handle;
+   const struct pvr_srv_winsys_render_ctx *srv_ctx =
+      to_pvr_srv_winsys_render_ctx(ctx);
+   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+
+   uint32_t sync_pmr_flags[PVR_SRV_SYNC_MAX] = { 0U };
+   void *sync_pmrs[PVR_SRV_SYNC_MAX] = { NULL };
+   uint32_t sync_pmr_count;
+
+   struct pvr_winsys_syncobj *geom_signal_syncobj = NULL;
+   struct pvr_winsys_syncobj *frag_signal_syncobj = NULL;
+   struct pvr_winsys_syncobj *geom_wait_syncobj = NULL;
+   struct pvr_winsys_syncobj *frag_wait_syncobj = NULL;
+   struct pvr_srv_winsys_syncobj *srv_geom_syncobj;
+   struct pvr_srv_winsys_syncobj *srv_frag_syncobj;
+
+   struct rogue_fwif_cmd_ta geom_cmd;
+   struct rogue_fwif_cmd_3d frag_cmd;
+
+   int fence_frag;
+   int fence_geom;
+
+   VkResult result;
+
+   pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd);
+   pvr_srv_fragment_cmd_init(submit_info, &frag_cmd);
+
+   /* Accumulate each semaphore's geometry- and fragment-stage waits into
+    * per-phase wait syncobjs. A semaphore whose stage flags are fully
+    * consumed is destroyed here and its handle cleared.
+    */
+   for (uint32_t i = 0U; i < submit_info->semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, sem, submit_info->semaphores[i]);
+
+      if (!sem->syncobj)
+         continue;
+
+      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_GEOM_BIT) {
+         result = pvr_srv_winsys_syncobjs_merge(sem->syncobj,
+                                                geom_wait_syncobj,
+                                                &geom_wait_syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_wait_syncobjs;
+
+         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_GEOM_BIT;
+      }
+
+      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_FRAG_BIT) {
+         result = pvr_srv_winsys_syncobjs_merge(sem->syncobj,
+                                                frag_wait_syncobj,
+                                                &frag_wait_syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_wait_syncobjs;
+
+         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_FRAG_BIT;
+      }
+
+      if (submit_info->stage_flags[i] == 0U) {
+         pvr_srv_winsys_syncobj_destroy(sem->syncobj);
+         sem->syncobj = NULL;
+      }
+   }
+
+   /* NULL when no waits were merged; only dereferenced when the matching
+    * wait syncobj is non-NULL below.
+    */
+   srv_geom_syncobj = to_pvr_srv_winsys_syncobj(geom_wait_syncobj);
+   srv_frag_syncobj = to_pvr_srv_winsys_syncobj(frag_wait_syncobj);
+
+   /* The kernel can only synchronize access to a bounded number of PMRs;
+    * surplus bos are reported and ignored.
+    */
+   if (submit_info->bo_count <= ARRAY_SIZE(sync_pmrs)) {
+      sync_pmr_count = submit_info->bo_count;
+   } else {
+      mesa_logw("Too many bos to synchronize access to (ignoring %zu bos)\n",
+                submit_info->bo_count - ARRAY_SIZE(sync_pmrs));
+      sync_pmr_count = ARRAY_SIZE(sync_pmrs);
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(sync_pmrs) == ARRAY_SIZE(sync_pmr_flags));
+   assert(sync_pmr_count <= ARRAY_SIZE(sync_pmrs));
+   for (uint32_t i = 0; i < sync_pmr_count; i++) {
+      const struct pvr_winsys_job_bo *job_bo = &submit_info->bos[i];
+      const struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(job_bo->bo);
+
+      sync_pmrs[i] = srv_bo->pmr;
+
+      if (job_bo->flags & PVR_WINSYS_JOB_BO_FLAG_WRITE)
+         sync_pmr_flags[i] = PVR_BUFFER_FLAG_WRITE;
+      else
+         sync_pmr_flags[i] = PVR_BUFFER_FLAG_READ;
+   }
+
+   /* The 1.14 PowerVR Services KM driver doesn't add a sync dependency to the
+    * fragment phase on the geometry phase for us. This makes it
+    * necessary to use a sync prim for this purpose. This requires that we pass
+    * in the same sync prim information for the geometry phase update and the
+    * PR fence. We update the sync prim value here as this is the value the
+    * sync prim will get updated to once the geometry phase has completed and
+    * the value the PR or fragment phase will be fenced on.
+    */
+   sync_prim->value++;
+
+   /* Retry the kick for as long as the bridge reports VK_NOT_READY. */
+   do {
+      result =
+         pvr_srv_rgx_kick_render2(srv_ws->render_fd,
+                                  srv_ctx->handle,
+                                  /* Currently no support for cache operations.
+                                   */
+                                  0,
+                                  0,
+                                  NULL,
+                                  NULL,
+                                  NULL,
+                                  1,
+                                  &sync_prim->srv_ws->sync_block_handle,
+                                  &sync_prim->offset,
+                                  &sync_prim->value,
+                                  0,
+                                  NULL,
+                                  NULL,
+                                  NULL,
+                                  sync_prim->srv_ws->sync_block_handle,
+                                  sync_prim->offset,
+                                  sync_prim->value,
+                                  geom_wait_syncobj ? srv_geom_syncobj->fd : -1,
+                                  srv_ctx->timeline_geom,
+                                  &fence_geom,
+                                  "GEOM",
+                                  frag_wait_syncobj ? srv_frag_syncobj->fd : -1,
+                                  srv_ctx->timeline_frag,
+                                  &fence_frag,
+                                  "FRAG",
+                                  sizeof(geom_cmd),
+                                  (uint8_t *)&geom_cmd,
+                                  /* Currently no support for PRs. */
+                                  0,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  sizeof(frag_cmd),
+                                  (uint8_t *)&frag_cmd,
+                                  submit_info->job_num,
+                                  true, /* Always kick the TA. */
+                                  true, /* Always kick a PR. */
+                                  submit_info->run_frag,
+                                  false,
+                                  0,
+                                  rt_data_handle,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  sync_pmr_count,
+                                  sync_pmr_count ? sync_pmr_flags : NULL,
+                                  sync_pmr_count ? sync_pmrs : NULL,
+                                  0,
+                                  0,
+                                  0,
+                                  0,
+                                  0);
+   } while (result == VK_NOT_READY);
+
+   if (result != VK_SUCCESS)
+      goto err_destroy_wait_syncobjs;
+
+   /* Given job submission succeeded, we don't need to close wait fences, these
+    * should be consumed by the render job itself.
+    */
+   if (geom_wait_syncobj)
+      srv_geom_syncobj->fd = -1;
+
+   if (frag_wait_syncobj)
+      srv_frag_syncobj->fd = -1;
+
+   /* Wrap the out-fences (when produced) in syncobjs; the syncobjs take
+    * ownership of the fds.
+    */
+   if (fence_geom != -1) {
+      result =
+         pvr_srv_winsys_syncobj_create(ctx->ws, false, &geom_signal_syncobj);
+      if (result != VK_SUCCESS)
+         goto err_destroy_wait_syncobjs;
+
+      pvr_srv_set_syncobj_payload(geom_signal_syncobj, fence_geom);
+   }
+
+   if (fence_frag != -1) {
+      result =
+         pvr_srv_winsys_syncobj_create(ctx->ws, false, &frag_signal_syncobj);
+      if (result != VK_SUCCESS) {
+         if (geom_signal_syncobj)
+            pvr_srv_winsys_syncobj_destroy(geom_signal_syncobj);
+         goto err_destroy_wait_syncobjs;
+      }
+
+      pvr_srv_set_syncobj_payload(frag_signal_syncobj, fence_frag);
+   }
+
+   *syncobj_geom_out = geom_signal_syncobj;
+   *syncobj_frag_out = frag_signal_syncobj;
+
+   /* The success path intentionally falls through: the wait syncobj wrappers
+    * are destroyed in all cases (their fds were cleared above on success).
+    */
+err_destroy_wait_syncobjs:
+   if (geom_wait_syncobj)
+      pvr_srv_winsys_syncobj_destroy(geom_wait_syncobj);
+
+   if (frag_wait_syncobj)
+      pvr_srv_winsys_syncobj_destroy(frag_wait_syncobj);
+
+   return result;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_RENDER_H
+#define PVR_SRV_JOB_RENDER_H
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+struct pvr_winsys;
+struct pvr_winsys_free_list;
+struct pvr_winsys_render_ctx;
+struct pvr_winsys_render_ctx_create_info;
+struct pvr_winsys_render_submit_info;
+struct pvr_winsys_rt_dataset;
+struct pvr_winsys_rt_dataset_create_info;
+struct pvr_winsys_syncobj;
+struct pvr_winsys_vma;
+
+/*******************************************
+ Function prototypes
+ *******************************************/
+
+VkResult pvr_srv_winsys_free_list_create(
+ struct pvr_winsys *ws,
+ struct pvr_winsys_vma *free_list_vma,
+ uint32_t initial_num_pages,
+ uint32_t max_num_pages,
+ uint32_t grow_num_pages,
+ uint32_t grow_threshold,
+ struct pvr_winsys_free_list *parent_free_list,
+ struct pvr_winsys_free_list **const free_list_out);
+void pvr_srv_winsys_free_list_destroy(struct pvr_winsys_free_list *free_list);
+
+VkResult pvr_srv_render_target_dataset_create(
+ struct pvr_winsys *ws,
+ const struct pvr_winsys_rt_dataset_create_info *create_info,
+ struct pvr_winsys_rt_dataset **const rt_dataset_out);
+void pvr_srv_render_target_dataset_destroy(
+ struct pvr_winsys_rt_dataset *rt_dataset);
+
+VkResult pvr_srv_winsys_render_ctx_create(
+ struct pvr_winsys *ws,
+ struct pvr_winsys_render_ctx_create_info *create_info,
+ struct pvr_winsys_render_ctx **const ctx_out);
+void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx);
+
+VkResult pvr_srv_winsys_render_submit(
+ const struct pvr_winsys_render_ctx *ctx,
+ const struct pvr_winsys_render_submit_info *submit_info,
+ struct pvr_winsys_syncobj **const syncobj_geom_out,
+ struct pvr_winsys_syncobj **const syncobj_frag_out);
+
+#endif /* PVR_SRV_JOB_RENDER_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_PUBLIC_H
+#define PVR_SRV_PUBLIC_H
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+
+struct pvr_winsys *pvr_srv_winsys_create(int master_fd,
+ int render_fd,
+ const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_SRV_PUBLIC_H */
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/libsync.h"
+#include "util/macros.h"
+#include "util/timespec.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+/* Create a Services winsys syncobj.
+ *
+ * The new syncobj owns no fence fd yet (fd == -1); "signaled" only seeds the
+ * cached completion state. On success the base pointer is stored in
+ * *syncobj_out; on allocation failure VK_ERROR_OUT_OF_HOST_MEMORY is
+ * returned. The caller releases it with pvr_srv_winsys_syncobj_destroy().
+ */
+VkResult
+pvr_srv_winsys_syncobj_create(struct pvr_winsys *ws,
+                              bool signaled,
+                              struct pvr_winsys_syncobj **const syncobj_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_syncobj *srv_syncobj =
+      vk_alloc(srv_ws->alloc,
+               sizeof(*srv_syncobj),
+               8,
+               VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+
+   if (srv_syncobj == NULL)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   srv_syncobj->base.ws = ws;
+   srv_syncobj->signaled = signaled;
+   /* No backing sync-file fd until a payload is installed. */
+   srv_syncobj->fd = -1;
+
+   *syncobj_out = &srv_syncobj->base;
+
+   return VK_SUCCESS;
+}
+
+/* Destroy a syncobj, closing any fence fd it still owns and freeing the
+ * backing allocation. "syncobj" must be non-NULL.
+ */
+void pvr_srv_winsys_syncobj_destroy(struct pvr_winsys_syncobj *syncobj)
+{
+   assert(syncobj);
+
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(syncobj->ws);
+   struct pvr_srv_winsys_syncobj *srv_syncobj =
+      to_pvr_srv_winsys_syncobj(syncobj);
+
+   if (srv_syncobj->fd != -1)
+      close(srv_syncobj->fd);
+
+   vk_free(srv_ws->alloc, srv_syncobj);
+}
+
+/* Overwrite the cached completion state of a syncobj.
+ * Note: this closes and drops any fence fd the syncobj currently holds.
+ */
+static void pvr_set_syncobj_state(struct pvr_srv_winsys_syncobj *srv_syncobj,
+                                  bool signaled)
+{
+   int fd = srv_syncobj->fd;
+
+   if (fd != -1) {
+      close(fd);
+      srv_syncobj->fd = -1;
+   }
+
+   srv_syncobj->signaled = signaled;
+}
+
+/* Install "payload" as the syncobj's fence fd, taking ownership of it.
+ * Any previously held fd is closed first. A payload of -1 leaves the
+ * syncobj with no pending fence and marks it signaled.
+ */
+void pvr_srv_set_syncobj_payload(struct pvr_winsys_syncobj *syncobj,
+                                 int payload)
+{
+   struct pvr_srv_winsys_syncobj *srv_syncobj =
+      to_pvr_srv_winsys_syncobj(syncobj);
+
+   if (srv_syncobj->fd != -1)
+      close(srv_syncobj->fd);
+
+   srv_syncobj->fd = payload;
+
+   /* FIXME: Is this valid? */
+   srv_syncobj->signaled = (payload == -1);
+}
+
+/* Reset every non-NULL syncobj in the array to the unsignaled state,
+ * dropping any fence fds they hold. Always returns VK_SUCCESS.
+ */
+VkResult
+pvr_srv_winsys_syncobjs_reset(struct pvr_winsys *ws,
+                              struct pvr_winsys_syncobj **const syncobjs,
+                              uint32_t count)
+{
+   for (uint32_t i = 0U; i < count; i++) {
+      if (syncobjs[i] == NULL)
+         continue;
+
+      pvr_set_syncobj_state(to_pvr_srv_winsys_syncobj(syncobjs[i]), false);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Force every non-NULL syncobj in the array into the signaled state,
+ * dropping any fence fds they hold. Always returns VK_SUCCESS.
+ */
+VkResult
+pvr_srv_winsys_syncobjs_signal(struct pvr_winsys *ws,
+                               struct pvr_winsys_syncobj **const syncobjs,
+                               uint32_t count)
+{
+   for (uint32_t i = 0U; i < count; i++) {
+      if (syncobjs[i] == NULL)
+         continue;
+
+      pvr_set_syncobj_state(to_pvr_srv_winsys_syncobj(syncobjs[i]), true);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Compute the absolute CLOCK_MONOTONIC deadline lying timeout_ns
+ * nanoseconds from now, storing it in *timeout.
+ * Careful, timeout might overflow for very large timeout_ns values.
+ */
+static inline void pvr_start_timeout(struct timespec *timeout,
+ uint64_t timeout_ns)
+{
+ clock_gettime(CLOCK_MONOTONIC, timeout);
+ timespec_add_nsec(timeout, timeout, timeout_ns);
+}
+
+/* Return the time remaining until the absolute CLOCK_MONOTONIC deadline
+ * *timeout. Careful, the result is negative once the deadline has passed.
+ */
+static inline struct timespec
+pvr_get_remaining_time(const struct timespec *timeout)
+{
+   struct timespec now;
+
+   clock_gettime(CLOCK_MONOTONIC, &now);
+   timespec_sub(&now, timeout, &now);
+
+   return now;
+}
+
+/* Wait for syncobjs to become signaled.
+ *
+ * timeout == 0          -> Get status without waiting.
+ * timeout == UINT64_MAX -> Wait infinitely.
+ * else wait for the given timeout in nanoseconds.
+ *
+ * With wait_all set, returns only once every syncobj has signaled; otherwise
+ * returns as soon as any one signals. Entries that are NULL, already
+ * signaled, or hold no fence fd are treated as signaled.
+ *
+ * Returns VK_SUCCESS, VK_TIMEOUT, VK_ERROR_OUT_OF_HOST_MEMORY or
+ * VK_ERROR_DEVICE_LOST.
+ */
+VkResult
+pvr_srv_winsys_syncobjs_wait(struct pvr_winsys *ws,
+                             struct pvr_winsys_syncobj **const syncobjs,
+                             uint32_t count,
+                             bool wait_all,
+                             uint64_t timeout)
+{
+   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   uint32_t unsignaled_count = 0U;
+   struct timespec end_time;
+   struct pollfd *poll_fds;
+   VkResult result;
+   int ppoll_ret;
+
+   /* timeout is 64-bit, so the "wait forever" sentinel must be UINT64_MAX;
+    * comparing against ~0U would only match the 32-bit all-ones value and
+    * send an infinite wait down the finite-deadline path.
+    */
+   if (timeout != 0U && timeout != UINT64_MAX) {
+      /* We don't worry about overflow since ppoll() returns EINVAL on
+       * negative timeout.
+       */
+      pvr_start_timeout(&end_time, timeout);
+   }
+
+   poll_fds = vk_alloc(srv_ws->alloc,
+                       sizeof(*poll_fds) * count,
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!poll_fds)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct pvr_srv_winsys_syncobj *srv_syncobj =
+         to_pvr_srv_winsys_syncobj(syncobjs[i]);
+
+      /* -1 in case the fence is signaled or uninitialized; ppoll will skip
+       * the fence.
+       */
+      if (!srv_syncobj || srv_syncobj->signaled || srv_syncobj->fd == -1) {
+         poll_fds[i].fd = -1;
+      } else {
+         poll_fds[i].fd = srv_syncobj->fd;
+         unsignaled_count++;
+      }
+
+      poll_fds[i].events = POLLIN;
+      poll_fds[i].revents = 0U;
+   }
+
+   if (unsignaled_count == 0U) {
+      result = VK_SUCCESS;
+      goto end_wait_for_fences;
+   }
+
+   /* TODO: Implement device loss handling like anvil: reporting the loss
+    * save the reported status, maybe abort() on env flag, etc.
+    */
+
+   do {
+      if (timeout == UINT64_MAX) {
+         ppoll_ret = ppoll(poll_fds, count, NULL, NULL);
+      } else {
+         struct timespec remaining_time;
+
+         if (timeout == 0U) {
+            remaining_time = (struct timespec){ 0UL, 0UL };
+         } else {
+            /* ppoll() returns EINVAL on negative timeout. Nothing to worry.
+             */
+            remaining_time = pvr_get_remaining_time(&end_time);
+         }
+
+         ppoll_ret = ppoll(poll_fds, count, &remaining_time, NULL);
+      }
+
+      /* Compare against signed 0: "ppoll_ret > 0U" would promote a -1 error
+       * return to UINT_MAX, wrongly entering this branch and corrupting
+       * unsignaled_count.
+       */
+      if (ppoll_ret > 0) {
+         /* ppoll_ret contains the amount of structs updated by poll(). */
+         unsignaled_count -= ppoll_ret;
+
+         /* ppoll_ret > 0 is for early loop termination. */
+         for (uint32_t i = 0; ppoll_ret > 0 && i < count; i++) {
+            struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+            if (poll_fds[i].revents == 0)
+               continue;
+
+            if (poll_fds[i].revents & (POLLNVAL | POLLERR)) {
+               result = vk_error(NULL, VK_ERROR_DEVICE_LOST);
+               goto end_wait_for_fences;
+            }
+
+            srv_syncobj = to_pvr_srv_winsys_syncobj(syncobjs[i]);
+            pvr_set_syncobj_state(srv_syncobj, true);
+
+            if (!wait_all) {
+               result = VK_SUCCESS;
+               goto end_wait_for_fences;
+            }
+
+            /* -1 makes ppoll ignore it and set revents to 0. */
+            poll_fds[i].fd = -1;
+            ppoll_ret--;
+         }
+
+         /* For zero timeout, just return even if we still have unsignaled
+          * fences.
+          */
+         if (timeout == 0U && unsignaled_count != 0U) {
+            result = VK_TIMEOUT;
+            goto end_wait_for_fences;
+         }
+      } else if (ppoll_ret == 0) {
+         result = VK_TIMEOUT;
+         goto end_wait_for_fences;
+      }
+
+      /* Careful as we might have decremented ppoll_ret to 0. */
+   } while ((ppoll_ret != -1 && unsignaled_count != 0) ||
+            (ppoll_ret == -1 && (errno == EINTR || errno == EAGAIN)));
+
+   /* We assume device loss in case of an unknown error or invalid fd. */
+   if (ppoll_ret != -1)
+      result = VK_SUCCESS;
+   else if (errno == EINVAL)
+      result = VK_TIMEOUT;
+   else if (errno == ENOMEM)
+      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+   else
+      result = vk_error(NULL, VK_ERROR_DEVICE_LOST);
+
+end_wait_for_fences:
+   vk_free(srv_ws->alloc, poll_fds);
+
+   return result;
+}
+
+/* Merge two syncobjs into a freshly created one stored in *syncobj_out,
+ * consuming "target" on success.
+ *
+ * - If src holds no fence fd, *syncobj_out is simply set to target (which
+ *   may be NULL) and nothing is created or destroyed.
+ * - If target holds no fence fd, the output receives a dup() of src's fd
+ *   and target (if non-NULL) is destroyed.
+ * - Otherwise the output's fd is sync_merge() of both fds and target is
+ *   destroyed.
+ *
+ * src is never consumed; only target's ownership transfers on success.
+ * On failure the partially built output is destroyed and target is left
+ * untouched.
+ */
+VkResult pvr_srv_winsys_syncobjs_merge(struct pvr_winsys_syncobj *src,
+ struct pvr_winsys_syncobj *target,
+ struct pvr_winsys_syncobj **syncobj_out)
+{
+ struct pvr_srv_winsys_syncobj *srv_target =
+ to_pvr_srv_winsys_syncobj(target);
+ struct pvr_srv_winsys_syncobj *srv_src = to_pvr_srv_winsys_syncobj(src);
+ struct pvr_srv_winsys_syncobj *srv_output;
+ struct pvr_winsys_syncobj *output = NULL;
+ VkResult result;
+
+ /* Nothing to merge from src: hand back target unchanged. */
+ if (!srv_src || srv_src->fd == -1) {
+ *syncobj_out = target;
+ return VK_SUCCESS;
+ }
+
+ result = pvr_srv_winsys_syncobj_create(src->ws, false, &output);
+ if (result != VK_SUCCESS)
+ return result;
+
+ srv_output = to_pvr_srv_winsys_syncobj(output);
+
+ /* Target carries no fence: output becomes a duplicate of src's fence. */
+ if (!srv_target || srv_target->fd == -1) {
+ int fd = dup(srv_src->fd);
+ if (fd < 0) {
+ result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto err_syncobj_destroy;
+ }
+
+ /* Output takes ownership of the dup'd fd. */
+ pvr_srv_set_syncobj_payload(output, fd);
+ if (target)
+ pvr_srv_winsys_syncobj_destroy(target);
+ *syncobj_out = output;
+ return VK_SUCCESS;
+ }
+
+ /* Both carry fences: combine them into a single sync file. */
+ srv_output->fd = sync_merge("", srv_src->fd, srv_target->fd);
+ if (srv_output->fd < 0) {
+ result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+ goto err_syncobj_destroy;
+ }
+
+ pvr_srv_winsys_syncobj_destroy(target);
+
+ *syncobj_out = output;
+
+ return VK_SUCCESS;
+
+err_syncobj_destroy:
+ pvr_srv_winsys_syncobj_destroy(output);
+
+ return result;
+}
--- /dev/null
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_SYNCOBJ_H
+#define PVR_SRV_SYNCOBJ_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/* Services backend implementation of pvr_winsys_syncobj, backed by an
+ * optional sync-file fd.
+ */
+struct pvr_srv_winsys_syncobj {
+ struct pvr_winsys_syncobj base;
+
+ /* Cached version of completion. */
+ bool signaled;
+
+ /* Fence fd owned by this syncobj, or -1 when no fence is attached. */
+ int fd;
+};
+
+#define to_pvr_srv_winsys_syncobj(syncobj) \
+ container_of(syncobj, struct pvr_srv_winsys_syncobj, base)
+
+/*******************************************
+ function prototypes
+ *******************************************/
+
+VkResult
+pvr_srv_winsys_syncobj_create(struct pvr_winsys *ws,
+ bool signaled,
+ struct pvr_winsys_syncobj **const syncobj_out);
+void pvr_srv_winsys_syncobj_destroy(struct pvr_winsys_syncobj *syncobj);
+VkResult
+pvr_srv_winsys_syncobjs_reset(struct pvr_winsys *ws,
+ struct pvr_winsys_syncobj **const syncobjs,
+ uint32_t count);
+VkResult
+pvr_srv_winsys_syncobjs_signal(struct pvr_winsys *ws,
+ struct pvr_winsys_syncobj **const syncobjs,
+ uint32_t count);
+VkResult
+pvr_srv_winsys_syncobjs_wait(struct pvr_winsys *ws,
+ struct pvr_winsys_syncobj **const syncobjs,
+ uint32_t count,
+ bool wait_all,
+ uint64_t timeout);
+VkResult pvr_srv_winsys_syncobjs_merge(struct pvr_winsys_syncobj *src,
+ struct pvr_winsys_syncobj *target,
+ struct pvr_winsys_syncobj **out);
+
+/* Takes ownership of "payload" (a fence fd, or -1 for none). */
+void pvr_srv_set_syncobj_payload(struct pvr_winsys_syncobj *syncobj,
+ int payload);
+
+#endif /* PVR_SRV_SYNCOBJ_H */
if with_gallium_freedreno or with_freedreno_vk or with_tools.contains('freedreno')
subdir('freedreno')
endif
+if with_imagination_vk
+ subdir('imagination')
+endif
if with_gallium_panfrost or with_gallium_lima or with_panfrost_vk or with_tools.contains('panfrost')
subdir('panfrost')
endif