pvr: Add a Vulkan driver for Imagination Technologies PowerVR Rogue GPUs
authorFrank Binns <frank.binns@imgtec.com>
Fri, 25 Feb 2022 10:28:39 +0000 (10:28 +0000)
committerFrank Binns <frank.binns@imgtec.com>
Tue, 22 Mar 2022 15:04:55 +0000 (15:04 +0000)
Co-authored-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Co-authored-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Co-authored-by: Simon Perretta <simon.perretta@imgtec.com>
Co-authored-by: Alexander Wasey <Alexander.Wasey@imgtec.com>
Signed-off-by: Frank Binns <frank.binns@imgtec.com>
Signed-off-by: Rajnesh Kanwal <rajnesh.kanwal@imgtec.com>
Signed-off-by: Karmjit Mahil <Karmjit.Mahil@imgtec.com>
Signed-off-by: Simon Perretta <simon.perretta@imgtec.com>
Signed-off-by: Alexander Wasey <Alexander.Wasey@imgtec.com>
Acked-by: Jason Ekstrand <jason.ekstrand@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15243>

158 files changed:
CODEOWNERS
meson.build
meson_options.txt
src/imagination/.clang-format [new file with mode: 0644]
src/imagination/.dir-locals.el [new file with mode: 0644]
src/imagination/.editorconfig [new file with mode: 0644]
src/imagination/common/meson.build [new file with mode: 0644]
src/imagination/common/pvr_device_info.c [new file with mode: 0644]
src/imagination/common/pvr_device_info.h [new file with mode: 0644]
src/imagination/csbgen/gen_pack_header.py [new file with mode: 0644]
src/imagination/csbgen/meson.build [new file with mode: 0644]
src/imagination/csbgen/pvr_packet_helpers.h [new file with mode: 0644]
src/imagination/csbgen/rogue_cdm.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_cr.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_hwdefs.h [new file with mode: 0644]
src/imagination/csbgen/rogue_ipf.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_lls.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_pbestate.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_pds.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_ppp.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_texstate.xml [new file with mode: 0644]
src/imagination/csbgen/rogue_vdm.xml [new file with mode: 0644]
src/imagination/include/hwdef/rogue_hw_defs.h [new file with mode: 0644]
src/imagination/include/hwdef/rogue_hw_utils.h [new file with mode: 0644]
src/imagination/include/pvr_rogue_fw.h [new file with mode: 0644]
src/imagination/meson.build [new file with mode: 0644]
src/imagination/rogue/meson.build [new file with mode: 0644]
src/imagination/rogue/nir/rogue_nir_constreg.c [new file with mode: 0644]
src/imagination/rogue/nir/rogue_nir_lower_io.c [new file with mode: 0644]
src/imagination/rogue/nir/rogue_nir_pfo.c [new file with mode: 0644]
src/imagination/rogue/rogue.c [new file with mode: 0644]
src/imagination/rogue/rogue.h [new file with mode: 0644]
src/imagination/rogue/rogue_build_data.c [new file with mode: 0644]
src/imagination/rogue/rogue_build_data.h [new file with mode: 0644]
src/imagination/rogue/rogue_compiler.c [new file with mode: 0644]
src/imagination/rogue/rogue_compiler.h [new file with mode: 0644]
src/imagination/rogue/rogue_constreg.c [new file with mode: 0644]
src/imagination/rogue/rogue_constreg.h [new file with mode: 0644]
src/imagination/rogue/rogue_dump.c [new file with mode: 0644]
src/imagination/rogue/rogue_dump.h [new file with mode: 0644]
src/imagination/rogue/rogue_encode.c [new file with mode: 0644]
src/imagination/rogue/rogue_encode.h [new file with mode: 0644]
src/imagination/rogue/rogue_encoders.c [new file with mode: 0644]
src/imagination/rogue/rogue_encoders.h [new file with mode: 0644]
src/imagination/rogue/rogue_instr.c [new file with mode: 0644]
src/imagination/rogue/rogue_instr.h [new file with mode: 0644]
src/imagination/rogue/rogue_nir.c [new file with mode: 0644]
src/imagination/rogue/rogue_nir.h [new file with mode: 0644]
src/imagination/rogue/rogue_nir_helpers.h [new file with mode: 0644]
src/imagination/rogue/rogue_operand.c [new file with mode: 0644]
src/imagination/rogue/rogue_operand.h [new file with mode: 0644]
src/imagination/rogue/rogue_regalloc.c [new file with mode: 0644]
src/imagination/rogue/rogue_regalloc.h [new file with mode: 0644]
src/imagination/rogue/rogue_shader.c [new file with mode: 0644]
src/imagination/rogue/rogue_shader.h [new file with mode: 0644]
src/imagination/rogue/rogue_util.c [new file with mode: 0644]
src/imagination/rogue/rogue_util.h [new file with mode: 0644]
src/imagination/rogue/rogue_validate.c [new file with mode: 0644]
src/imagination/rogue/rogue_validate.h [new file with mode: 0644]
src/imagination/rogue/tools/offline_compiler.c [new file with mode: 0644]
src/imagination/vulkan/meson.build [new file with mode: 0644]
src/imagination/vulkan/pds/meson.build [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds.c [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_disasm.c [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_printer.c [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays0.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays1.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays2.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays3.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance0.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance1.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance2.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance3.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid0.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid1.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid2.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid3.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements0.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements1.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements2.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements3.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance0.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance1.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance2.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance3.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid0.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid1.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid2.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid3.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_rogue_pds_defs.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_rogue_pds_disasm.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_rogue_pds_encode.h [new file with mode: 0644]
src/imagination/vulkan/pds/pvr_xgl_pds.c [new file with mode: 0644]
src/imagination/vulkan/pvr_blit.c [new file with mode: 0644]
src/imagination/vulkan/pvr_bo.c [new file with mode: 0644]
src/imagination/vulkan/pvr_bo.h [new file with mode: 0644]
src/imagination/vulkan/pvr_cmd_buffer.c [new file with mode: 0644]
src/imagination/vulkan/pvr_csb.c [new file with mode: 0644]
src/imagination/vulkan/pvr_csb.h [new file with mode: 0644]
src/imagination/vulkan/pvr_descriptor_set.c [new file with mode: 0644]
src/imagination/vulkan/pvr_device.c [new file with mode: 0644]
src/imagination/vulkan/pvr_formats.c [new file with mode: 0644]
src/imagination/vulkan/pvr_formats.h [new file with mode: 0644]
src/imagination/vulkan/pvr_hw_pass.c [new file with mode: 0644]
src/imagination/vulkan/pvr_hw_pass.h [new file with mode: 0644]
src/imagination/vulkan/pvr_image.c [new file with mode: 0644]
src/imagination/vulkan/pvr_job_common.c [new file with mode: 0644]
src/imagination/vulkan/pvr_job_common.h [new file with mode: 0644]
src/imagination/vulkan/pvr_job_compute.c [new file with mode: 0644]
src/imagination/vulkan/pvr_job_compute.h [new file with mode: 0644]
src/imagination/vulkan/pvr_job_context.c [new file with mode: 0644]
src/imagination/vulkan/pvr_job_context.h [new file with mode: 0644]
src/imagination/vulkan/pvr_job_render.c [new file with mode: 0644]
src/imagination/vulkan/pvr_job_render.h [new file with mode: 0644]
src/imagination/vulkan/pvr_limits.h [new file with mode: 0644]
src/imagination/vulkan/pvr_pass.c [new file with mode: 0644]
src/imagination/vulkan/pvr_pipeline.c [new file with mode: 0644]
src/imagination/vulkan/pvr_pipeline_cache.c [new file with mode: 0644]
src/imagination/vulkan/pvr_private.h [new file with mode: 0644]
src/imagination/vulkan/pvr_query.c [new file with mode: 0644]
src/imagination/vulkan/pvr_queue.c [new file with mode: 0644]
src/imagination/vulkan/pvr_shader.c [new file with mode: 0644]
src/imagination/vulkan/pvr_shader.h [new file with mode: 0644]
src/imagination/vulkan/pvr_tex_state.c [new file with mode: 0644]
src/imagination/vulkan/pvr_tex_state.h [new file with mode: 0644]
src/imagination/vulkan/pvr_wsi.c [new file with mode: 0644]
src/imagination/vulkan/usc/programs/pvr_cdm_load_sr.h [new file with mode: 0644]
src/imagination/vulkan/usc/programs/pvr_end_of_tile.h [new file with mode: 0644]
src/imagination/vulkan/usc/programs/pvr_usc_compute_shader.h [new file with mode: 0644]
src/imagination/vulkan/usc/programs/pvr_usc_fragment_shader.h [new file with mode: 0644]
src/imagination/vulkan/usc/programs/pvr_vdm_load_sr.h [new file with mode: 0644]
src/imagination/vulkan/usc/programs/pvr_vdm_store_sr.h [new file with mode: 0644]
src/imagination/vulkan/vk_format.h [new file with mode: 0644]
src/imagination/vulkan/winsys/powervr/pvr_drm.c [new file with mode: 0644]
src/imagination/vulkan/winsys/powervr/pvr_drm_public.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvr_winsys.c [new file with mode: 0644]
src/imagination/vulkan/winsys/pvr_winsys.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvr_winsys_helper.c [new file with mode: 0644]
src/imagination/vulkan/winsys/pvr_winsys_helper.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_rf.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_shared.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.c [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_common.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_public.h [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.c [new file with mode: 0644]
src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.h [new file with mode: 0644]
src/meson.build

index 55a5aa5..1eedf46 100644 (file)
@@ -109,6 +109,11 @@ meson.build @dbaker @eric
 # Freedreno
 /src/gallium/drivers/freedreno/ @robclark
 
+# Imagination
+/include/drm-uapi/pvr_drm.h @CreativeCylon @frankbinns @rajnesh-kanwal
+/src/imagination/ @CreativeCylon @frankbinns @rajnesh-kanwal
+/src/imagination/rogue/ @simon-perretta-img
+
 # Intel
 /include/drm-uapi/i915_drm.h @kwg @llandwerlin @jekstrand @idr
 /include/pci_ids/i*_pci_ids.h @kwg @llandwerlin @jekstrand @idr
index 2416594..764381c 100644 (file)
@@ -272,6 +272,8 @@ with_swrast_vk = _vulkan_drivers.contains('swrast')
 with_virtio_vk = _vulkan_drivers.contains('virtio-experimental')
 with_freedreno_kgsl = get_option('freedreno-kgsl')
 with_broadcom_vk = _vulkan_drivers.contains('broadcom')
+with_imagination_vk = _vulkan_drivers.contains('imagination-experimental')
+with_imagination_srv = get_option('imagination-srv')
 with_any_vk = _vulkan_drivers.length() != 0
 
 with_any_broadcom = with_gallium_vc4 or with_gallium_v3d or with_broadcom_vk
index e5df473..9fe70b1 100644 (file)
@@ -192,7 +192,7 @@ option(
   'vulkan-drivers',
   type : 'array',
   value : ['auto'],
-  choices : ['auto', 'amd', 'broadcom', 'freedreno', 'intel', 'panfrost', 'swrast', 'virtio-experimental'],
+  choices : ['auto', 'amd', 'broadcom', 'freedreno', 'imagination-experimental', 'intel', 'panfrost', 'swrast', 'virtio-experimental'],
   description : 'List of vulkan drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
 option(
@@ -202,6 +202,12 @@ option(
   description : 'use kgsl backend for freedreno vulkan driver',
 )
 option(
+  'imagination-srv',
+  type : 'boolean',
+  value : false,
+  description : 'Enable Services backend for Imagination Technologies vulkan driver',
+)
+option(
   'shader-cache',
   type : 'combo',
   value : 'auto',
diff --git a/src/imagination/.clang-format b/src/imagination/.clang-format
new file mode 100644 (file)
index 0000000..db00dca
--- /dev/null
@@ -0,0 +1,247 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+Language: Cpp
+Standard: c++11
+
+UseCRLF: false
+ColumnLimit: 80
+
+DeriveLineEnding: false
+DerivePointerAlignment: false
+ExperimentalAutoDetectBinPacking: false
+
+DisableFormat: false
+
+########
+# Tabs #
+########
+UseTab: Never
+TabWidth: 3
+
+ConstructorInitializerIndentWidth: 6
+ContinuationIndentWidth: 3
+
+IndentWidth: 3
+#IndentCaseBlocks: true  # Requires clang-11
+IndentCaseLabels: false
+#IndentExternBlock: NoIndent  # Requires clang-11
+IndentGotoLabels: false
+IndentPPDirectives: AfterHash
+IndentWrappedFunctionNames: false
+AccessModifierOffset: -4  # -IndentWidth
+
+NamespaceIndentation: None
+
+##########
+# Braces #
+##########
+AlignAfterOpenBracket: Align
+AllowAllArgumentsOnNextLine: false
+AllowAllConstructorInitializersOnNextLine: false
+AllowAllParametersOfDeclarationOnNextLine: false
+BinPackArguments: false
+BinPackParameters: false
+
+Cpp11BracedListStyle: false
+
+########################
+# Whitespace Alignment #
+########################
+AlignConsecutiveAssignments: false
+#AlignConsecutiveBitFields: false  # Requires clang-11
+AlignConsecutiveDeclarations: false
+AlignConsecutiveMacros: false
+AlignTrailingComments: false
+
+AlignEscapedNewlines: Left
+
+#AlignOperands: Align  # Requires clang-11
+#BitFieldColonSpacing: Both  # Requires clang-12
+
+PointerAlignment: Right
+#SpaceAroundPointerQualifiers: Both  # Requires clang-12
+
+SpaceAfterCStyleCast: false
+SpaceAfterLogicalNot: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeCpp11BracedList: false
+SpaceBeforeCtorInitializerColon: true
+SpaceBeforeInheritanceColon: true
+SpaceBeforeParens: ControlStatements
+#SpaceBeforeParens: ControlStatementsExceptForEachMacros  # Requires clang-11
+SpaceBeforeRangeBasedForLoopColon: true
+SpaceBeforeSquareBrackets: false
+SpaceInEmptyBlock: false
+SpaceInEmptyParentheses: false
+SpacesInAngles: false
+SpacesInCStyleCastParentheses: false
+SpacesInConditionalStatement: false
+SpacesInContainerLiterals: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+SpacesBeforeTrailingComments: 2
+
+############################
+# Multi-line constructions #
+############################
+AllowShortBlocksOnASingleLine: Empty
+AllowShortCaseLabelsOnASingleLine: false
+#AllowShortEnumsOnASingleLine: false  # Requires clang-11
+AllowShortFunctionsOnASingleLine: Empty
+AllowShortIfStatementsOnASingleLine: Never
+AllowShortLambdasOnASingleLine: All
+AllowShortLoopsOnASingleLine: false
+
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: false
+AlwaysBreakTemplateDeclarations: Yes
+
+BreakBeforeBraces: Custom
+BraceWrapping:
+  AfterCaseLabel: false
+  AfterClass: false
+  AfterControlStatement: Never
+  AfterEnum: false
+  AfterFunction: true
+  AfterNamespace: true
+  AfterObjCDeclaration: false
+  AfterStruct: false
+  AfterUnion: false
+  BeforeCatch: false
+  BeforeElse: false
+#  BeforeLambdaBody: false  # Requires clang-11
+  IndentBraces: false
+  SplitEmptyFunction: true
+  SplitEmptyNamespace: true
+  SplitEmptyRecord: true
+
+BreakBeforeBinaryOperators: None
+BreakBeforeTernaryOperators: true
+
+BreakConstructorInitializers: AfterColon
+BreakInheritanceList: AfterColon
+
+BreakStringLiterals: false
+
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+
+#InsertTrailingCommas: Wrapped  # Requires clang-11
+
+KeepEmptyLinesAtTheStartOfBlocks: false
+MaxEmptyLinesToKeep: 1
+
+SortUsingDeclarations: true
+
+############
+# Includes #
+############
+# TODO: Temporary config
+IncludeBlocks: Preserve
+SortIncludes: false
+# TODO: This requires additional work to clean up headers & includes first
+#IncludeBlocks: Regroup
+#SortIncludes: true
+#IncludeIsMainRegex: '(_test)?$'
+##IncludeIsMainSourceRegex: <default>
+#IncludeCategories:
+#  - Regex:        '^"'
+#    Priority:     1
+
+############
+# Comments #
+############
+FixNamespaceComments: false
+
+#############
+# Penalties #
+#############
+# Taken from torvalds/kernel:.clang-format
+PenaltyBreakAssignment: 10
+PenaltyBreakBeforeFirstCallParameter: 30
+PenaltyBreakComment: 10
+PenaltyBreakFirstLessLess: 0
+PenaltyBreakString: 10
+PenaltyBreakTemplateDeclaration: 10
+PenaltyExcessCharacter: 100
+PenaltyReturnTypeOnItsOwnLine: 60
+
+#######################
+# User-defined macros #
+#######################
+CommentPragmas: '^ IWYU pragma:'
+
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+
+#AttributeMacros: []  # Requires clang-12
+
+ForEachMacros: [
+  'foreach_instr',
+  'foreach_instr_safe',
+  'hash_table_foreach',
+  'LIST_FOR_EACH_ENTRY',
+  'LIST_FOR_EACH_ENTRY_FROM',
+  'LIST_FOR_EACH_ENTRY_FROM_REV',
+  'LIST_FOR_EACH_ENTRY_SAFE',
+  'LIST_FOR_EACH_ENTRY_SAFE_REV',
+  'list_for_each_entry',
+  'list_for_each_entry_from',
+  'list_for_each_entry_from_rev',
+  'list_for_each_entry_from_safe',
+  'list_for_each_entry_rev',
+  'list_for_each_entry_safe',
+  'list_for_each_entry_safe_rev',
+  'list_pair_for_each_entry',
+  'pvr_csb_emit',
+  'pvr_csb_emit_merge',
+  'pvr_csb_pack',
+  'nir_foreach_block',
+  'nir_foreach_block_safe',
+  'nir_foreach_function',
+  'nir_foreach_instr',
+  'nir_foreach_instr_safe',
+  'nir_foreach_shader_in_variable',
+  'nir_foreach_shader_out_variable',
+  'nir_foreach_use',
+  'nir_foreach_use_safe',
+  'nir_foreach_variable_with_modes',
+  'u_vector_foreach',
+  'util_dynarray_foreach',
+  'vk_foreach_struct',
+  'vk_foreach_struct_const',
+# FIXME: vk_outarray_append doesn't fit here, remove
+# it when a better solution exists for it.
+  'vk_outarray_append'
+]
+
+NamespaceMacros: [
+]
+
+StatementMacros: [
+]
+
+TypenameMacros: [
+]
+
+#WhitespaceSensitiveMacros: []  # Requires clang-11
diff --git a/src/imagination/.dir-locals.el b/src/imagination/.dir-locals.el
new file mode 100644 (file)
index 0000000..05e2a13
--- /dev/null
@@ -0,0 +1,9 @@
+((nil . ((show-trailing-whitespace . t)))
+ (prog-mode
+  (indent-tabs-mode . nil)
+  (tab-width . 3)
+  (c-basic-offset . 3)
+  (c-file-style . "linux")
+  (fill-column . 80)
+  )
+ )
diff --git a/src/imagination/.editorconfig b/src/imagination/.editorconfig
new file mode 100644 (file)
index 0000000..d98c43a
--- /dev/null
@@ -0,0 +1,2 @@
+[*.{c,h,cpp,hpp,cc,hh}]
+max_line_length = 80
diff --git a/src/imagination/common/meson.build b/src/imagination/common/meson.build
new file mode 100644 (file)
index 0000000..6948c8d
--- /dev/null
@@ -0,0 +1,33 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+libpowervr_common = static_library(
+  'powervr_common',
+  [
+    'pvr_device_info.c',
+  ],
+  include_directories : [
+    inc_include,
+    inc_src,
+  ],
+  c_args : [no_override_init_args],
+  gnu_symbol_visibility : 'hidden',
+)
diff --git a/src/imagination/common/pvr_device_info.c b/src/imagination/common/pvr_device_info.c
new file mode 100644 (file)
index 0000000..342dba4
--- /dev/null
@@ -0,0 +1,294 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* TODO: This file is currently hand-maintained. However, the intention is to
+ * auto-generate it in the future based on the hwdefs.
+ */
+
+#include <assert.h>
+#include <errno.h>
+
+#include "pvr_device_info.h"
+
+/* GX6250 (Rogue series), BVNC 4.V.2.51: identity reported to callers. */
+const struct pvr_device_ident pvr_device_ident_4_V_2_51 = {
+   .device_id = 0x6250,
+   .series_name = "Rogue",
+   .public_name = "GX6250",
+};
+
+/* Feature presence flags, followed by the values of the present features. */
+const struct pvr_device_features pvr_device_features_4_V_2_51 = {
+   .has_astc = true,
+   .has_cluster_grouping = true,
+   .has_common_store_size_in_dwords = true,
+   .has_compute = true,
+   .has_compute_morton_capable = true,
+   .has_compute_overlap = true,
+   .has_eight_output_registers = true,
+   .has_gs_rta_support = true,
+   .has_isp_max_tiles_in_flight = true,
+   .has_isp_samples_per_pixel = true,
+   .has_max_multisample = true,
+   .has_max_partitions = true,
+   .has_max_usc_tasks = true,
+   .has_num_clusters = true,
+   .has_num_raster_pipes = true,
+   .has_num_user_clip_planes = true,
+   .has_robust_buffer_access = true,
+   .has_slc_cache_line_size_bits = true,
+   .has_slc_mcu_cache_controls = true,
+   .has_tile_size_x = true,
+   .has_tile_size_y = true,
+   .has_tpu_array_textures = true,
+   .has_tpu_extended_integer_lookup = true,
+   .has_tpu_image_state_v2 = true,
+   .has_usc_f16sop_u8 = true,
+   .has_usc_min_output_registers_per_pix = true,
+   .has_uvs_banks = true,
+   .has_uvs_pba_entries = true,
+   .has_uvs_vtx_entries = true,
+   .has_vdm_cam_size = true,
+   .has_xt_top_infrastructure = true,
+   .has_zls_subtile = true,
+
+   .common_store_size_in_dwords = 1280U * 4U * 4U,
+   .isp_max_tiles_in_flight = 4U,
+   .isp_samples_per_pixel = 2U,
+   .max_multisample = 8U,
+   .max_partitions = 8U,
+   .max_usc_tasks = 56U,
+   .num_clusters = 2U,
+   .num_raster_pipes = 1U,
+   .num_user_clip_planes = 8U,
+   .slc_cache_line_size_bits = 512U,
+   .tile_size_x = 32U,
+   .tile_size_y = 32U,
+   .usc_min_output_registers_per_pix = 2U,
+   .uvs_banks = 8U,
+   .uvs_pba_entries = 320U,
+   .uvs_vtx_entries = 288U,
+   .vdm_cam_size = 256U,
+};
+
+/* Hardware enhancements (ERNs) present on BVNC 4.40.2.51. */
+const struct pvr_device_enhancements pvr_device_enhancements_4_40_2_51 = {
+   .has_ern35421 = true,
+   .has_ern38020 = true,
+   .has_ern38748 = true,
+   .has_ern42307 = true,
+};
+
+/* Hardware bugs requiring workarounds (BRNs) on BVNC 4.40.2.51. */
+const struct pvr_device_quirks pvr_device_quirks_4_40_2_51 = {
+   .has_brn44079 = true,
+   .has_brn47727 = true,
+   .has_brn48492 = true,
+   .has_brn48545 = true,
+   .has_brn49032 = true,
+   .has_brn51210 = true,
+   .has_brn51764 = true,
+   .has_brn52354 = true,
+   .has_brn52942 = true,
+   .has_brn56279 = true,
+   .has_brn58839 = true,
+   .has_brn62269 = true,
+   .has_brn66011 = true,
+   .has_brn70165 = true,
+};
+
+/* AXE-1-16M (A-Series), BVNC 33.V.11.3: identity reported to callers. */
+const struct pvr_device_ident pvr_device_ident_33_V_11_3 = {
+   .device_id = 0x33011003,
+   .series_name = "A-Series",
+   .public_name = "AXE-1-16M",
+};
+
+/* Feature presence flags, followed by the values of the present features. */
+const struct pvr_device_features pvr_device_features_33_V_11_3 = {
+   .has_common_store_size_in_dwords = true,
+   .has_compute = true,
+   .has_isp_max_tiles_in_flight = true,
+   .has_isp_samples_per_pixel = true,
+   .has_max_multisample = true,
+   .has_max_partitions = true,
+   .has_max_usc_tasks = true,
+   .has_num_clusters = true,
+   .has_num_raster_pipes = true,
+   .has_num_user_clip_planes = true,
+   .has_roguexe = true,
+   .has_screen_size8K = true,
+   .has_simple_internal_parameter_format = true,
+   .has_simple_internal_parameter_format_v2 = true,
+   .has_simple_parameter_format_version = true,
+   .has_slc_cache_line_size_bits = true,
+   .has_tile_size_x = true,
+   .has_tile_size_y = true,
+   .has_tile_size_16x16 = true,
+   .has_tpu_extended_integer_lookup = true,
+   .has_tpu_image_state_v2 = true,
+   .has_usc_f16sop_u8 = true,
+   .has_usc_min_output_registers_per_pix = true,
+   .has_usc_pixel_partition_mask = true,
+   .has_uvs_banks = true,
+   .has_uvs_pba_entries = true,
+   .has_uvs_vtx_entries = true,
+   .has_vdm_cam_size = true,
+
+   .common_store_size_in_dwords = 512U * 4U * 4U,
+   .isp_max_tiles_in_flight = 1U,
+   .isp_samples_per_pixel = 1U,
+   .max_multisample = 4U,
+   .max_partitions = 4U,
+   .max_usc_tasks = 24U,
+   .num_clusters = 1U,
+   .num_raster_pipes = 1U,
+   .num_user_clip_planes = 8U,
+   .simple_parameter_format_version = 2U,
+   .slc_cache_line_size_bits = 512U,
+   .tile_size_x = 16U,
+   .tile_size_y = 16U,
+   .usc_min_output_registers_per_pix = 1U,
+   .uvs_banks = 2U,
+   .uvs_pba_entries = 320U,
+   .uvs_vtx_entries = 288U,
+   .vdm_cam_size = 32U,
+
+   .has_s8xe = true,
+};
+
+/* Hardware enhancements (ERNs) present on BVNC 33.15.11.3. */
+const struct pvr_device_enhancements pvr_device_enhancements_33_15_11_3 = {
+   .has_ern35421 = true,
+   .has_ern38748 = true,
+   .has_ern42307 = true,
+   .has_ern45493 = true,
+};
+
+/* Hardware bugs requiring workarounds (BRNs) on BVNC 33.15.11.3. */
+const struct pvr_device_quirks pvr_device_quirks_33_15_11_3 = {
+   .has_brn70165 = true,
+};
+
+/* BXS-4-64 (B-Series), BVNC 36.V.104.796: identity reported to callers. */
+const struct pvr_device_ident pvr_device_ident_36_V_104_796 = {
+   .device_id = 0x36104796,
+   .series_name = "B-Series",
+   .public_name = "BXS-4-64",
+};
+
+/* Feature presence flags, followed by the values of the present features. */
+const struct pvr_device_features pvr_device_features_36_V_104_796 = {
+   .has_astc = true,
+   .has_common_store_size_in_dwords = true,
+   .has_compute = true,
+   .has_compute_overlap = true,
+   .has_gpu_multicore_support = true,
+   .has_gs_rta_support = true,
+   .has_isp_max_tiles_in_flight = true,
+   .has_isp_samples_per_pixel = true,
+   .has_max_multisample = true,
+   .has_max_partitions = true,
+   .has_max_usc_tasks = true,
+   .has_num_clusters = true,
+   .has_num_raster_pipes = true,
+   .has_num_user_clip_planes = true,
+   .has_paired_tiles = true,
+   .has_pds_ddmadt = true,
+   .has_roguexe = true,
+   .has_screen_size8K = true,
+   .has_simple_internal_parameter_format = true,
+   .has_simple_internal_parameter_format_v2 = true,
+   .has_simple_parameter_format_version = true,
+   .has_slc_cache_line_size_bits = true,
+   .has_tile_size_x = true,
+   .has_tile_size_y = true,
+   .has_tile_size_16x16 = true,
+   .has_tpu_extended_integer_lookup = true,
+   .has_tpu_image_state_v2 = true,
+   .has_usc_f16sop_u8 = true,
+   .has_usc_min_output_registers_per_pix = true,
+   .has_usc_pixel_partition_mask = true,
+   .has_uvs_banks = true,
+   .has_uvs_pba_entries = true,
+   .has_uvs_vtx_entries = true,
+   .has_vdm_cam_size = true,
+   .has_xpu_max_slaves = true,
+
+   .common_store_size_in_dwords = 1344U * 4U * 4U,
+   .isp_max_tiles_in_flight = 6U,
+   .isp_samples_per_pixel = 4U,
+   .max_multisample = 4U,
+   .max_partitions = 16U,
+   .max_usc_tasks = 156U,
+   .num_clusters = 1U,
+   .num_raster_pipes = 1U,
+   .num_user_clip_planes = 8U,
+   .simple_parameter_format_version = 2U,
+   .slc_cache_line_size_bits = 512U,
+   .tile_size_x = 16U,
+   .tile_size_y = 16U,
+   .usc_min_output_registers_per_pix = 2U,
+   .uvs_banks = 8U,
+   .uvs_pba_entries = 160U,
+   .uvs_vtx_entries = 144U,
+   .vdm_cam_size = 64U,
+   .xpu_max_slaves = 3U,
+
+   .has_s8xe = true,
+};
+
+/* Hardware enhancements (ERNs) present on BVNC 36.53.104.796. */
+const struct pvr_device_enhancements pvr_device_enhancements_36_53_104_796 = {
+   .has_ern35421 = true,
+   .has_ern38748 = true,
+   .has_ern42307 = true,
+   .has_ern45493 = true,
+};
+
+/* Hardware bugs requiring workarounds (BRNs) on BVNC 36.53.104.796. */
+const struct pvr_device_quirks pvr_device_quirks_36_53_104_796 = {
+   .has_brn44079 = true,
+   .has_brn70165 = true,
+};
+
+/**
+ * Initialize PowerVR device information.
+ *
+ * \param info Device info structure to initialize.
+ * \param bvnc Packed BVNC.
+ * \return
+ *  * 0 on success, or
+ *  * -%ENODEV if the device is not supported.
+ */
+int pvr_device_info_init(struct pvr_device_info *info, uint64_t bvnc)
+{
+#define CASE_PACKED_BVNC_DEVICE_INFO(_b, _v, _n, _c)                          \
+   case PVR_BVNC_PACK(_b, _v, _n, _c):                                        \
+      info->ident = pvr_device_ident_##_b##_V_##_n##_##_c;                    \
+      info->ident.b = _b;                                                     \
+      info->ident.n = _n;                                                     \
+      info->ident.v = _v;                                                     \
+      info->ident.c = _c;                                                     \
+      info->features = pvr_device_features_##_b##_V_##_n##_##_c;              \
+      info->enhancements = pvr_device_enhancements_##_b##_##_v##_##_n##_##_c; \
+      info->quirks = pvr_device_quirks_##_b##_##_v##_##_n##_##_c;             \
+      return 0
+
+   switch (bvnc) {
+      CASE_PACKED_BVNC_DEVICE_INFO(4, 40, 2, 51);
+   }
+
+#undef CASE_PACKED_BVNC_DEVICE_INFO
+
+   assert(!"Unsupported Device");
+
+   return -ENODEV;
+}
diff --git a/src/imagination/common/pvr_device_info.h b/src/imagination/common/pvr_device_info.h
new file mode 100644 (file)
index 0000000..c192d58
--- /dev/null
@@ -0,0 +1,381 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DEVICE_INFO_H
+#define PVR_DEVICE_INFO_H
+
+/* TODO: This file is currently hand-maintained. However, the intention is to
+ * auto-generate it in the future based on the hwdefs.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "util/log.h"
+#include "util/macros.h"
+
+#define PVR_BVNC_PACK_SHIFT_B 48
+#define PVR_BVNC_PACK_SHIFT_V 32
+#define PVR_BVNC_PACK_SHIFT_N 16
+#define PVR_BVNC_PACK_SHIFT_C 0
+
+#define PVR_BVNC_PACK_MASK_B UINT64_C(0xFFFF000000000000)
+#define PVR_BVNC_PACK_MASK_V UINT64_C(0x0000FFFF00000000)
+#define PVR_BVNC_PACK_MASK_N UINT64_C(0x00000000FFFF0000)
+#define PVR_BVNC_PACK_MASK_C UINT64_C(0x000000000000FFFF)
+
+/**
+ * Packs B, V, N and C values into a 64-bit unsigned integer.
+ *
+ * The packed layout is as follows:
+ *
+ * \verbatim
+ *    +--------+--------+--------+-------+
+ *    | 63..48 | 47..32 | 31..16 | 15..0 |
+ *    +========+========+========+=======+
+ *    | B      | V      | N      | C     |
+ *    +--------+--------+--------+-------+
+ * \endverbatim
+ *
+ * #pvr_get_packed_bvnc() should be used instead of this macro when a
+ * #pvr_device_information is available in order to ensure proper type checking.
+ *
+ * \param b Branch ID.
+ * \param v Version ID.
+ * \param n Number of scalable units.
+ * \param c Config ID.
+ * \return Packed BVNC.
+ *
+ * \sa #pvr_get_packed_bvnc(), #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(),
+ * #PVR_BVNC_UNPACK_N() and #PVR_BVNC_UNPACK_C()
+ */
+#define PVR_BVNC_PACK(b, v, n, c)                                       \
+   ((((uint64_t)(b) << PVR_BVNC_PACK_SHIFT_B) & PVR_BVNC_PACK_MASK_B) | \
+    (((uint64_t)(v) << PVR_BVNC_PACK_SHIFT_V) & PVR_BVNC_PACK_MASK_V) | \
+    (((uint64_t)(n) << PVR_BVNC_PACK_SHIFT_N) & PVR_BVNC_PACK_MASK_N) | \
+    (((uint64_t)(c) << PVR_BVNC_PACK_SHIFT_C) & PVR_BVNC_PACK_MASK_C))
+
+/**
+ * Unpacks B value (branch ID) from packed BVNC.
+ *
+ * \param bvnc Packed BVNC.
+ * \return Branch ID.
+ *
+ * \sa #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_N(), #PVR_BVNC_UNPACK_C(),
+ * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK()
+ */
+#define PVR_BVNC_UNPACK_B(bvnc) \
+   ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_B) >> PVR_BVNC_PACK_SHIFT_B))
+
+/**
+ * Unpacks V value (version ID) from packed BVNC.
+ *
+ * \param bvnc Packed BVNC.
+ * \return Version ID.
+ *
+ * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_N(), #PVR_BVNC_UNPACK_C(),
+ * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK()
+ */
+#define PVR_BVNC_UNPACK_V(bvnc) \
+   ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_V) >> PVR_BVNC_PACK_SHIFT_V))
+
+/**
+ * Unpacks N value (number of scalable units) from packed BVNC.
+ *
+ * \param bvnc Packed BVNC.
+ * \return Number of scalable units.
+ *
+ * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_C(),
+ * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK()
+ */
+#define PVR_BVNC_UNPACK_N(bvnc) \
+   ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_N) >> PVR_BVNC_PACK_SHIFT_N))
+
+/**
+ * Unpacks C value (config ID) from packed BVNC.
+ *
+ * \param bvnc Packed BVNC.
+ * \return Config ID.
+ *
+ * \sa #PVR_BVNC_UNPACK_B(), #PVR_BVNC_UNPACK_V(), #PVR_BVNC_UNPACK_N(),
+ * #pvr_get_packed_bvnc() and #PVR_BVNC_PACK()
+ */
+#define PVR_BVNC_UNPACK_C(bvnc) \
+   ((uint16_t)(((bvnc)&PVR_BVNC_PACK_MASK_C) >> PVR_BVNC_PACK_SHIFT_C))
+
+/**
+ * Tests whether a physical device has a given feature.
+ *
+ * Feature names are derived from those found in #pvr_device_features by
+ * dropping the 'has_' prefix, which is applied by this macro.
+ *
+ * \param dev_info #pvr_device_info object associated with the target physical
+ *                 device.
+ * \param feature  Device feature name.
+ *
+ * \return
+ *  * true if the named feature is present in the hardware.
+ *  * false if the named feature is not present in the hardware.
+ *
+ * \sa #PVR_FEATURE_VALUE() and #PVR_GET_FEATURE_VALUE()
+ */
+#define PVR_HAS_FEATURE(dev_info, feature) ((dev_info)->features.has_##feature)
+
+/**
+ * Gets a physical device feature value if feature is supported.
+ *
+ * Feature names are derived from those found in #pvr_device_features by
+ * dropping the 'has_' prefix.
+ *
+ * This macro should be used in preference to #PVR_GET_FEATURE_VALUE() as it has
+ * proper error handling.
+ *
+ * \param dev_info  #pvr_device_info object associated with the target physical
+ *                  device.
+ * \param feature   Feature name.
+ * \param value_out Feature value.
+ *
+ * \return
+ *  * 0 on success, or
+ *  * -%EINVAL if the named feature is not present in the hardware.
+ *
+ * \sa #PVR_HAS_FEATURE() and #PVR_GET_FEATURE_VALUE()
+ */
+#define PVR_FEATURE_VALUE(dev_info, feature, value_out)    \
+   ({                                                      \
+      const struct pvr_device_info *__dev_info = dev_info; \
+      int __ret = -EINVAL;                                 \
+      if (__dev_info->features.has_##feature) {            \
+         *(value_out) = __dev_info->features.feature;      \
+         __ret = 0;                                        \
+      }                                                    \
+      __ret;                                               \
+   })
+
+/**
+ * Gets a physical device feature value if supported, but otherwise returns a
+ * default value.
+ *
+ * Feature names are derived from those found in #pvr_device_features by
+ * dropping the 'has_' prefix.
+ *
+ * #PVR_FEATURE_VALUE() should be used in preference to this macro when errors
+ * can be returned by the caller. This macro is intended for cases where errors
+ * can't be returned.
+ *
+ * \param dev_info      #pvr_device_info object associated with the target
+ *                      physical device.
+ * \param feature       Feature name.
+ * \param default_value Default feature value.
+ *
+ * \return Feature value.
+ *
+ * \sa #PVR_HAS_FEATURE() and #PVR_FEATURE_VALUE()
+ */
+#define PVR_GET_FEATURE_VALUE(dev_info, feature, default_value)     \
+   ({                                                               \
+      const struct pvr_device_info *__dev_info = dev_info;          \
+      __typeof__(default_value) __ret = default_value;              \
+      if (__dev_info->features.has_##feature) {                     \
+         __ret = __dev_info->features.feature;                      \
+      } else {                                                      \
+         mesa_logw("Missing " #feature                              \
+                   " feature (defaulting to: " #default_value ")"); \
+         assert(0);                                                 \
+      }                                                             \
+      __ret;                                                        \
+   })
+
+/**
+ * Tests whether a physical device has a given enhancement.
+ *
+ * Enhancement numbers are derived from those found in #pvr_device_enhancements
+ * by dropping the 'has_ern' prefix, which is applied by this macro.
+ *
+ * \param dev_info #pvr_device_info object associated with the target physical
+ *                 device.
+ * \param number   Enhancement number.
+ *
+ * \return
+ *  * true if the enhancement is present in the hardware.
+ *  * false if the enhancement is not present in the hardware.
+ */
+#define PVR_HAS_ERN(dev_info, number) ((dev_info)->enhancements.has_ern##number)
+
+/**
+ * Tests whether a physical device has a given quirk.
+ *
+ * Quirk numbers are derived from those found in #pvr_device_quirks by
+ * dropping the 'has_brn' prefix, which is applied by this macro.
+ *
+ * \param dev_info #pvr_device_info object associated with the target physical
+ *                 device.
+ * \param number   Quirk number.
+ *
+ * \return
+ *  * true if the quirk is present in the hardware.
+ *  * false if the quirk is not present in the hardware.
+ */
+#define PVR_HAS_QUIRK(dev_info, number) ((dev_info)->quirks.has_brn##number)
+
/* Identity of a specific GPU core: the BVNC tuple plus naming/ID strings. */
struct pvr_device_ident {
   uint16_t b, v, n, c; /* Branch, Version, Number of scalable units, Config. */
   uint32_t device_id;
   const char *series_name;
   const char *public_name;
};
+
/* Hardware feature set of a GPU core.
 *
 * For each value-carrying member below there is a matching 'has_' bit that
 * indicates whether the value is meaningful for this core. Query features via
 * PVR_HAS_FEATURE()/PVR_FEATURE_VALUE()/PVR_GET_FEATURE_VALUE() rather than
 * reading the members directly.
 */
struct pvr_device_features {
   /* Feature-present bits. */
   bool has_astc : 1;
   bool has_cluster_grouping : 1;
   bool has_common_store_size_in_dwords : 1;
   bool has_compute : 1;
   bool has_compute_morton_capable : 1;
   bool has_compute_overlap : 1;
   bool has_eight_output_registers : 1;
   bool has_gpu_multicore_support : 1;
   bool has_gs_rta_support : 1;
   bool has_isp_max_tiles_in_flight : 1;
   bool has_isp_samples_per_pixel : 1;
   bool has_max_multisample : 1;
   bool has_max_partitions : 1;
   bool has_max_usc_tasks : 1;
   bool has_num_clusters : 1;
   bool has_num_raster_pipes : 1;
   bool has_num_user_clip_planes : 1;
   bool has_paired_tiles : 1;
   bool has_pds_ddmadt : 1;
   bool has_robust_buffer_access : 1;
   bool has_roguexe : 1;
   bool has_screen_size8K : 1;
   bool has_simple_internal_parameter_format : 1;
   bool has_simple_internal_parameter_format_v2 : 1;
   bool has_simple_parameter_format_version : 1;
   bool has_slc_cache_line_size_bits : 1;
   bool has_slc_mcu_cache_controls : 1;
   bool has_tile_size_x : 1;
   bool has_tile_size_y : 1;
   bool has_tile_size_16x16 : 1;
   bool has_tpu_array_textures : 1;
   bool has_tpu_extended_integer_lookup : 1;
   bool has_tpu_image_state_v2 : 1;
   bool has_usc_f16sop_u8 : 1;
   bool has_usc_min_output_registers_per_pix : 1;
   bool has_usc_pixel_partition_mask : 1;
   bool has_uvs_banks : 1;
   bool has_uvs_pba_entries : 1;
   bool has_uvs_vtx_entries : 1;
   bool has_vdm_cam_size : 1;
   bool has_xpu_max_slaves : 1;
   bool has_xt_top_infrastructure : 1;
   bool has_zls_subtile : 1;

   /* Feature values; valid only when the matching 'has_' bit above is set. */
   uint32_t common_store_size_in_dwords;
   uint32_t isp_max_tiles_in_flight;
   uint32_t isp_samples_per_pixel;
   uint32_t max_multisample;
   uint32_t max_partitions;
   uint32_t max_usc_tasks;
   uint32_t num_clusters;
   uint32_t num_raster_pipes;
   uint32_t num_user_clip_planes;
   uint32_t simple_parameter_format_version;
   uint32_t slc_cache_line_size_bits;
   uint32_t tile_size_x;
   uint32_t tile_size_y;
   uint32_t usc_min_output_registers_per_pix;
   uint32_t uvs_banks;
   uint32_t uvs_pba_entries;
   uint32_t uvs_vtx_entries;
   uint32_t vdm_cam_size;
   uint32_t xpu_max_slaves;

   /* Derived features. */
   bool has_s8xe : 1;
};
+
/* Hardware enhancements (ERNs) present on a GPU core; query via
 * PVR_HAS_ERN().
 */
struct pvr_device_enhancements {
   bool has_ern35421 : 1;
   bool has_ern38020 : 1;
   bool has_ern38748 : 1;
   bool has_ern42307 : 1;
   bool has_ern45493 : 1;
};
+
/* Hardware issues (BRNs) that require driver workarounds on a GPU core;
 * query via PVR_HAS_QUIRK().
 */
struct pvr_device_quirks {
   bool has_brn44079 : 1;
   bool has_brn47727 : 1;
   bool has_brn48492 : 1;
   bool has_brn48545 : 1;
   bool has_brn49032 : 1;
   bool has_brn51210 : 1;
   bool has_brn51764 : 1;
   bool has_brn52354 : 1;
   bool has_brn52942 : 1;
   bool has_brn56279 : 1;
   bool has_brn58839 : 1;
   bool has_brn62269 : 1;
   bool has_brn66011 : 1;
   bool has_brn70165 : 1;
};
+
/* Aggregate description of a GPU core, filled in by pvr_device_info_init(). */
struct pvr_device_info {
   struct pvr_device_ident ident;
   struct pvr_device_features features;
   struct pvr_device_enhancements enhancements;
   struct pvr_device_quirks quirks;
};
+
+/**
+ * Packs B, V, N and C values into a 64-bit unsigned integer.
+ *
+ * The packed layout is as follows:
+ *
+ * \verbatim
+ *    +--------+--------+--------+-------+
+ *    | 63..48 | 47..32 | 31..16 | 15..0 |
+ *    +========+========+========+=======+
+ *    | B      | V      | N      | C     |
+ *    +--------+--------+--------+-------+
+ * \endverbatim
+ *
+ * This should be used in preference to #PVR_BVNC_PACK() when a
+ * #pvr_device_info is available in order to ensure proper type checking.
+ *
+ * \param dev_info Device information.
+ * \return Packed BVNC.
+ */
+static ALWAYS_INLINE uint64_t
+pvr_get_packed_bvnc(const struct pvr_device_info *dev_info)
+{
+   return PVR_BVNC_PACK(dev_info->ident.b,
+                        dev_info->ident.v,
+                        dev_info->ident.n,
+                        dev_info->ident.c);
+}
+
+int pvr_device_info_init(struct pvr_device_info *info, uint64_t bvnc);
+
+#endif /* PVR_DEVICE_INFO_H */
diff --git a/src/imagination/csbgen/gen_pack_header.py b/src/imagination/csbgen/gen_pack_header.py
new file mode 100644 (file)
index 0000000..e6df6c8
--- /dev/null
@@ -0,0 +1,934 @@
+# encoding=utf-8
+
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# based on anv driver gen_pack_header.py which is:
+# Copyright © 2016 Intel Corporation
+
+# based on v3dv driver gen_pack_header.py which is:
+# Copyright (C) 2016 Broadcom
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+import argparse
+import ast
+import xml.parsers.expat
+import re
+import sys
+import copy
+import os
+import textwrap
+
+license = """/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */"""
+
+pack_header = """%(license)s
+
+/* Enums, structures and pack functions for %(platform)s.
+ *
+ * This file has been generated, do not hand edit.
+ */
+
+#ifndef %(guard)s
+#define %(guard)s
+
+#include "csbgen/pvr_packet_helpers.h"
+
+"""
+
def safe_name(name):
    """Return *name* unchanged if it starts with a letter, else prefix '_'.

    Ensures the name can be used as (part of) a C identifier.
    """
    return name if name[0].isalpha() else '_' + name
+
def num_from_str(num_str):
    """Parse a decimal or hexadecimal ('0x'/'0X'-prefixed) integer string.

    Strings with a leading zero (other than '0' itself and hex literals) are
    rejected instead of being silently interpreted as octal or decimal.
    """
    if num_str.lower().startswith('0x'):
        return int(num_str, base=16)

    if len(num_str) > 1 and num_str.startswith('0'):
        raise ValueError('Octal numbers not allowed')

    return int(num_str)
+
class Node:
    """Base class for csbgen XML elements.

    Tracks the parent element and derives an upper-cased, fully qualified C
    name from the ancestor chain's prefixes.
    """

    def __init__(self, parent, name, name_is_safe=False):
        self.parent = parent
        self.name = name if name_is_safe else safe_name(name)

    @property
    def full_name(self):
        # A leading underscore already acts as the separator.
        if self.name.startswith('_'):
            return self.parent.prefix + self.name.upper()

        return self.parent.prefix + "_" + self.name.upper()

    @property
    def prefix(self):
        # Elements inherit their parent's prefix unless they override this.
        return self.parent.prefix
+
class Csbgen(Node):
    """Root element of a csbgen XML document.

    Collects top-level Defines, Enums and Structs and emits the generated C
    header to stdout.
    """

    def __init__(self, name, prefix, filename):
        super().__init__(None, name.upper())
        self.prefix_field = safe_name(prefix.upper())
        self.filename = filename

        self._defines = []
        self._enums = {}
        self._structs = {}

    @property
    def full_name(self):
        return self.name + "_" + self.prefix_field

    @property
    def prefix(self):
        return self.full_name

    def add(self, element):
        """Register a top-level Enum, Struct or Define, rejecting duplicates."""
        if isinstance(element, Enum):
            if element.name in self._enums:
                raise RuntimeError('Enum redefined. Enum: %s' % element.name)

            self._enums[element.name] = element
        elif isinstance(element, Struct):
            if element.name in self._structs:
                raise RuntimeError('Struct redefined. Struct: %s' % element.name)

            self._structs[element.name] = element
        elif isinstance(element, Define):
            # Defines are compared by full name since their short names are
            # only unique within their parent.
            define_names = map(lambda d: d.full_name, self._defines)
            if element.full_name in define_names:
                raise RuntimeError('Define redefined. Define: %s' % element.full_name)

            self._defines.append(element)
        else:
            raise RuntimeError('Element "%s" cannot be nested in csbgen.' %
                    type(element).__name__)

    def _gen_guard(self):
        # Header guard derived from the XML file name,
        # e.g. rogue_ppp.xml -> ROGUE_PPP_H.
        return os.path.basename(self.filename).replace('.xml', '_h').upper()

    def emit(self):
        """Print the complete generated header to stdout."""
        print(pack_header % {'license': license,
                             'platform': self.name,
                             'guard': self._gen_guard()})

        for define in self._defines:
            define.emit(self)

        print()

        for enum in self._enums.values():
            enum.emit(self)

        for struct in self._structs.values():
            struct.emit(self)

        print('#endif /* %s */' % self._gen_guard())

    def is_known_struct(self, struct_name):
        """True if a struct with this name has been registered."""
        return struct_name in self._structs.keys()

    def is_known_enum(self, enum_name):
        """True if an enum with this name has been registered."""
        return enum_name in self._enums.keys()

    def get_enum(self, enum_name):
        """Look up a registered enum by name; raises KeyError if unknown."""
        return self._enums[enum_name]
+
class Enum(Node):
    """An 'enum' XML element: a named set of integer Values."""

    def __init__(self, parent, name):
        super().__init__(parent, name)

        self._values = {}

        self.parent.add(self)

    # We override prefix so that the values will contain the enum's name too.
    @property
    def prefix(self):
        return self.full_name

    def get_value(self, value_name):
        """Look up a child Value by name; raises KeyError if unknown."""
        return self._values[value_name]

    def add(self, element):
        """Register a child element; only Value children are permitted."""
        if not isinstance(element, Value):
            raise RuntimeError('Element cannot be nested in enum. ' +
                    'Element Type: %s, Enum: %s' %
                    (type(element).__name__, self.full_name))

        if element.name in self._values:
            raise RuntimeError('Value is being redefined. Value: "%s"' % element.name)

        self._values[element.name] = element

    def emit(self, root):
        """Print the C enum definition for this element."""
        # This check is invalid if tags other than Value can be nested within an enum.
        if not self._values.values():
            raise RuntimeError('Enum definition is empty. Enum: "%s"' % self.full_name)

        print('enum %s {' % self.full_name)
        for value in self._values.values():
            value.emit()
        print('};\n')
+
class Value(Node):
    """A single named constant belonging to an Enum."""

    def __init__(self, parent, name, value):
        super().__init__(parent, name)
        self.value = int(value)
        self.parent.add(self)

    def emit(self):
        """Print this value as a C enumerator line."""
        print('    %-36s = %6d,' % (self.full_name, self.value))
+
class Struct(Node):
    """A 'struct' XML element: a group of fields packed into 32-bit dwords.

    'length' is the struct size in dwords; 'size' is the size in bits.
    """

    def __init__(self, parent, name, length):
        super().__init__(parent, name)

        self.length = int(length)
        self.size = self.length * 32

        if self.length <= 0:
            raise ValueError('Struct length must be greater than 0. ' +
                    'Struct: "%s".' % self.full_name)

        self._children = {}

        self.parent.add(self)

    @property
    def fields(self):
        """All Field children, with Condition children flattened."""
        # TODO: Should we cache? See TODO in equivalent Condition getter.

        fields = []
        for child in self._children.values():
            if isinstance(child, Condition):
                fields += child.fields
            else:
                fields.append(child)

        return fields

    @property
    def prefix(self):
        return self.full_name

    def add(self, element):
        """Register a child Field or Condition."""
        # We don't support conditions and field having the same name.
        if isinstance(element, Field):
            if element.name in self._children.keys():
                raise ValueError('Field is being redefined. ' +
                        'Field: "%s", Struct: "%s"' %
                        (element.name, self.full_name))

            self._children[element.name] = element

        elif isinstance(element, Condition):
            # We only save ifs, and ignore the rest. The rest will be linked to
            # the if condition so we just need to call emit() on the if and the
            # rest will also be emitted.
            if element.type == 'if':
                self._children[element.name] = element
            else:
                if element.name not in self._children.keys():
                    raise RuntimeError('Unknown condition: "%s"' % element.name)

        else:
            raise RuntimeError('Element cannot be nested in struct. ' +
                    'Element Type: %s, Struct: %s' %
                    (type(element).__name__, self.full_name))

    def _emit_header(self, root):
        """Emit the <name>_header macro initializing fields with defaults."""
        fields = filter(lambda f: hasattr(f, 'default'), self.fields)

        default_fields = []
        for field in fields:
            if field.is_builtin_type:
                default_fields.append("    .%-35s = %6d" %
                                      (field.name, field.default))
            else:
                if not root.is_known_enum(field.type):
                    # Default values should not apply to structures
                    raise RuntimeError('Unknown type. Field: "%s" Type: "%s"' %
                            (field.name, field.type))

                enum = root.get_enum(field.type)

                try:
                    value = enum.get_value(field.default)
                except KeyError:
                    raise ValueError('Unknown enum value. ' +
                            'Value: "%s", Enum: "%s", Field: "%s"' %
                            (field.default, enum.full_name, field.name))

                default_fields.append("    .%-35s = %s" %
                        (field.name, value.full_name))

        print('#define %-40s\\' % (self.full_name + '_header'))
        print(",  \\\n".join(default_fields))
        print('')

    def _emit_helper_macros(self, root):
        """Emit per-field helper #defines (e.g. address ALIGNMENT)."""
        fields_with_defines = filter(lambda f: f.defines, self.fields)

        for field in fields_with_defines:
            print("/* Helper macros for %s */" % (field.name))

            for define in field.defines:
                define.emit(root)

            print()

    def _emit_pack_function(self, root):
        """Emit the <name>_pack() C function body for this struct."""
        print(textwrap.dedent("""\
            static inline __attribute__((always_inline)) void
            %s_pack(__attribute__((unused)) void * restrict dst,
                  %s__attribute__((unused)) const struct %s * restrict values)
            {""") % (self.full_name, ' ' * len(self.full_name), self.full_name))

        # Group is declared elsewhere in this file; it owns the dword layout
        # and pack-expression generation.
        group = Group(0, 1, self.size, self.fields)
        (dwords, length) = group.collect_dwords_and_length()
        if length:
            # Cast dst to make header C++ friendly
            print("    uint32_t * restrict dw = (uint32_t * restrict) dst;")

        group.emit_pack_function(root, dwords, length)

        print("}\n")


    def emit(self, root):
        """Print the length define, header macro, C struct and pack function."""
        print('#define %-33s %6d' % (self.full_name + "_length", self.length))

        self._emit_header(root)

        self._emit_helper_macros(root)

        print("struct %s {" % self.full_name)
        for child in self._children.values():
                child.emit(root)
        print("};\n")

        self._emit_pack_function(root)
+
class Field(Node):
    """A 'field' XML element: an inclusive bit range [start, end] in a Struct.

    'address' fields must carry a 'shift' attribute describing their
    alignment; an ALIGNMENT helper Define is generated from it.
    """

    def __init__(self, parent, name, start, end, type, default=None, shift=None):
        super().__init__(parent, name)

        self.start = int(start)
        self.end = int(end)
        self.type = type

        self._defines = {}

        self.parent.add(self)

        if self.start > self.end:
            raise ValueError('Start cannot be after end. ' +
                    'Start: %d, End: %d, Field: "%s"' %
                    (self.start, self.end, self.name))

        if self.type == 'bool' and self.end != self.start:
            raise ValueError('Bool field can only be 1 bit long. ' +
                    'Field "%s"' % self.name)

        if default is not None:
            if not self.is_builtin_type:
                # Assuming it's an enum type.
                self.default = safe_name(default)
            else:
                self.default = num_from_str(default)

        if shift is not None:
            if self.type != 'address':
                raise RuntimeError('Only address fields can have a shift ' +
                        'attribute. Field: "%s"' % self.name)

            self.shift = int(shift)

            Define(self, "ALIGNMENT", 2 ** self.shift)
        else:
            if self.type == 'address':
                raise RuntimeError('Field of address type ' +
                        'requires a shift attribute. Field "%s"' %
                        self.name)

    @property
    def defines(self):
        """Helper Defines nested within this field."""
        return self._defines.values()

    # We override prefix so that the defines will contain the field's name too.
    @property
    def prefix(self):
        return self.full_name

    @property
    def is_builtin_type(self):
        builtins = {'address', 'bool', 'float', 'mbo', 'offset', 'int', 'uint'}
        return self.type in builtins

    def _get_c_type(self, root):
        """Map this field's XML type to the C type used in the value struct."""
        if self.type == 'address':
            return '__pvr_address_type'
        elif self.type == 'bool':
            return 'bool'
        elif self.type == 'float':
            return 'float'
        elif self.type == 'offset':
            return 'uint64_t'
        elif self.type == 'int':
            return 'int32_t'
        elif self.type == 'uint':
            # Inclusive range: width is end - start + 1, so '< 32' admits
            # fields up to 32 bits wide.
            if self.end - self.start < 32:
                return 'uint32_t'
            elif self.end - self.start < 64:
                # Bug fix: this previously read 'self.end - self.self', which
                # raised AttributeError for any uint field wider than 32 bits.
                return 'uint64_t'

            # Bug fix: use a single format string. Previously '%' bound only
            # to the second concatenated literal ('Field: "%s"'), so the
            # two-element tuple raised a TypeError instead of producing the
            # intended error message.
            raise RuntimeError('No known C type found to hold %d bit sized value. '
                    'Field: "%s"' %
                    (self.end - self.start, self.name))
        elif root.is_known_struct(self.type):
            return 'struct ' + self.type
        elif root.is_known_enum(self.type):
            return 'enum ' + root.get_enum(self.type).full_name
        raise RuntimeError('Unknown type. Type: "%s", Field: "%s"' %
                (self.type, self.name))

    def add(self, element):
        """Register a nested element; only Defines are allowed, and never on
        mbo (padding) fields."""
        if self.type == 'mbo':
            raise RuntimeError('No element can be nested in an mbo field. ' +
                    'Element Type: %s, Field: %s' %
                    (type(element).__name__, self.name))

        if isinstance(element, Define):
            if element.name in self._defines:
                raise RuntimeError('Duplicate define. Define: "%s"' %
                        element.name)

            self._defines[element.name] = element
        else:
            raise RuntimeError('Element cannot be nested in a field. ' +
                    'Element Type: %s, Field: %s' %
                    (type(element).__name__, self.name))

    def emit(self, root):
        """Print the C struct member declaration for this field."""
        if self.type == 'mbo':
            return

        print("    %-36s %s;" % (self._get_c_type(root), self.name))
+
class Define(Node):
    """A '#define' helper constant attached to a field or the root element."""

    def __init__(self, parent, name, value):
        super().__init__(parent, name)

        self.value = value

        self.parent.add(self)

    def emit(self, root):
        """Print the C #define line for this constant (value printed as %d)."""
        print("#define %-40s %d" % (self.full_name, self.value))
+
+class Condition(Node):
+    def __init__(self, parent, name, type):
+        super().__init__(parent, name, name_is_safe = True)
+
+        self.type = type
+        if not Condition._is_valid_type(self.type):
+            raise RuntimeError('Unknown type: "%s"' % self.name)
+
+        self._children = {}
+
+        # This is the link to the next branch for the if statement so either
+        # elif, else, or endif. They themselves will also have a link to the
+        # next branch up until endif which terminates the chain.
+        self._child_branch = None
+
+        self.parent.add(self)
+
+    @property
+    def fields(self):
+        # TODO: Should we use some kind of state to indicate the all of the
+        # child nodes have been added and then cache the fields in here on the
+        # first call so that we don't have to traverse them again per each call?
+        # The state could be changed wither when we reach the endif and pop from
+        # the context, or when we start emitting.
+
+        fields = []
+
+        for child in self._children.values():
+            if isinstance(child, Condition):
+                fields += child.fields
+            else:
+                fields.append(child)
+
+        if self._child_branch is not None:
+            fields += self._child_branch.fields
+
+        return fields
+
+    def _is_valid_type(type):
+        types = {'if', 'elif', 'else', 'endif'}
+        return type in types
+
+    def _is_compatible_child_branch(self, branch):
+        types = ['if', 'elif', 'else', 'endif']
+        idx = types.index(self.type)
+        return (branch.type in types[idx + 1:] or
+                self.type == 'elif' and branch.type == 'elif')
+
+    def _add_branch(self, branch):
+        if branch.type == 'elif' and branch.name == self.name:
+                raise RuntimeError('Elif branch cannot have same check as previous branch. ' +
+                        'Check: "%s"' % (branch.name))
+
+        if not self._is_compatible_child_branch(branch):
+            raise RuntimeError('Invalid branch. Check: "%s", Type: "%s"' %
+                    (branch.name, branch.type))
+
+        self._child_branch = branch
+
+    # Returns the name of the if condition. This is used for elif branches since
+    # they have a different name than the if condition thus we have to traverse
+    # the chain of branches.
+    # This is used to discriminate nested if conditions from branches since
+    # branches like 'endif' and 'else' will have the same name as the 'if' (the
+    # elif is an exception) while nested conditions will have different names.
+    #
+    # TODO: Redo this to improve speed? Would caching this be helpful? We could
+    # just save the name of the if instead of having to walk towards it whenever
+    # a new condition is being added.
+    def _top_branch_name(self):
+        if self.type == 'if':
+            return self.name
+
+        return self.parent._top_branch_name()
+
+    def add(self, element):
+        # Attach a child element to this condition. Fields are stored by
+        # name; a Condition is either linked as the follow-on branch of this
+        # one (elif/else/endif) or nested inside it as a new if statement.
+        if isinstance(element, Field):
+            if element.name in self._children.keys():
+                raise ValueError('Duplicate field. Field: "%s"' % element.name)
+
+            self._children[element.name] = element
+        elif isinstance(element, Condition):
+            if element.type == 'elif' or self._top_branch_name() == element.name:
+                self._add_branch(element)
+            else:
+                if element.type != 'if':
+                    raise RuntimeError('Branch of an unopened if condition. ' +
+                        'Check: "%s", Type: "%s".' % (element.name, element.type))
+
+                # This is a nested condition and we made sure that the name
+                # doesn't match _top_branch_name() so we can recognize the else
+                # and endif.
+                # We recognized the elif by its type however its name differs
+                # from the if condition thus when we add an if condition with
+                # the same name as the elif nested in it, the _top_branch_name()
+                # check doesn't hold true as the name matched the elif and not
+                # the if statement which the elif was a branch of, thus the
+                # nested if condition is not recognized as an invalid branch of
+                # the outer if statement.
+                #   Sample:
+                #   <condition type="if" check="ROGUEXE"/>
+                #       <condition type="elif" check="COMPUTE"/>
+                #           <condition type="if" check="COMPUTE"/>
+                #           <condition type="endif" check="COMPUTE"/>
+                #       <condition type="endif" check="COMPUTE"/>
+                #   <condition type="endif" check="ROGUEXE"/>
+                #
+                # We fix this by checking the if condition name against its
+                # parent.
+                if element.name == self.name:
+                    raise RuntimeError('Invalid if condition. Check: "%s"' %
+                            element.name)
+
+                self._children[element.name] = element
+        else:
+            raise RuntimeError('Element cannot be nested in a condition. ' +
+                    'Element Type: %s, Check: %s' %
+                    (type(element).__name__, self.name))
+
+    def emit(self, root):
+        if self.type == "if":
+            print("/* if %s is supported use: */" % (self.name))
+        elif self.type == "elif":
+            print("/* else if %s is supported use: */" % (self.name))
+        elif self.type == "else":
+            print("/* else %s is not-supported use: */" % (self.name))
+        elif self.type == "endif":
+            print("/* endif %s */" % (self.name))
+            return
+        else:
+            raise RuntimeError('Unknown condition type. Implementation error.')
+
+        for child in self._children.values():
+            child.emit(root)
+
+        self._child_branch.emit(root)
+
+class Group(object):
+    def __init__(self, start, count, size, fields):
+        self.start = start
+        self.count = count
+        self.size = size
+        self.fields = fields
+
+    class DWord:
+        def __init__(self):
+            self.size = 32
+            self.fields = []
+            self.addresses = []
+
+    def collect_dwords(self, dwords, start, dim):
+        for field in self.fields:
+            index = (start + field.start) // 32
+            if index not in dwords:
+                dwords[index] = self.DWord()
+
+            clone = copy.copy(field)
+            clone.start = clone.start + start
+            clone.end = clone.end + start
+            clone.dim = dim
+            dwords[index].fields.append(clone)
+
+            if field.type == "address":
+                # assert dwords[index].address == None
+                dwords[index].addresses.append(clone)
+
+            # Coalesce all the dwords covered by this field. The two cases we
+            # handle are where multiple fields are in a 64 bit word (typically
+            # and address and a few bits) or where a single struct field
+            # completely covers multiple dwords.
+            while index < (start + field.end) // 32:
+                if index + 1 in dwords and \
+                   not dwords[index] == dwords[index + 1]:
+                    dwords[index].fields.extend(dwords[index + 1].fields)
+                    dwords[index].addresses.extend(dwords[index + 1].addresses)
+                dwords[index].size = 64
+                dwords[index + 1] = dwords[index]
+                index = index + 1
+
+    def collect_dwords_and_length(self):
+        dwords = {}
+        self.collect_dwords(dwords, 0, "")
+
+        # Determine number of dwords in this group. If we have a size, use
+        # that, since that'll account for MBZ dwords at the end of a group
+        # (like dword 8 on BDW+ 3DSTATE_HS). Otherwise, use the largest dword
+        # index we've seen plus one.
+        if self.size > 0:
+            length = self.size // 32
+        elif dwords:
+            length = max(dwords.keys()) + 1
+        else:
+            length = 0
+
+        return (dwords, length)
+
+    def emit_pack_function(self, root, dwords, length):
+        for index in range(length):
+            # Handle MBZ dwords
+            if index not in dwords:
+                print("")
+                print("    dw[%d] = 0;" % index)
+                continue
+
+            # For 64 bit dwords, we aliased the two dword entries in the dword
+            # dict it occupies. Now that we're emitting the pack function,
+            # skip the duplicate entries.
+            dw = dwords[index]
+            if index > 0 and index - 1 in dwords and dw == dwords[index - 1]:
+                continue
+
+            # Special case: only one field and it's a struct at the beginning
+            # of the dword. In this case we pack directly into the
+            # destination. This is the only way we handle embedded structs
+            # larger than 32 bits.
+            if len(dw.fields) == 1:
+                field = dw.fields[0]
+                name = field.name + field.dim
+                if root.is_known_struct(field.type) and field.start % 32 == 0:
+                    print("")
+                    print("    %s_pack(data, &dw[%d], &values->%s);" %
+                          (self.parser.gen_prefix(safe_name(field.type)),
+                           index, name))
+                    continue
+
+            # Pack any fields of struct type first so we have integer values
+            # to the dword for those fields.
+            field_index = 0
+            for field in dw.fields:
+                if isinstance(field, Field) and root.is_known_struct(field.type):
+                    name = field.name + field.dim
+                    print("")
+                    print("    uint32_t v%d_%d;" % (index, field_index))
+                    print("    %s_pack(data, &v%d_%d, &values->%s);" %
+                          (self.parser.gen_prefix(safe_name(field.type)),
+                           index, field_index, name))
+                    field_index = field_index + 1
+
+            print("")
+            dword_start = index * 32
+            address_count = len(dw.addresses);
+
+            if dw.size == 32 and not dw.addresses:
+                v = None
+                print("    dw[%d] =" % index)
+            elif len(dw.fields) > address_count:
+                v = "v%d" % index
+                print("    const uint%d_t %s =" % (dw.size, v))
+            else:
+                v = "0"
+
+            field_index = 0
+            non_address_fields = []
+            for field in dw.fields:
+                if field.type != "mbo":
+                    name = field.name + field.dim
+
+                if field.type == "mbo":
+                    non_address_fields.append("__pvr_mbo(%d, %d)" %
+                                              (field.start - dword_start,
+                                               field.end - dword_start))
+                elif field.type == "address":
+                    pass
+                elif field.type == "uint":
+                    non_address_fields.append("__pvr_uint(values->%s, %d, %d)" %
+                                              (name, field.start - dword_start,
+                                               field.end - dword_start))
+                elif root.is_known_enum(field.type):
+                    non_address_fields.append("__pvr_uint(values->%s, %d, %d)" %
+                                              (name, field.start - dword_start,
+                                               field.end - dword_start))
+                elif field.type == "int":
+                    non_address_fields.append("__pvr_sint(values->%s, %d, %d)" %
+                                              (name, field.start - dword_start,
+                                               field.end - dword_start))
+                elif field.type == "bool":
+                    non_address_fields.append("__pvr_uint(values->%s, %d, %d)" %
+                                              (name, field.start - dword_start,
+                                               field.end - dword_start))
+                elif field.type == "float":
+                    non_address_fields.append("__pvr_float(values->%s)" % name)
+                elif field.type == "offset":
+                    non_address_fields.append(
+                        "__pvr_offset(values->%s,"" %d, %d)" %
+                        (name, field.start - dword_start,
+                         field.end - dword_start))
+                elif field.is_struct_type():
+                    non_address_fields.append("__pvr_uint(v%d_%d, %d, %d)" %
+                                              (index, field_index,
+                                               field.start - dword_start,
+                                               field.end - dword_start))
+                    field_index = field_index + 1
+                else:
+                    non_address_fields.append("/* unhandled field %s,"
+                                              " type %s */\n" %
+                                              (name, field.type))
+
+            if non_address_fields:
+                print(" |\n".join("      " + f for f in non_address_fields) +
+                      ";")
+
+            if dw.size == 32:
+                for i in range(address_count):
+                    print("    dw[%d] = __pvr_address("
+                          "values->%s, %d, %d, %d) | %s;" %
+                          (index, dw.addresses[i].name + field.dim,
+                           dw.addresses[i].shift, dw.addresses[i].start - dword_start,
+                           dw.addresses[i].end - dword_start, v))
+                continue
+
+            v_accumulated_addr = ""
+            for i in range(address_count):
+                v_address = "v%d_address" % i
+                v_accumulated_addr += "v%d_address" % i
+                print("    const uint64_t %s =\n     "
+                      " __pvr_address(values->%s, %d, %d, %d);" %
+                      (v_address, dw.addresses[i].name + field.dim, dw.addresses[i].shift,
+                       dw.addresses[i].start - dword_start,
+                       dw.addresses[i].end - dword_start))
+                if i < (address_count - 1):
+                    v_accumulated_addr += " |\n            "
+
+            if dw.addresses:
+                if len(dw.fields) > address_count:
+                    print("    dw[%d] = %s | %s;" % (index, v_accumulated_addr, v))
+                    print("    dw[%d] = (%s >> 32) | (%s >> 32);" %
+                          (index + 1, v_accumulated_addr, v))
+                    continue
+                else:
+                    v = v_accumulated_addr
+
+            print("    dw[%d] = %s;" % (index, v))
+            print("    dw[%d] = %s >> 32;" % (index + 1, v))
+
+class Parser(object):
+    def __init__(self):
+        self.parser = xml.parsers.expat.ParserCreate()
+        self.parser.StartElementHandler = self.start_element
+        self.parser.EndElementHandler = self.end_element
+
+        self.context = []
+
+    def start_element(self, name, attrs):
+        if not name == "csbgen":
+            parent = self.context[-1]
+
+        if name == "csbgen":
+            if self.context:
+                raise RuntimeError('Can only have 1 csbgen block and it has ' +
+                        'to contain all of the other elements.')
+
+            csbgen = Csbgen(attrs["name"], attrs["prefix"], self.filename)
+            self.context.append(csbgen)
+
+        elif name == "struct":
+            struct = Struct(parent , attrs["name"], attrs["length"])
+            self.context.append(struct)
+
+        elif name == "field":
+            default = None
+            if "default" in attrs.keys():
+                default = attrs["default"]
+
+            shift = None
+            if "shift" in attrs.keys():
+                shift = attrs["shift"]
+
+            field = Field(parent,
+                    name = attrs["name"],
+                    start = int(attrs["start"]),
+                    end = int(attrs["end"]),
+                    type = attrs["type"],
+                    default = default,
+                    shift = shift)
+            self.context.append(field)
+
+        elif name == "enum":
+            enum = Enum(parent, attrs["name"])
+            self.context.append(enum)
+
+        elif name == "value":
+            value = Value(parent, attrs["name"], ast.literal_eval(attrs["value"]))
+            self.context.append(value)
+
+        elif name == "define":
+            define = Define(parent, attrs["name"], ast.literal_eval(attrs["value"]))
+            self.context.append(define)
+
+        elif name == "condition":
+            condition = Condition(parent, name=attrs["check"], type=attrs["type"])
+
+            # Starting with the if statement we push it in the context. For each
+            # branch following (elif, and else) we assign the top of stack as
+            # its parent, pop() and push the new condition. So per branch we end
+            # up having [..., struct, condition]. We don't push an endif since
+            # it's not supposed to have any children and it's supposed to close
+            # the whole if statement.
+
+            if condition.type != 'if':
+                # Remove the parent condition from the context. We were peeking
+                # before, now we pop().
+                self.context.pop()
+
+            if condition.type == 'endif':
+                if not isinstance(parent, Condition):
+                    raise RuntimeError('Cannot close unopened or already ' +
+                            'closed condition. Condition: "%s"' % condition.name)
+            else:
+                self.context.append(condition)
+
+        else:
+            raise RuntimeError('Unknown tag: "%s"' % name)
+
+    def end_element(self, name):
+        if name == 'condition':
+            element = self.context[-1]
+            if not isinstance(element, Condition) and not isinstance(element, Struct):
+                raise RuntimeError("Expected condition or struct tag to be closed.")
+
+            return
+
+        element = self.context.pop()
+
+        if name == "struct":
+            if not isinstance(element, Struct):
+                raise RuntimeError("Expected struct tag to be closed.")
+        elif name == "field":
+            if not isinstance(element, Field):
+                raise RuntimeError("Expected field tag to be closed.")
+        elif name == "enum":
+            if not isinstance(element, Enum):
+                raise RuntimeError("Expected enum tag to be closed.")
+        elif name == "value":
+            if not isinstance(element, Value):
+                raise RuntimeError("Expected value tag to be closed.")
+        elif name == "define":
+            if not isinstance(element, Define):
+                raise RuntimeError("Expected define tag to be closed.")
+        elif name == "csbgen":
+            if not isinstance(element, Csbgen):
+                raise RuntimeError("""Expected csbgen tag to be closed.
+                Some tags may have not been closed""")
+
+            element.emit()
+        else:
+            raise RuntimeError('Unknown closing element: "%s"' % name)
+
+    def parse(self, filename):
+        file = open(filename, "rb")
+        self.filename = filename
+        self.parser.ParseFile(file)
+        file.close()
+
+if len(sys.argv) < 2:
+    print("No input xml file specified")
+    sys.exit(1)
+
+input_file = sys.argv[1]
+
+p = Parser()
+p.parse(input_file)
diff --git a/src/imagination/csbgen/meson.build b/src/imagination/csbgen/meson.build
new file mode 100644 (file)
index 0000000..ea6a8ff
--- /dev/null
@@ -0,0 +1,46 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+pvr_xml_files = [
+  'rogue_cdm.xml',
+  'rogue_cr.xml',
+  'rogue_ipf.xml',
+  'rogue_lls.xml',
+  'rogue_pbestate.xml',
+  'rogue_pds.xml',
+  'rogue_ppp.xml',
+  'rogue_texstate.xml',
+  'rogue_vdm.xml',
+]
+
+# Generate <name>.h from each <name>.xml. gen_pack_header.py writes the
+# generated header to stdout, which 'capture : true' redirects into the
+# output file.
+pvr_xml_pack = []
+foreach f : pvr_xml_files
+  _name = '@0@.h'.format(f.split('.')[0])
+  pvr_xml_pack += custom_target(
+    _name,
+    input : ['gen_pack_header.py', f],
+    output : _name,
+    command : [prog_python, '@INPUT@'],
+    capture : true,
+  )
+endforeach
+
+dep_csbgen = declare_dependency(sources : [pvr_xml_pack])
diff --git a/src/imagination/csbgen/pvr_packet_helpers.h b/src/imagination/csbgen/pvr_packet_helpers.h
new file mode 100644 (file)
index 0000000..9cef132
--- /dev/null
@@ -0,0 +1,176 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PACKET_HELPERS_H
+#define PVR_PACKET_HELPERS_H
+
+#include <assert.h>
+#include <math.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#ifndef __pvr_validate_value
+#   define __pvr_validate_value(x)
+#endif
+
+#ifdef NDEBUG
+#   define NDEBUG_UNUSED __attribute__((unused))
+#else
+#   define NDEBUG_UNUSED
+#endif
+
+#ifndef __pvr_address_type
+#   error #define __pvr_address_type before including this file
+#endif
+
+#ifndef __pvr_get_address
+#   error #define __pvr_get_address before including this file
+#endif
+
+/* Helper union for reinterpreting a float's bit pattern as a uint32_t;
+ * type punning through a union is well defined in C.
+ */
+union __pvr_value {
+   float f;
+   uint32_t dw;
+};
+
+/* Returns a mask with bits [start, end] (inclusive) set - used to pack
+ * "must be one" fields.
+ */
+static inline __attribute__((always_inline)) uint64_t __pvr_mbo(uint32_t start,
+                                                                uint32_t end)
+{
+   return (~0ull >> (64 - (end - start + 1))) << start;
+}
+
+/* Packs the unsigned value 'v' into bit position 'start'. In debug builds,
+ * asserts that the value fits within the [start, end] field width.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_uint(uint64_t v, uint32_t start, NDEBUG_UNUSED uint32_t end)
+{
+   __pvr_validate_value(v);
+
+#ifndef NDEBUG
+   const int width = end - start + 1;
+   if (width < 64) {
+      const uint64_t max = (1ull << width) - 1;
+      assert(v <= max);
+   }
+#endif
+
+   return v << start;
+}
+
+/* Packs the signed value 'v' into bits [start, end] as two's complement,
+ * masking it to the field width. Debug builds assert the value is within
+ * the representable range for that width.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_sint(int64_t v, uint32_t start, uint32_t end)
+{
+   const int width = end - start + 1;
+
+   __pvr_validate_value(v);
+
+#ifndef NDEBUG
+   if (width < 64) {
+      const int64_t max = (1ll << (width - 1)) - 1;
+      const int64_t min = -(1ll << (width - 1));
+      assert(min <= v && v <= max);
+   }
+#endif
+
+   const uint64_t mask = ~0ull >> (64 - width);
+
+   return (v & mask) << start;
+}
+
+/* Packs a pre-positioned offset: 'v' is returned unchanged, and debug
+ * builds assert that only bits within [start, end] are set (i.e. the
+ * caller already aligned/shifted the value appropriately).
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_offset(uint64_t v,
+             NDEBUG_UNUSED uint32_t start,
+             NDEBUG_UNUSED uint32_t end)
+{
+   __pvr_validate_value(v);
+#ifndef NDEBUG
+   uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start;
+
+   assert((v & ~mask) == 0);
+#endif
+
+   return v;
+}
+
+/* Packs an address: obtains the raw address via the user-supplied
+ * __pvr_get_address(), right-shifts it by 'shift' and places the result in
+ * bits [start, end], silently masking off anything that does not fit.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_address(__pvr_address_type address,
+              uint32_t shift,
+              uint32_t start,
+              uint32_t end)
+{
+   uint64_t addr_u64 = __pvr_get_address(address);
+   uint64_t mask = (~0ull >> (64 - (end - start + 1))) << start;
+
+   return ((addr_u64 >> shift) << start) & mask;
+}
+
+/* Returns the raw bit pattern of the 32-bit float 'v'. */
+static inline __attribute__((always_inline)) uint32_t __pvr_float(float v)
+{
+   __pvr_validate_value(v);
+   return ((union __pvr_value){ .f = (v) }).dw;
+}
+
+/* Packs 'v' as signed fixed point with 'fract_bits' fractional bits into
+ * bits [start, end], rounding to nearest. Debug builds assert the value is
+ * within the representable range.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_sfixed(float v, uint32_t start, uint32_t end, uint32_t fract_bits)
+{
+   __pvr_validate_value(v);
+
+   const float factor = (1 << fract_bits);
+
+#ifndef NDEBUG
+   const float max = ((1 << (end - start)) - 1) / factor;
+   const float min = -(1 << (end - start)) / factor;
+   assert(min <= v && v <= max);
+#endif
+
+   const int64_t int_val = llroundf(v * factor);
+   const uint64_t mask = ~0ull >> (64 - (end - start + 1));
+
+   return (int_val & mask) << start;
+}
+
+/* Packs 'v' as unsigned fixed point with 'fract_bits' fractional bits into
+ * bits [start, end], rounding to nearest. Debug builds assert the value is
+ * non-negative and fits the field.
+ */
+static inline __attribute__((always_inline)) uint64_t
+__pvr_ufixed(float v,
+             uint32_t start,
+             NDEBUG_UNUSED uint32_t end,
+             uint32_t fract_bits)
+{
+   __pvr_validate_value(v);
+
+   const float factor = (1 << fract_bits);
+
+#ifndef NDEBUG
+   const float max = ((1 << (end - start + 1)) - 1) / factor;
+   const float min = 0.0f;
+   assert(min <= v && v <= max);
+#endif
+
+   const uint64_t uint_val = llroundf(v * factor);
+
+   return uint_val << start;
+}
+
+#undef NDEBUG_UNUSED
+
+#endif /* PVR_PACKET_HELPERS_H */
diff --git a/src/imagination/csbgen/rogue_cdm.xml b/src/imagination/csbgen/rogue_cdm.xml
new file mode 100644 (file)
index 0000000..804652b
--- /dev/null
@@ -0,0 +1,132 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="CDMCTRL">
+
+       <enum name="BLOCK_TYPE">
+               <value name="COMPUTE_KERNEL"   value="0"/>
+               <value name="STREAM_LINK"      value="1"/>
+               <value name="STREAM_TERMINATE" value="2"/>
+       </enum>
+
+       <enum name="USC_TARGET">
+               <value name="ALL" value="0"/>
+               <value name="ANY" value="1"/>
+       </enum>
+
+       <enum name="SD_TYPE">
+               <value name="NONE" value="0"/>
+               <value name="PDS"  value="1"/>
+               <value name="USC"  value="2"/>
+       </enum>
+
+       <struct name="KERNEL0" length="1">
+               <field name="block_type" start="30" end="31" type="BLOCK_TYPE" default="COMPUTE_KERNEL"/>
+               <field name="indirect_present" start="29" end="29" type="bool"/>
+               <field name="global_offsets_present" start="28" end="28" type="bool"/>
+               <field name="event_object_present" start="27" end="27" type="bool"/>
+               <field name="usc_common_size" start="18" end="26" type="uint">
+                       <define name="UNIT_SIZE" value="64"/>
+                       <define name="MAX_SIZE" value="256"/>
+               </field>
+               <field name="usc_unified_size" start="12" end="17" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="pds_temp_size" start="8" end="11" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="pds_data_size" start="2" end="7" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="usc_target" start="1" end="1" type="USC_TARGET"/>
+               <field name="fence" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="KERNEL1" length="1">
+               <field name="data_addr" start="4" end="31" shift="4" type="address"/>
+               <field name="sd_type" start="2" end="3" type="SD_TYPE"/>
+               <field name="usc_common_shared" start="1" end="1" type="bool"/>
+       </struct>
+
+       <struct name="KERNEL2" length="1">
+               <field name="code_addr" start="4" end="31" shift="4" type="address"/>
+               <field name="one_wg_per_task" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="KERNEL3" length="1">
+               <field name="workgroup_x" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="KERNEL4" length="1">
+               <field name="workgroup_y" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="KERNEL5" length="1">
+               <field name="workgroup_z" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="KERNEL6" length="1">
+               <field name="indirect_addrmsb" start="0" end="7" shift="32" type="address"/>
+       </struct>
+
+       <struct name="KERNEL7" length="1">
+               <field name="indirect_addrlsb" start="2" end="31" shift="2" type="address"/>
+       </struct>
+
+       <struct name="KERNEL8" length="1">
+               <field name="max_instances" start="27" end="31" type="uint">
+                       <define name="MAX_SIZE" value="31"/>
+               </field>
+               <field name="workgroup_size_x" start="18" end="26" type="uint"/>
+               <field name="workgroup_size_y" start="9" end="17" type="uint"/>
+               <field name="workgroup_size_z" start="0" end="8" type="uint"/>
+       </struct>
+
+       <struct name="KERNEL9" length="1">
+               <field name="global_offset_x" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="KERNEL10" length="1">
+               <field name="global_offset_y" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="KERNEL11" length="1">
+               <field name="global_offset_z" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="STREAM_LINK0" length="1">
+               <field name="block_type" start="30" end="31" type="BLOCK_TYPE" default="STREAM_LINK"/>
+               <field name="link_addrmsb" start="0" end="7" shift="32" type="address"/>
+       </struct>
+
+       <struct name="STREAM_LINK1" length="1">
+               <field name="link_addrlsb" start="2" end="31" shift="2" type="address"/>
+       </struct>
+
+       <struct name="STREAM_TERMINATE" length="1">
+               <field name="block_type" start="30" end="31" type="BLOCK_TYPE" default="STREAM_TERMINATE"/>
+       </struct>
+
+</csbgen>
diff --git a/src/imagination/csbgen/rogue_cr.xml b/src/imagination/csbgen/rogue_cr.xml
new file mode 100644 (file)
index 0000000..a3f6903
--- /dev/null
@@ -0,0 +1,631 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="CR">
+
+       <define name="PM_VHEAP_TABLE_SIZE" value="0x180" />
+
+       <enum name="COMP_IADDR_TYPE">
+               <value name="INDIRECT_1TILE" value="0"/>
+               <value name="INDIRECT_4TILE" value="1"/>
+       </enum>
+
+       <enum name="COMPRESS_SIZE">
+               <value name="BLOCK_8X8"  value="0"/>
+               <value name="BLOCK_16X4" value="1"/>
+       </enum>
+
+       <enum name="DIR_TYPE">
+               <value name="TL2BR" value="0"/>
+               <value name="TR2BL" value="1"/>
+               <value name="BL2TR" value="2"/>
+               <value name="BR2TL" value="3"/>
+       </enum>
+
+       <enum name="ISP_AA_MODE_TYPE">
+               <value name="AA_NONE" value="0"/>
+               <value name="AA_2X"   value="1"/>
+               <value name="AA_4X"   value="2"/>
+               <value name="AA_8X"   value="3"/>
+       </enum>
+
+       <enum name="ISP_RENDER_MODE_TYPE">
+               <value name="NORM"       value="0"/>
+               <value name="FAST_2D"    value="1"/>
+               <value name="FAST_SCALE" value="2"/>
+       </enum>
+
+       <enum name="MEMLAYOUT">
+               <value name="LINEAR"     value="0"/>
+               <value name="TWIDDLE_2D" value="1"/>
+               <value name="TWIDDLE_3D" value="2"/>
+               <value name="TILED"      value="3"/>
+       </enum>
+
+       <enum name="MODE_TYPE">
+               <value name="DX9"  value="0"/>
+               <value name="DX10" value="1"/>
+               <value name="OGL"  value="2"/>
+       </enum>
+
+       <enum name="PIPE_NUM">
+               <value name="PIPE_ONE"      value="0"/>
+               <value name="PIPE_TWO"      value="1"/>
+               <value name="PIPE_THREE"    value="2"/>
+               <value name="PIPE_FOUR"     value="3"/>
+               <value name="PIPE_FIVE"     value="4"/>
+               <value name="PIPE_SIX"      value="5"/>
+               <value name="PIPE_SEVEN"    value="6"/>
+               <value name="PIPE_EIGHT"    value="7"/>
+               <value name="PIPE_NINE"     value="8"/>
+               <value name="PIPE_TEN"      value="9"/>
+               <value name="PIPE_ELEVEN"   value="10"/>
+               <value name="PIPE_TWELVE"   value="11"/>
+               <value name="PIPE_THIRTEEN" value="12"/>
+               <value name="PIPE_FOURTEEN" value="13"/>
+               <value name="PIPE_FIFTEEN"  value="14"/>
+               <value name="PIPE_SIXTEEN"  value="15"/>
+       </enum>
+
+       <enum name="PIXEL_WIDTH">
+               <value name="2REGISTERS" value="0"/>
+               <value name="4REGISTERS" value="1"/>
+               <value name="8REGISTERS" value="2"/>
+               <value name="1REGISTER"  value="3"/>
+       </enum>
+
+       <enum name="ROTATION_TYPE">
+               <value name="0_DEG"   value="0"/>
+               <value name="90_DEG"  value="1"/>
+               <value name="180_DEG" value="2"/>
+               <value name="270_DEG" value="3"/>
+       </enum>
+
+       <enum name="SIZE">
+               <value name="1_PIXEL"   value="0"/>
+               <value name="2_PIXEL"   value="1"/>
+               <value name="4_PIXEL"   value="2"/>
+               <value name="8_PIXEL"   value="3"/>
+               <value name="16_PIXEL"  value="4"/>
+               <value name="32_PIXEL"  value="5"/>
+               <value name="64_PIXEL"  value="6"/>
+               <value name="128_PIXEL" value="7"/>
+               <value name="256_PIXEL" value="8"/>
+               <value name="512_PIXEL" value="9"/>
+               <value name="1K_PIXEL"  value="10"/>
+               <value name="2K_PIXEL"  value="11"/>
+               <value name="4K_PIXEL"  value="12"/>
+               <value name="8K_PIXEL"  value="13"/>
+               <value name="16K_PIXEL" value="14"/>
+       </enum>
+
+       <enum name="SWIZ">
+               <value name="SOURCE_CHAN0" value="0"/>
+               <value name="SOURCE_CHAN1" value="1"/>
+               <value name="SOURCE_CHAN2" value="2"/>
+               <value name="SOURCE_CHAN3" value="3"/>
+               <value name="ONE"          value="4"/>
+               <value name="ZERO"         value="5"/>
+       </enum>
+
+       <enum name="TFBC_LOSSY">
+               <value name="LOSSLESS" value="0"/>
+               <value name="LOSSY_75" value="1"/>
+               <value name="LOSSY_50" value="2"/>
+               <value name="LOSSY_25" value="3"/>
+       </enum>
+
+       <enum name="TWOCOMP_GAMMA">
+               <value name="GAMMA_BOTTOM_CHANNEL" value="0"/>
+               <value name="GAMMA_BOTH_CHANNELS"  value="1"/>
+       </enum>
+
+       <enum name="ZLOADFORMAT_TYPE">
+               <value name="F32Z"     value="0"/>
+               <value name="24BITINT" value="1"/>
+               <value name="16BITINT" value="2"/>
+               <value name="F64Z"     value="3"/>
+       </enum>
+
+       <enum name="ZSTOREFORMAT_TYPE">
+               <value name="F32Z"     value="0"/>
+               <value name="24BITINT" value="1"/>
+               <value name="16BITINT" value="2"/>
+               <value name="F64Z"     value="3"/>
+       </enum>
+
+       <struct name="PM_MTILE_ARRAY" length="2">
+               <field name="base_addr" start="4" end="39" shift="4" type="address"/>
+       </struct>
+
+       <struct name="PM_VHEAP_TABLE" length="2">
+               <field name="base_addr" start="4" end="39" shift="4" type="address"/>
+       </struct>
+
+       <struct name="PM_MLIST0_BASE" length="2">
+               <field name="addr" start="4" end="39" shift="4" type="address"/>
+       </struct>
+
+       <struct name="VDM_CTRL_STREAM_BASE" length="2">
+               <field name="addr" start="2" end="39" shift="2" type="address"/>
+       </struct>
+
+       <struct name="VDM_CALL_STACK_POINTER" length="2">
+               <field name="addr" start="3" end="39" shift="3" type="address"/>
+       </struct>
+
+       <struct name="VDM_CONTEXT_STATE_BASE" length="2">
+               <field name="addr" start="4" end="39" shift="4" type="address"/>
+       </struct>
+
+       <struct name="VDM_CONTEXT_STORE_TASK0" length="2">
+               <field name="pds_state1" start="32" end="63" type="uint"/>
+               <field name="pds_state0" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="VDM_CONTEXT_STORE_TASK1" length="1">
+               <field name="pds_state2" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="VDM_CONTEXT_STORE_TASK2" length="2">
+               <field name="stream_out2" start="32" end="63" type="uint"/>
+               <field name="stream_out1" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="VDM_CONTEXT_RESUME_TASK0" length="2">
+               <field name="pds_state1" start="32" end="63" type="uint"/>
+               <field name="pds_state0" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="VDM_CONTEXT_RESUME_TASK1" length="1">
+               <field name="pds_state2" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="VDM_CONTEXT_RESUME_TASK2" length="2">
+               <field name="stream_out2" start="32" end="63" type="uint"/>
+               <field name="stream_out1" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="CDM_CONTEXT_STATE_BASE" length="2">
+               <field name="addr" start="4" end="39" shift="4" type="address"/>
+       </struct>
+
+       <struct name="CDM_CONTEXT_PDS0" length="2">
+               <field name="data_addr" start="36" end="63" shift="4" type="address"/>
+               <field name="code_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="CDM_CTRL_STREAM_BASE" length="2">
+               <field name="addr" start="2" end="39" shift="2" type="address"/>
+       </struct>
+
+       <struct name="CDM_CONTEXT_PDS1" length="1">
+               <field name="pds_seq_dep" start="29" end="29" type="bool"/>
+               <field name="usc_seq_dep" start="28" end="28" type="bool"/>
+               <!-- false=All, true=Any -->
+               <field name="target" start="27" end="27" type="bool"/>
+               <field name="unified_size" start="21" end="26" type="uint"/>
+               <field name="common_shared" start="20" end="20" type="bool"/>
+               <field name="common_size" start="11" end="19" type="uint">
+                       <define name="UNIT_SIZE" value="64"/>
+               </field>
+               <field name="temp_size" start="7" end="10" type="uint"/>
+               <field name="data_size" start="1" end="6" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="fence" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="CDM_TERMINATE_PDS" length="2">
+               <field name="data_addr" start="36" end="63" shift="4" type="address"/>
+               <field name="code_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="CDM_TERMINATE_PDS1" length="1">
+               <field name="pds_seq_dep" start="29" end="29" type="bool"/>
+               <field name="usc_seq_dep" start="28" end="28" type="bool"/>
+               <field name="target" start="27" end="27" type="bool"/>
+               <field name="unified_size" start="21" end="26" type="uint"/>
+               <field name="common_shared" start="20" end="20" type="bool"/>
+               <field name="common_size" start="11" end="19" type="uint"/>
+               <field name="temp_size" start="7" end="10" type="uint"/>
+               <field name="data_size" start="1" end="6" type="uint"/>
+               <field name="fence" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="CDM_CONTEXT_LOAD_PDS0" length="2">
+               <field name="data_addr" start="36" end="63" shift="4" type="address"/>
+               <field name="code_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="COMPUTE_CLUSTER" length="1">
+               <field name="mask" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="PDS_CTRL" length="2">
+               <field name="sm_overlap_enable" start="55" end="55" type="bool"/>
+               <condition type="if" check="ROGUEXE"/>
+                       <condition type="if" check="COMPUTE"/>
+                               <field name="roguexe_max_num_cdm_tasks" start="24" end="31" type="uint"/>
+                       <condition type="endif" check="COMPUTE"/>
+                       <condition type="if" check="NUM_RASTER_PIPES &gt; 0"/>
+                               <field name="roguexe_max_num_pdm_tasks" start="16" end="23" type="uint"/>
+                       <condition type="endif" check="NUM_RASTER_PIPES &gt; 0"/>
+                       <condition type="if" check="NUM_TA &gt; 0"/>
+                               <field name="roguexe_max_num_vdm_tasks" start="8" end="15" type="uint"/>
+                       <condition type="endif" check="NUM_TA &gt; 0"/>
+               <condition type="else" check="ROGUEXE"/>
+                       <condition type="if" check="COMPUTE"/>
+                               <field name="max_num_cdm_tasks" start="24" end="30" type="uint"/>
+                       <condition type="endif" check="COMPUTE"/>
+                       <field name="max_num_pdm_tasks" start="16" end="22" type="uint"/>
+                       <field name="max_num_vdm_tasks" start="8" end="14" type="uint"/>
+               <condition type="endif" check="ROGUEXE"/>
+       </struct>
+
+       <struct name="EVENT_PIXEL_PDS_CODE" length="1">
+               <field name="addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="EVENT_PIXEL_PDS_DATA" length="1">
+               <!-- This is actually an offset. Note this for when we auto-generate the XMLs. -->
+               <field name="addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="EVENT_PIXEL_PDS_INFO" length="1">
+               <field name="usc_sr_size" start="9" end="14" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="temp_stride" start="5" end="8" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+               <field name="const_size" start="0" end="4" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+       </struct>
+
+       <struct name="PDS_BGRND0_BASE" length="2">
+               <!-- This is actually an offset. Note this for when we auto-generate the XMLs. -->
+               <field name="texunicode_addr" start="36" end="63" shift="4" type="address"/>
+               <!-- This is actually an offset. Note this for when we auto-generate the XMLs. -->
+               <field name="shader_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="PDS_BGRND1_BASE" length="2">
+               <!-- This is actually an offset. Note this for when we auto-generate the XMLs. -->
+               <field name="texturedata_addr" start="36" end="63" shift="4" type="address"/>
+               <!-- Unused in the Vulkan driver. -->
+               <field name="varying_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="PDS_BGRND2_BASE" length="2">
+               <!-- This is actually an offset. Note this for when we auto-generate the XMLs. -->
+               <field name="uniformdata_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="PDS_BGRND3_SIZEINFO" length="2">
+               <field name="usc_sharedsize" start="55" end="63" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="pds_batchnum" start="32" end="45" type="uint"/>
+               <field name="pds_uniformsize" start="23" end="31" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+               <field name="pds_texturestatesize" start="16" end="22" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+               <field name="pds_varyingsize" start="10" end="15" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+               <field name="usc_varyingsize" start="4" end="9" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="pds_tempsize" start="0" end="3" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+       </struct>
+
+       <struct name="TE_AA" length="1">
+               <condition type="if" check="SIMPLE_INTERNAL_PARAMETER_FORMAT &amp;&amp; ISP_SAMPLES_PER_PIXEL &gt; 2"/>
+                       <field name="y2" start="3" end="3" type="bool"/>
+               <condition type="endif" check="SIMPLE_INTERNAL_PARAMETER_FORMAT &amp;&amp; ISP_SAMPLES_PER_PIXEL &gt; 2"/>
+               <field name="y" start="2" end="2" type="bool"/>
+               <field name="x" start="1" end="1" type="bool"/>
+               <field name="x2" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="TE_MTILE1" length="1">
+               <field name="x1" start="18" end="26" type="uint"/>
+               <field name="x2" start="9" end="17" type="uint"/>
+               <field name="x3" start="0" end="8" type="uint"/>
+       </struct>
+
+       <struct name="TE_MTILE2" length="1">
+               <field name="y1" start="18" end="26" type="uint"/>
+               <field name="y2" start="9" end="17" type="uint"/>
+               <field name="y3" start="0" end="8" type="uint"/>
+       </struct>
+
+       <struct name="TE_SCREEN" length="1">
+               <field name="ymax" start="12" end="20" type="uint"/>
+               <field name="xmax" start="0" end="8" type="uint"/>
+       </struct>
+
+       <struct name="TE_PSG" length="1">
+               <condition type="if" check="ROGUEXE"/>
+                       <condition type="if" check="TILE_REGION_PROTECTION"/>
+                               <field name="force_protect" start="22" end="22" type="uint"/>
+                       <condition type="endif" check="TILE_REGION_PROTECTION"/>
+                       <field name="cs_size" start="21" end="21" type="uint"/>
+                       <field name="enable_pwr_gate_state" start="20" end="20" type="bool"/>
+               <condition type="endif" check="ROGUEXE"/>
+               <field name="enable_context_state_restore" start="19" end="19" type="bool"/>
+               <field name="zonlyrender" start="18" end="18" type="bool"/>
+               <field name="completeonterminate" start="17" end="17" type="bool"/>
+               <field name="cache_bypass" start="14" end="14" type="bool"/>
+               <field name="forcenewstate" start="13" end="13" type="bool"/>
+               <field name="region_stride" start="0" end="10" type="uint">
+                       <define name="UNIT_SIZE" value="4096"/>
+               </field>
+       </struct>
+
+       <!-- FIXME: This is only a partial definition as (at the time of writing)
+            csbgen doesn't support multiple address fields within a structure.
+       -->
+       <!-- FIXME: When csbgen supports conditional structs, make this
+            conditional on NUM_TA > 0.
+       -->
+       <struct name="TE_PSGREGION_ADDR" length="2">
+               <field name="base" start="6" end="33" shift="6" type="address"/>
+       </struct>
+
+       <!-- FIXME: This is only a partial definition as (at the time of writing)
+            csbgen doesn't support multiple address fields within a structure.
+       -->
+       <struct name="TE_TPC_ADDR" length="2">
+               <field name="base" start="6" end="33" shift="6" type="address"/>
+       </struct>
+
+       <struct name="PPP_MULTISAMPLECTL" length="2">
+               <condition type="if" check="MAX_MULTISAMPLE == 8"/>
+                       <field name="msaa_y7" start="60" end="63" type="uint"/>
+                       <field name="msaa_x7" start="56" end="59" type="uint"/>
+                       <field name="msaa_y6" start="52" end="55" type="uint"/>
+                       <field name="msaa_x6" start="48" end="51" type="uint"/>
+                       <field name="msaa_y5" start="44" end="47" type="uint"/>
+                       <field name="msaa_x5" start="40" end="43" type="uint"/>
+                       <field name="msaa_y4" start="36" end="39" type="uint"/>
+                       <field name="msaa_x4" start="32" end="35" type="uint"/>
+               <condition type="endif" check="MAX_MULTISAMPLE == 8"/>
+               <field name="msaa_y3" start="28" end="31" type="uint"/>
+               <field name="msaa_x3" start="24" end="27" type="uint"/>
+               <field name="msaa_y2" start="20" end="23" type="uint"/>
+               <field name="msaa_x2" start="16" end="19" type="uint"/>
+               <field name="msaa_y1" start="12" end="15" type="uint"/>
+               <field name="msaa_x1" start="8" end="11" type="uint"/>
+               <field name="msaa_y0" start="4" end="7" type="uint"/>
+               <field name="msaa_x0" start="0" end="3" type="uint"/>
+       </struct>
+
+       <struct name="PPP_CTRL" length="1">
+               <field name="vpt_scissor" start="12" end="12" type="bool"/>
+               <field name="flush_mode" start="11" end="11" type="uint"/>
+               <field name="bfcull_restrict_clip" start="10" end="10" type="bool"/>
+               <field name="fixed_point_format" start="9" end="9" type="uint"/>
+               <field name="default_point_size" start="8" end="8" type="bool"/>
+               <field name="bfcull1_disable" start="7" end="7" type="bool"/>
+               <field name="bfcull2_disable" start="6" end="6" type="bool"/>
+               <field name="fccull_disable" start="5" end="5" type="bool"/>
+               <field name="oscull_disable" start="4" end="4" type="bool"/>
+               <field name="socull_disable" start="2" end="2" type="bool"/>
+               <field name="wclampen" start="1" end="1" type="bool"/>
+               <field name="opengl" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="PPP_SCREEN" length="1">
+               <field name="pixymax" start="16" end="30" type="uint"/>
+               <field name="pixxmax" start="0" end="14" type="uint"/>
+       </struct>
+
+       <!-- FIXME: This is only a partial definition as (at the time of writing)
+            csbgen doesn't support multiple address fields within a structure.
+       -->
+       <struct name="TA_RTC_ADDR" length="2">
+               <field name="base" start="6" end="33" shift="6" type="address"/>
+       </struct>
+
+       <struct name="TA_CONTEXT_STATE_BASE" length="2">
+               <field name="addr" start="4" end="39" shift="4" type="address"/>
+       </struct>
+
+       <struct name="ISP_RENDER" length="1">
+               <field name="disable_eomt" start="5" end="5" type="bool"/>
+               <field name="resume" start="4" end="4" type="bool"/>
+               <field name="dir_type" start="2" end="3" type="DIR_TYPE"/>
+               <field name="mode_type" start="0" end="1" type="ISP_RENDER_MODE_TYPE"/>
+       </struct>
+
+       <struct name="ISP_RENDER_ORIGIN" length="1">
+               <field name="x" start="16" end="25" type="uint"/>
+               <field name="y" start="0" end="9" type="uint"/>
+       </struct>
+
+       <struct name="ISP_MTILE_SIZE" length="1">
+               <field name="x" start="16" end="25" type="uint"/>
+               <field name="y" start="0" end="9" type="uint"/>
+       </struct>
+
+       <struct name="ISP_BGOBJDEPTH" length="1">
+               <field name="value" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="ISP_BGOBJVALS" length="1">
+               <field name="enablebgtag" start="9" end="9" type="bool"/>
+               <field name="mask" start="8" end="8" type="bool"/>
+               <field name="stencil" start="0" end="7" type="uint"/>
+       </struct>
+
+       <struct name="ISP_AA" length="1">
+               <field name="mode" start="0" end="1" type="ISP_AA_MODE_TYPE"/>
+       </struct>
+
+       <struct name="ISP_CTL" length="1">
+               <field name="skip_init_hdrs" start="31" end="31" type="bool"/>
+               <field name="line_style" start="30" end="30" type="bool"/>
+               <field name="line_style_pix" start="29" end="29" type="bool"/>
+               <field name="pair_tiles_vert" start="28" end="28" type="bool"/>
+               <field name="pair_tiles" start="27" end="27" type="bool"/>
+               <field name="creq_buf_en" start="26" end="26" type="bool"/>
+               <field name="tile_age_en" start="25" end="25" type="bool"/>
+               <field name="isp_sample_pos_mode" start="23" end="24" type="MODE_TYPE"/>
+               <field name="num_tiles_per_usc" start="21" end="22" type="uint"/>
+               <field name="dbias_is_int" start="20" end="20" type="bool"/>
+               <field name="overlap_check_mode" start="19" end="19" type="bool"/>
+               <field name="pt_upfront_depth_disable" start="18" end="18" type="bool"/>
+               <field name="process_empty_tiles" start="17" end="17" type="bool"/>
+               <field name="sample_pos" start="16" end="16" type="bool"/>
+               <field name="pipe_enable" start="12" end="15" type="PIPE_NUM"/>
+               <field name="valid_id" start="4" end="9" type="uint"/>
+               <field name="upass_start" start="0" end="3" type="uint"/>
+       </struct>
+
+       <struct name="ISP_ZLSCTL" length="2">
+               <field name="zlsextent_y_s" start="48" end="57" type="uint"/>
+               <field name="zlsextent_x_s" start="38" end="47" type="uint"/>
+               <field name="stencil_extent_enable" start="37" end="37" type="bool"/>
+               <field name="zlsextent_y_z" start="27" end="36" type="uint"/>
+               <field name="zstoreformat" start="25" end="26" type="ZSTOREFORMAT_TYPE"/>
+               <field name="zloadformat" start="23" end="24" type="ZLOADFORMAT_TYPE"/>
+               <field name="fb_storeen" start="22" end="22" type="bool"/>
+               <field name="fb_loaden" start="21" end="21" type="bool"/>
+               <field name="mstoreen" start="20" end="20" type="bool"/>
+               <field name="zstoreen" start="19" end="19" type="bool"/>
+               <field name="sstoreen" start="18" end="18" type="bool"/>
+               <field name="storetwiddled" start="17" end="17" type="bool"/>
+               <field name="mloaden" start="16" end="16" type="bool"/>
+               <field name="zloaden" start="15" end="15" type="bool"/>
+               <field name="sloaden" start="14" end="14" type="bool"/>
+               <field name="loadtwiddled" start="13" end="13" type="bool"/>
+               <field name="zlsextent_x_z" start="3" end="12" type="uint"/>
+               <field name="forcezstore" start="2" end="2" type="bool"/>
+               <field name="forcezload" start="1" end="1" type="bool"/>
+               <field name="zonlyrender" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="ISP_ZLOAD_BASE" length="2">
+               <field name="addr" start="4" end="39" shift="4" type="address"/>
+       </struct>
+
+       <struct name="ISP_STENCIL_LOAD_BASE" length="2">
+               <field name="addr" start="4" end="39" shift="4" type="address"/>
+               <field name="enable" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="ISP_SCISSOR_BASE" length="2">
+               <field name="addr" start="2" end="39" shift="2" type="address"/>
+       </struct>
+
+       <struct name="ISP_DBIAS_BASE" length="2">
+               <field name="addr" start="2" end="39" shift="2" type="address"/>
+       </struct>
+
+       <struct name="ISP_ZLS_PIXELS" length="1">
+               <field name="y" start="15" end="29" type="uint"/>
+               <field name="x" start="0" end="14" type="uint"/>
+       </struct>
+
+       <struct name="PBE_WORD0_MRT0" length="2">
+               <condition type="if" check="TFBC"/>
+                       <field name="tfbc_lossy" start="62" end="63" type="TFBC_LOSSY"/>
+               <condition type="endif" check="TFBC"/>
+               <field name="x_rsrvd" start="63" end="63" type="bool"/>
+               <field name="pair_tiles" start="60" end="60" type="uint"/>
+               <field name="comp_iaddr_mode" start="60" end="60" type="COMP_IADDR_TYPE"/>
+               <field name="x_rsrvd2" start="59" end="59" type="bool"/>
+               <field name="comp_cor_enable" start="59" end="59" type="bool"/>
+               <field name="dither" start="58" end="58" type="bool"/>
+               <field name="tilerelative" start="57" end="57" type="bool"/>
+               <field name="downscale" start="56" end="56" type="bool"/>
+               <field name="size_z" start="52" end="55" type="SIZE"/>
+               <field name="rotation" start="50" end="51" type="ROTATION_TYPE"/>
+               <field name="linestride" start="34" end="49" type="uint"/>
+               <field name="memlayout" start="32" end="33" type="MEMLAYOUT"/>
+               <field name="swiz_chan3" start="29" end="31" type="SWIZ"/>
+               <field name="swiz_chan2" start="26" end="28" type="SWIZ"/>
+               <field name="swiz_chan1" start="23" end="25" type="SWIZ"/>
+               <field name="swiz_chan0" start="20" end="22" type="SWIZ"/>
+               <field name="minclip_x" start="6" end="19" type="uint"/>
+               <field name="twocomp_gamma" start="5" end="5" type="TWOCOMP_GAMMA"/>
+               <field name="gamma" start="4" end="4" type="bool"/>
+               <field name="compression" start="3" end="3" type="bool"/>
+               <field name="compress_size" start="2" end="2" type="COMPRESS_SIZE"/>
+               <field name="comp_indirect_table" start="1" end="1" type="bool"/>
+               <condition type="if" check="PBE_YFLIP"/>
+                       <field name="y_flip" start="0" end="0" type="bool"/>
+               <condition type="endif" check="PBE_YFLIP"/>
+       </struct>
+
+       <struct name="FRAG_SCREEN" length="1">
+               <field name="ymax" start="16" end="30" type="uint"/>
+               <field name="xmax" start="0" end="14" type="uint"/>
+       </struct>
+
+       <struct name="TPU" length="1">
+               <condition type="if" check="PDSL0SIZE &gt; 0"/>
+                       <field name="mcu_pds_l0_off" start="8" end="8" type="bool"/>
+               <condition type="endif" check="PDSL0SIZE &gt; 0"/>
+               <condition type="if" check="TPU_CEM_DATAMASTER_GLOBAL_REGISTERS"/>
+                       <field name="tag_cem_64_face_packing" start="7" end="7" type="bool"/>
+               <condition type="endif" check="TPU_CEM_DATAMASTER_GLOBAL_REGISTERS"/>
+               <field name="tag_enable_mmu_prefetch" start="6" end="6" type="bool"/>
+               <field name="tag_cem_4k_face_packing" start="5" end="5" type="bool"/>
+               <field name="madd_config_l0off" start="4" end="4" type="bool"/>
+               <field name="tag_cem_face_packing" start="3" end="3" type="bool"/>
+               <field name="tag_cemedge_dontfilter" start="2" end="2" type="bool"/>
+               <condition type="if" check="TPU_CEM_USG_NORMALISATION"/>
+                       <field name="tag_cemgrad_dontnegate" start="1" end="1" type="bool"/>
+               <condition type="endif" check="TPU_CEM_USG_NORMALISATION"/>
+               <field name="madd_config_dxt35_transovr" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="TPU_BORDER_COLOUR_TABLE_PDM" length="2">
+               <field name="border_colour_table_address" start="0" end="37" shift="2" type="address"/>
+       </struct>
+
+       <struct name="TPU_BORDER_COLOUR_TABLE_VDM" length="2">
+               <field name="border_colour_table_address" start="0" end="37" shift="2" type="address"/>
+       </struct>
+
+       <struct name="TPU_BORDER_COLOUR_TABLE_CDM" length="2">
+               <field name="border_colour_table_address" start="0" end="37" shift="2" type="address"/>
+       </struct>
+
+       <struct name="USC_PIXEL_OUTPUT_CTRL" length="1">
+               <field name="partition_mask" start="3" end="20" type="uint"/>
+               <field name="enable_4th_partition" start="2" end="2" type="bool"/>
+               <field name="width" start="0" end="1" type="PIXEL_WIDTH"/>
+       </struct>
+
+</csbgen>
diff --git a/src/imagination/csbgen/rogue_hwdefs.h b/src/imagination/csbgen/rogue_hwdefs.h
new file mode 100644 (file)
index 0000000..75a3a47
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Umbrella header: pulls in every hardware-definition header generated by
+ * csbgen (gen_pack_header.py) from the rogue_*.xml descriptions, so users
+ * only need a single #include to get all of them.
+ */
+#ifndef ROGUE_HWDEFS_H
+#define ROGUE_HWDEFS_H
+
+#include "rogue_cdm.h"
+#include "rogue_cr.h"
+#include "rogue_ipf.h"
+#include "rogue_lls.h"
+#include "rogue_pbestate.h"
+#include "rogue_pds.h"
+#include "rogue_ppp.h"
+#include "rogue_texstate.h"
+#include "rogue_vdm.h"
+
+#endif /* ROGUE_HWDEFS_H */
diff --git a/src/imagination/csbgen/rogue_ipf.xml b/src/imagination/csbgen/rogue_ipf.xml
new file mode 100644 (file)
index 0000000..aa96c7b
--- /dev/null
@@ -0,0 +1,40 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="IPF">
+
+       <define name="TILE_SIZE_PIXELS" value="32"/>
+
+       <struct name="SCISSOR_WORD_0" length="1">
+               <field name="scw0_xmin" start="16" end="31" type="uint"/>
+               <field name="scw0_xmax" start="0" end="15" type="uint"/>
+       </struct>
+
+       <struct name="SCISSOR_WORD_1" length="1">
+               <field name="scw1_ymin" start="16" end="31" type="uint"/>
+               <field name="scw1_ymax" start="0" end="15" type="uint"/>
+       </struct>
+
+</csbgen>
diff --git a/src/imagination/csbgen/rogue_lls.xml b/src/imagination/csbgen/rogue_lls.xml
new file mode 100644 (file)
index 0000000..2da0af2
--- /dev/null
@@ -0,0 +1,49 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="LLS">
+
+	<!-- Size and alignment (both in bytes) of the CDM's context state buffer. -->
+       <define name="CDM_CONTEXT_RESUME_BUFFER_SIZE" value="72"/>
+       <define name="CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT" value="16"/>
+
+	<!-- Size and alignment (both in bytes) of the PDS's persistent-temporary register context state buffer. -->
+       <define name="PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT" value="16"/>
+       <define name="PDS_PERSISTENT_TEMPS_BUFFER_SIZE" value="128"/>
+
+	<!-- Size and alignment (both in bytes) of the TA's context state buffer. -->
+       <define name="TA_STATE_BUFFER_ALIGNMENT" value="16"/>
+       <define name="TA_STATE_BUFFER_SIZE" value="484"/>
+
+	<!-- Size and alignment (both in bytes) of the USC's shared register context state buffer. -->
+       <define name="USC_SHARED_REGS_BUFFER_ALIGNMENT" value="16"/>
+       <define name="USC_SHARED_REGS_BUFFER_SIZE" value="16384"/>
+
+	<!-- Size and alignment (both in bytes) of the VDM's context resume control stream buffer. -->
+       <define name="VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT" value="16"/>
+       <define name="VDM_CONTEXT_RESUME_BUFFER_SIZE" value="92"/>
+
+</csbgen>
diff --git a/src/imagination/csbgen/rogue_pbestate.xml b/src/imagination/csbgen/rogue_pbestate.xml
new file mode 100644 (file)
index 0000000..29346d2
--- /dev/null
@@ -0,0 +1,300 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="PBESTATE">
+
+       <enum name="COMP_IADDR_TYPE">
+               <value name="INDIRECT_1TILE" value="0"/>
+               <value name="INDIRECT_4TILE" value="1"/>
+       </enum>
+
+       <enum name="COMPRESS_SIZE">
+               <value name="BLOCK_8X8"  value="0"/>
+               <value name="BLOCK_16X4" value="1"/>
+       </enum>
+
+       <enum name="COMPRESS_SIZE_EXT">
+               <value name="BLOCK_8X8_16X4"   value="0"/>
+               <value name="BLOCK_32X2_RSRVD" value="1"/>
+       </enum>
+
+       <enum name="COMPRESSION">
+               <value name="DISABLED" value="0"/>
+               <value name="ENABLED"  value="1"/>
+       </enum>
+
+       <enum name="LOSSY">
+               <value name="DISABLED" value="0"/>
+               <value name="ENABLED"  value="1"/>
+       </enum>
+
+       <enum name="MEMLAYOUT">
+               <value name="LINEAR"     value="0"/>
+               <value name="TWIDDLE_2D" value="1"/>
+               <value name="TWIDDLE_3D" value="2"/>
+               <value name="TILED"      value="3"/>
+       </enum>
+
+       <enum name="PACKMODE">
+               <value name="U8U8U8U8"            value="0x0"/>
+               <value name="S8S8S8S8"            value="0x1"/>
+               <value name="X8U8S8S8"            value="0x2"/>
+               <value name="X8S8S8U8"            value="0x3"/>
+               <value name="A1R5G5B5"            value="0x4"/>
+               <value name="R5G5B5A1"            value="0x5"/>
+               <value name="A4R4G4B4"            value="0x6"/>
+               <value name="A8R3G3B2"            value="0x7"/>
+               <value name="U16U16U16U16"        value="0x8"/>
+               <value name="S16S16S16S16"        value="0x9"/>
+               <value name="F16F16F16F16"        value="0xa"/>
+               <value name="U32U32U32U32"        value="0xb"/>
+               <value name="S32S32S32S32"        value="0xc"/>
+               <value name="F32F32F32F32"        value="0xd"/>
+               <value name="A2R10B10G10"         value="0xe"/>
+               <value name="R10B10G10A2"         value="0xf"/>
+               <value name="A2F10F10F10"         value="0x10"/>
+               <value name="F10F10F10A2"         value="0x11"/>
+               <value name="U8U8U8"              value="0x12"/>
+               <value name="S8S8S8"              value="0x13"/>
+               <value name="R5G6B5"              value="0x14"/>
+               <value name="R5SG5SB6"            value="0x15"/>
+               <value name="B6G5SR5S"            value="0x16"/>
+               <value name="U16U16U16"           value="0x17"/>
+               <value name="S16S16S16"           value="0x18"/>
+               <value name="F16F16F16"           value="0x19"/>
+               <value name="U32U32U32"           value="0x1a"/>
+               <value name="S32S32S32"           value="0x1b"/>
+               <value name="F11F11F10"           value="0x1c"/>
+               <value name="F10F11F11"           value="0x1d"/>
+               <value name="SE9995"              value="0x1e"/>
+               <value name="F32F32F32"           value="0x1f"/>
+               <value name="X24U8F32"            value="0x20"/>
+               <value name="X24X8F32"            value="0x21"/>
+               <value name="X24G8X32"            value="0x22"/>
+               <value name="U8U8"                value="0x23"/>
+               <value name="S8S8"                value="0x24"/>
+               <value name="U16U16"              value="0x25"/>
+               <value name="S16S16"              value="0x26"/>
+               <value name="F16F16"              value="0x27"/>
+               <value name="U32U32"              value="0x28"/>
+               <value name="S32S32"              value="0x29"/>
+               <value name="F32F32"              value="0x2a"/>
+               <value name="U24ST8"              value="0x2b"/>
+               <value name="ST8U24"              value="0x2c"/>
+               <value name="X8U24"               value="0x2d"/>
+               <value name="U8X24"               value="0x2e"/>
+               <value name="U8"                  value="0x2f"/>
+               <value name="S8"                  value="0x30"/>
+               <value name="U16"                 value="0x31"/>
+               <value name="S16"                 value="0x32"/>
+               <value name="F16"                 value="0x33"/>
+               <value name="U32"                 value="0x34"/>
+               <value name="S32"                 value="0x35"/>
+               <value name="F32"                 value="0x36"/>
+               <value name="PBYTE"               value="0x37"/>
+               <value name="PWORD"               value="0x38"/>
+               <value name="ARGBV16_XR10"        value="0x39"/>
+               <value name="A2_XRBIAS_U10U10U10" value="0x3a"/>
+               <value name="YUV"                 value="0x3b"/>
+               <value name="U10U10U10_XRBIAS_A2" value="0x3c"/>
+               <value name="INVALID"             value="0xFFFFFFFF"/>
+       </enum>
+
+       <enum name="PAIR_TILES">
+               <value name="DISABLED" value="0"/>
+               <value name="ENABLED"  value="1"/>
+       </enum>
+
+       <enum name="REG_WORD0_LINESTRIDE">
+               <value name="ALIGNSHIFT"                                 value="1"/>
+               <value name="ALIGNSIZE"                                  value="2"/>
+               <value name="ALIGNSHIFT_PBE_STRIDE_ALIGN_1PIXEL_ENABLED" value="0"/>
+               <value name="ALIGNSIZE_PBE_STRIDE_ALIGN_1PIXEL_ENABLED"  value="1"/>
+       </enum>
+
+       <enum name="ROTATION_TYPE">
+               <value name="0_DEG"   value="0"/>
+               <value name="90_DEG"  value="1"/>
+               <value name="180_DEG" value="2"/>
+               <value name="270_DEG" value="3"/>
+       </enum>
+
+       <enum name="SIZE">
+               <value name="1_PIXEL"   value="0"/>
+               <value name="2_PIXEL"   value="1"/>
+               <value name="4_PIXEL"   value="2"/>
+               <value name="8_PIXEL"   value="3"/>
+               <value name="16_PIXEL"  value="4"/>
+               <value name="32_PIXEL"  value="5"/>
+               <value name="64_PIXEL"  value="6"/>
+               <value name="128_PIXEL" value="7"/>
+               <value name="256_PIXEL" value="8"/>
+               <value name="512_PIXEL" value="9"/>
+               <value name="1K_PIXEL"  value="10"/>
+               <value name="2K_PIXEL"  value="11"/>
+               <value name="4K_PIXEL"  value="12"/>
+               <value name="8K_PIXEL"  value="13"/>
+               <value name="16K_PIXEL" value="14"/>
+       </enum>
+
+       <enum name="SOURCE_FORMAT">
+               <value name="F16_PER_CHANNEL" value="0"/>
+               <value name="8_PER_CHANNEL"   value="1"/>
+       </enum>
+
+       <enum name="SOURCE_POS">
+               <value name="START_BIT0"  value="0"/>
+               <value name="START_BIT32" value="1"/>
+               <value name="START_BIT64" value="2"/>
+               <value name="START_BIT96" value="3"/>
+       </enum>
+
+       <enum name="SWIZ">
+               <value name="SOURCE_CHAN0" value="0"/>
+               <value name="SOURCE_CHAN1" value="1"/>
+               <value name="SOURCE_CHAN2" value="2"/>
+               <value name="SOURCE_CHAN3" value="3"/>
+               <value name="ONE"          value="4"/>
+               <value name="ZERO"         value="5"/>
+       </enum>
+
+       <enum name="TFBC_LOSSY">
+               <value name="LOSSLESS" value="0"/>
+               <value name="LOSSY75"  value="1"/>
+               <value name="LOSSY50"  value="2"/>
+               <value name="LOSSY25"  value="3"/>
+       </enum>
+
+       <enum name="TWOCOMP_GAMMA">
+               <value name="GAMMA_BOTTOM_CHANNEL" value="0"/>
+               <value name="GAMMA_BOTH_CHANNELS"  value="1"/>
+       </enum>
+
+       <enum name="Y_FLIP">
+               <value name="DISABLED" value="0"/>
+               <value name="ENABLED"  value="1"/>
+       </enum>
+
+       <enum name="YUV_DOWNSCALE">
+               <value name="NO_DOWNSCALING"   value="0"/>
+               <value name="ONE_SAMPLE_LEFT"  value="1"/>
+               <value name="EMPTY"            value="2"/>
+               <value name="TWO_SAMPLES"      value="3"/>
+               <value name="FOUR_SAMPLES"     value="4"/>
+               <value name="ONE_SAMPLE_RIGHT" value="5"/>
+       </enum>
+
+       <enum name="YUV_PMODE">
+               <value name="UV8_420_2PLANE"   value="0"/>
+               <value name="U8_420_3PLANE"    value="1"/>
+               <value name="V8_420_3PLANE"    value="2"/>
+               <value name="YUV8_422_1PLANE"  value="3"/>
+               <value name="UV8_422_2PLANE"   value="4"/>
+               <value name="UV8_444_2PLANE"   value="5"/>
+               <value name="Y8_23PLANE"       value="6"/>
+               <value name="U8_444_3PLANE"    value="7"/>
+               <value name="V8_444_3PLANE"    value="8"/>
+               <value name="YUV10_444_1PLANE" value="9"/>
+               <value name="RESERVED_YUV1"    value="10"/>
+               <value name="RESERVED_YUV2"    value="11"/>
+               <value name="YUV10_422_1PLANE" value="12"/>
+               <value name="UV10_420_2PLANE"  value="13"/>
+               <value name="Y10_23PLANE"      value="14"/>
+               <value name="UV16_420_2PLANE"  value="15"/>
+               <value name="UV16_422_2PLANE"  value="16"/>
+               <value name="UV16_444_2PLANE"  value="17"/>
+               <value name="Y16_23PLANE"      value="18"/>
+               <value name="U16_444_3PLANE"   value="19"/>
+               <value name="V16_444_3PLANE"   value="20"/>
+       </enum>
+
+       <struct name="STATE_WORD0" length="1">
+               <field name="address_low" start="0" end="31" shift="2" type="address"/>
+       </struct>
+
+       <struct name="STATE_WORD1" length="1">
+               <condition type="if" check="8_OUTPUT_REGISTERS"/>
+                       <field name="source_pos_offset_128" start="28" end="28" type="bool"/>
+               <condition type="endif" check="8_OUTPUT_REGISTERS"/>
+               <field name="yuv_pmode" start="23" end="27" type="YUV_PMODE"/>
+               <field name="yuv_downscale" start="20" end="22" type="YUV_DOWNSCALE"/>
+               <field name="source_format" start="19" end="19" type="SOURCE_FORMAT"/>
+               <field name="mrt_index" start="16" end="18" type="uint"/>
+               <field name="source_pos" start="14" end="15" type="SOURCE_POS"/>
+               <field name="norm" start="13" end="13" type="bool"/>
+               <field name="packmode" start="7" end="12" type="PACKMODE"/>
+               <field name="emptytile" start="6" end="6" type="bool"/>
+               <field name="address_high" start="0" end="5" shift="34" type="address"/>
+       </struct>
+
+       <struct name="REG_WORD0" length="2">
+		<!-- NOTE(review): tfbc_lossy (bits 62-63) overlaps lossy (bit 62);
+		     presumably only one applies depending on the TFBC feature set
+		     of the target GPU - confirm against the feature conditions. -->
+		<field name="tfbc_lossy" start="62" end="63" type="TFBC_LOSSY">
+                       <define name="LOSSY37_75_TFBC_LOSSY_37_PERCENT_ENABLED" value="1"/>
+               </field>
+               <field name="lossy" start="62" end="62" type="LOSSY"/>
+               <field name="compress_size_ext" start="61" end="61" type="COMPRESS_SIZE_EXT"/>
+               <field name="comp_iaddr_mode" start="60" end="60" type="COMP_IADDR_TYPE"/>
+               <field name="comp_cor_enable" start="59" end="59" type="bool"/>
+               <field name="dither" start="58" end="58" type="bool"/>
+               <field name="tilerelative" start="57" end="57" type="bool"/>
+               <field name="downscale" start="56" end="56" type="bool"/>
+               <field name="size_z" start="52" end="55" type="SIZE"/>
+               <field name="rotation" start="50" end="51" type="ROTATION_TYPE"/>
+               <field name="linestride" start="34" end="49" type="uint">
+                       <define name="UNIT_SIZE" value="2"/>
+               </field>
+               <field name="memlayout" start="32" end="33" type="MEMLAYOUT"/>
+               <field name="swiz_chan3" start="29" end="31" type="SWIZ"/>
+               <field name="swiz_chan2" start="26" end="28" type="SWIZ"/>
+               <field name="swiz_chan1" start="23" end="25" type="SWIZ"/>
+               <field name="swiz_chan0" start="20" end="22" type="SWIZ"/>
+               <field name="minclip_x" start="6" end="19" type="uint"/>
+               <field name="twocomp_gamma" start="5" end="5" type="TWOCOMP_GAMMA"/>
+               <field name="gamma" start="4" end="4" type="bool"/>
+               <field name="compression" start="3" end="3" type="COMPRESSION"/>
+               <field name="compress_size" start="2" end="2" type="COMPRESS_SIZE"/>
+               <field name="comp_indirect_table" start="1" end="1" type="bool"/>
+               <field name="y_flip" start="0" end="0" type="Y_FLIP"/>
+       </struct>
+
+       <struct name="REG_WORD1" length="2">
+               <field name="size_x" start="60" end="63" type="SIZE"/>
+               <field name="minclip_y" start="46" end="59" type="uint"/>
+               <field name="maxclip_x" start="32" end="45" type="uint"/>
+               <field name="size_y" start="28" end="31" type="SIZE"/>
+               <field name="zslice" start="14" end="27" type="uint"/>
+               <field name="maxclip_y" start="0" end="13" type="uint"/>
+       </struct>
+
+       <struct name="REG_WORD2" length="2">
+               <field name="pair_tiles" start="46" end="46" type="PAIR_TILES">
+                       <!-- TODO: Do we need this? -->
+                       <define name="SHIFT" value="46"/>
+               </field>
+               <field name="surface_y_size" start="32" end="45" type="uint"/>
+               <field name="sw_bytemask" start="0" end="31" type="uint"/>
+       </struct>
+
+</csbgen>
diff --git a/src/imagination/csbgen/rogue_pds.xml b/src/imagination/csbgen/rogue_pds.xml
new file mode 100644 (file)
index 0000000..e3b673d
--- /dev/null
@@ -0,0 +1,116 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="PDSINST">
+
+       <enum name="CMODE_LD">
+               <value name="CACHED"          value="0"/>
+               <value name="BYPASS"          value="1"/>
+               <value name="FORCE_LINE_FILL" value="2"/>
+       </enum>
+
+       <enum name="DOUTD_DEST">
+               <value name="UNIFIED_STORE" value="0"/>
+               <value name="COMMON_STORE"  value="1"/>
+       </enum>
+
+       <enum name="DOUTI_SHADEMODEL">
+               <value name="FLAT_VERTEX0" value="0"/>
+               <value name="FLAT_VERTEX1" value="1"/>
+               <value name="FLAT_VERTEX2" value="2"/>
+		<!-- "GOURUAD" [sic, GOURAUD]: spelling kept as-is because the
+		     generated identifier may be referenced elsewhere; renaming
+		     here would break those uses. TODO: fix spelling treewide. -->
+		<value name="GOURUAD"      value="3"/>
+       </enum>
+
+       <enum name="DOUTI_SIZE">
+               <value name="1D" value="0"/>
+               <value name="2D" value="1"/>
+               <value name="3D" value="2"/>
+               <value name="4D" value="3"/>
+       </enum>
+
+       <enum name="DOUTU_SAMPLE_RATE">
+               <value name="INSTANCE"  value="0"/>
+               <value name="SELECTIVE" value="1"/>
+               <value name="FULL"      value="2"/>
+       </enum>
+
+       <enum name="SLC_MODE_LD">
+               <value name="BYPASS"       value="0"/>
+               <value name="CACHED"       value="1"/>
+               <value name="CACHED_RD_NA" value="3"/>
+       </enum>
+
+       <enum name="WORDSIZE">
+               <value name="ONE"   value="0"/>
+               <value name="TWO"   value="1"/>
+               <value name="THREE" value="2"/>
+               <value name="FOUR"  value="3"/>
+       </enum>
+
+       <struct name="DOUTU_SRC0" length="2">
+		<field name="dual_phase" start="41" end="41" type="bool"/>
+               <field name="temps" start="35" end="40" type="uint">
+                       <define name="UNIT_SIZE" value="2"/>
+               </field>
+               <field name="sample_rate" start="33" end="34" type="DOUTU_SAMPLE_RATE"/>
+               <field name="exe_off" start="2" end="31" shift="2" type="address"/>
+       </struct>
+
+       <struct name="DOUT_FIELDS_DOUTD_SRC0" length="2">
+               <condition type="if" check="SLC_MCU_CACHE_CONTROLS"/>
+                       <field name="slcmode" start="60" end="61" type="SLC_MODE_LD"/>
+               <condition type="endif" check="SLC_MCU_CACHE_CONTROLS"/>
+               <field name="doffset" start="40" end="52" type="uint"/>
+               <field name="sbase" start="0" end="39" shift="0" type="address"/>
+       </struct>
+
+       <struct name="DOUT_FIELDS_DOUTD_SRC1" length="1">
+               <field name="last" start="31" end="31" type="bool"/>
+               <field name="wordsize" start="29" end="30" type="WORDSIZE"/>
+               <field name="dest" start="28" end="28" type="DOUTD_DEST"/>
+               <field name="cmode" start="26" end="27" type="CMODE_LD"/>
+               <field name="a0" start="13" end="25" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+               <field name="repeat" start="12" end="12" type="bool"/>
+               <field name="bsize" start="0" end="11" type="uint"/>
+       </struct>
+
+       <struct name="DOUT_FIELDS_DOUTI_SRC" length="1">
+               <field name="depthbias" start="27" end="27" type="bool"/>
+               <field name="primitiveid" start="26" end="26" type="bool"/>
+               <field name="shademodel" start="24" end="25" type="DOUTI_SHADEMODEL"/>
+               <field name="pointsprite" start="23" end="23" type="bool"/>
+               <field name="wraps" start="22" end="22" type="bool"/>
+               <field name="wrapv" start="21" end="21" type="bool"/>
+               <field name="wrapu" start="20" end="20" type="bool"/>
+               <field name="size" start="18" end="19" type="DOUTI_SIZE"/>
+               <field name="f16" start="17" end="17" type="bool"/>
+               <field name="perspective" start="16" end="16" type="bool"/>
+               <field name="f32_offset" start="8" end="15" type="uint"/>
+               <field name="f16_offset" start="0" end="7" type="uint"/>
+       </struct>
+
+</csbgen>
diff --git a/src/imagination/csbgen/rogue_ppp.xml b/src/imagination/csbgen/rogue_ppp.xml
new file mode 100644 (file)
index 0000000..f3a8878
--- /dev/null
@@ -0,0 +1,329 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="TA">
+
+       <enum name="CLIP_MODE">
+               <value name="NO_FRONT_OR_REAR" value="0"/>
+               <value name="FRONT_REAR"       value="1"/>
+               <value name="FRONT_REAR_DEPTH" value="2"/>
+       </enum>
+
+       <enum name="CMPMODE">
+               <value name="NEVER"            value="0"/>
+               <value name="LESS"             value="1"/>
+               <value name="EQUAL"            value="2"/>
+               <value name="LESS_OR_EQUAL"    value="3"/>
+               <value name="GREATER"          value="4"/>
+               <value name="NOT_EQUAL"        value="5"/>
+               <value name="GREATER_OR_EQUAL" value="6"/>
+               <value name="ALWAYS"           value="7"/>
+       </enum>
+
+       <enum name="CULLMODE">
+               <value name="NO_CULLING" value="0"/>
+               <value name="CULL_CW"    value="1"/>
+               <value name="CULL_CCW"   value="2"/>
+       </enum>
+
+       <enum name="FLATSHADE">
+               <value name="VTX_VERTEX_0" value="1"/>
+               <value name="VTX_VERTEX_1" value="2"/>
+               <value name="VTX_VERTEX_2" value="3"/>
+       </enum>
+
+       <enum name="GS_OUTPUT_TOPOLOGY">
+               <value name="POINT_LIST" value="0"/>
+               <value name="LINE_STRIP" value="1"/>
+               <value name="TRI_STRIP"  value="2"/>
+       </enum>
+
+       <enum name="ISPB_STENCILOP">
+               <value name="KEEP"               value="0"/>
+               <value name="ZERO"               value="1"/>
+               <value name="REPLACE"            value="2"/>
+               <value name="INCREMENT_SATURATE" value="3"/>
+               <value name="DECREMENT_SATURATE" value="4"/>
+               <value name="INVERT"             value="5"/>
+               <value name="INCREMENT"          value="6"/>
+               <value name="DECREMENT"          value="7"/>
+       </enum>
+
+       <enum name="OBJTYPE">
+               <value name="TRIANGLE"                   value="0"/>
+               <value name="LINE"                       value="1"/>
+               <value name="SPRITE_10UV"                value="2"/>
+               <value name="SPRITE_UV"                  value="3"/>
+               <value name="SPRITE_01UV"                value="4"/>
+               <value name="LINE_FILLED_TRIANGLE"       value="5"/>
+               <value name="POINT_FILLED_TRIANGLE"      value="6"/>
+               <value name="TESSELLATED_OBJECT_NO_GS"   value="7"/>
+               <value name="TESSELLATED_OBJECT_WITH_GS" value="8"/>
+       </enum>
+
+       <!--
+               TODO: Add support for "ifs" in csbgen root element.
+       -->
+       <enum name="PASSTYPE">
+               <value name="OPAQUE"             value="0"/>
+               <value name="TRANSLUCENT"        value="1"/>
+               <value name="PUNCH_THROUGH"      value="2"/>
+               <value name="VIEWPORT_OBJECT"    value="3"/>
+               <value name="FAST_PUNCH_THROUGH" value="4"/>
+               <value name="DEPTH_FEEDBACK"     value="5"/>
+               <value name="ANTI_ALIASED"       value="6"/>
+       </enum>
+
+       <enum name="REGION_CLIP_MODE">
+               <value name="NONE"    value="0"/>
+               <value name="OUTSIDE" value="1"/>
+       </enum>
+
+       <struct name="STATE_HEADER" length="1">
+               <field name="not_final_term" start="26" end="26" type="bool"/>
+               <field name="pres_terminate" start="25" end="25" type="bool"/>
+               <field name="context_switch" start="24" end="24" type="bool"/>
+               <field name="pres_stream_out_program" start="23" end="23" type="bool"/>
+               <field name="pres_stream_out_size" start="22" end="22" type="bool"/>
+               <field name="pres_ppp_ctrl" start="21" end="21" type="bool"/>
+               <field name="pres_varying_word2" start="20" end="20" type="bool"/>
+               <field name="pres_varying_word1" start="19" end="19" type="bool"/>
+               <field name="pres_varying_word0" start="18" end="18" type="bool"/>
+               <field name="pres_outselects" start="17" end="17" type="bool"/>
+               <field name="pres_wclamp" start="16" end="16" type="bool"/>
+               <field name="view_port_count" start="12" end="15" type="uint"/>
+               <field name="pres_viewport" start="11" end="11" type="bool"/>
+               <field name="pres_region_clip" start="10" end="10" type="bool"/>
+               <field name="pres_pds_state_ptr3" start="9" end="9" type="bool"/>
+               <field name="pres_pds_state_ptr2" start="8" end="8" type="bool"/>
+               <field name="pres_pds_state_ptr1" start="7" end="7" type="bool"/>
+               <field name="pres_pds_state_ptr0" start="6" end="6" type="bool"/>
+               <field name="pres_ispctl_dbsc" start="5" end="5" type="bool"/>
+               <field name="pres_ispctl_bb" start="4" end="4" type="bool"/>
+               <field name="pres_ispctl_ba" start="3" end="3" type="bool"/>
+               <field name="pres_ispctl_fb" start="2" end="2" type="bool"/>
+               <field name="pres_ispctl_fa" start="1" end="1" type="bool"/>
+               <field name="pres_ispctl" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="STATE_ISPCTL" length="1">
+               <field name="validid" start="26" end="31" type="uint"/>
+               <field name="upass" start="22" end="25" type="uint"/>
+               <field name="tagwritedisable" start="21" end="21" type="bool"/>
+               <field name="ovgmtestdisable" start="20" end="20" type="bool"/>
+               <field name="two_sided" start="19" end="19" type="bool"/>
+               <field name="bpres" start="18" end="18" type="bool"/>
+               <field name="dbenable" start="17" end="17" type="bool"/>
+               <field name="scenable" start="16" end="16" type="bool"/>
+               <field name="vistest" start="15" end="15" type="bool"/>
+               <field name="visbool" start="14" end="14" type="bool"/>
+               <field name="visreg" start="0" end="13" type="uint"/>
+       </struct>
+
+       <struct name="STATE_ISPA" length="1">
+               <field name="objtype" start="28" end="31" type="OBJTYPE"/>
+               <field name="passtype" start="24" end="26" type="PASSTYPE"/>
+               <field name="ovgvispassmaskop" start="23" end="23" type="bool"/>
+               <field name="maskval" start="22" end="22" type="bool"/>
+               <field name="dwritedisable" start="21" end="21" type="bool"/>
+               <field name="dfbztestenable" start="20" end="20" type="bool"/>
+               <field name="dcmpmode" start="17" end="19" type="CMPMODE"/>
+               <field name="linefilllastpixel" start="16" end="16" type="bool"/>
+               <field name="pointlinewidth" start="8" end="15" type="uint">
+                       <define name="SIZE_MAX" value="255"/>
+               </field>
+               <field name="sref" start="0" end="7" type="uint"/>
+       </struct>
+
+       <struct name="STATE_ISPB" length="1">
+               <field name="scmpmode" start="25" end="27" type="CMPMODE"/>
+               <field name="sop1" start="22" end="24" type="ISPB_STENCILOP"/>
+               <field name="sop2" start="19" end="21" type="ISPB_STENCILOP"/>
+               <field name="sop3" start="16" end="18" type="ISPB_STENCILOP"/>
+               <field name="scmpmask" start="8" end="15" type="uint"/>
+               <field name="swmask" start="0" end="7" type="uint"/>
+       </struct>
+
+       <struct name="REGION_CLIP0" length="1">
+               <field name="mode" start="31" end="31" type="REGION_CLIP_MODE"/>
+               <field name="left" start="16" end="24" type="uint"/>
+               <field name="right" start="0" end="8" type="uint"/>
+       </struct>
+
+       <struct name="REGION_CLIP1" length="1">
+               <field name="top" start="16" end="24" type="uint"/>
+               <field name="bottom" start="0" end="8" type="uint"/>
+       </struct>
+
+       <struct name="STATE_ISPDBSC" length="1">
+               <field name="dbindex" start="16" end="31" type="uint"/>
+               <field name="scindex" start="0" end="15" type="uint"/>
+       </struct>
+
+       <struct name="OUTPUT_SEL" length="1">
+               <field name="vtxsize" start="24" end="31" type="uint"/>
+               <field name="tsp_unclamped_z_pres" start="21" end="21" type="bool"/>
+               <field name="render_tgt_pres" start="20" end="20" type="bool"/>
+               <field name="vpt_tgt_pres" start="19" end="19" type="bool"/>
+               <field name="psprite_size_pres" start="18" end="18" type="bool"/>
+               <field name="isp_position_depth_clamp_z" start="17" end="17" type="bool"/>
+               <field name="rhw_pres" start="16" end="16" type="bool"/>
+               <field name="cullplane7" start="15" end="15" type="bool"/>
+               <field name="cullplane6" start="14" end="14" type="bool"/>
+               <field name="cullplane5" start="13" end="13" type="bool"/>
+               <field name="cullplane4" start="12" end="12" type="bool"/>
+               <field name="cullplane3" start="11" end="11" type="bool"/>
+               <field name="cullplane2" start="10" end="10" type="bool"/>
+               <field name="cullplane1" start="9" end="9" type="bool"/>
+               <field name="cullplane0" start="8" end="8" type="bool"/>
+               <field name="plane7" start="7" end="7" type="bool"/>
+               <field name="plane6" start="6" end="6" type="bool"/>
+               <field name="plane5" start="5" end="5" type="bool"/>
+               <field name="plane4" start="4" end="4" type="bool"/>
+               <field name="plane3" start="3" end="3" type="bool"/>
+               <field name="plane2" start="2" end="2" type="bool"/>
+               <field name="plane1" start="1" end="1" type="bool"/>
+               <field name="plane0" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="STATE_VARYING0" length="1">
+               <condition type="if" check="TEXTURE_WRAP_VARYING"/>
+                       <field name="f32_linear_wrap" start="24" end="31" type="uint"/>
+               <condition type="endif" check="TEXTURE_WRAP_VARYING"/>
+               <field name="f32_npc" start="16" end="23" type="uint"/>
+               <field name="f32_flat" start="8" end="15" type="uint"/>
+               <field name="f32_linear" start="0" end="7" type="uint"/>
+       </struct>
+
+       <struct name="STATE_VARYING1" length="1">
+               <field name="f16_npc" start="24" end="31" type="uint"/>
+               <field name="f16_flat" start="16" end="23" type="uint"/>
+               <field name="f16_linear" start="8" end="15" type="uint"/>
+               <condition type="if" check="TEXTURE_WRAP_VARYING"/>
+                       <field name="f32_npc_wrap" start="0" end="7" type="uint"/>
+               <condition type="endif" check="TEXTURE_WRAP_VARYING"/>
+       </struct>
+
+       <struct name="STATE_TERMINATE0" length="1">
+               <field name="clip_right" start="18" end="26" type="uint">
+                       <define name="BLOCK_SIZE_IN_PIXELS" value="32"/>
+               </field>
+               <field name="clip_top" start="9" end="17" type="uint">
+                       <define name="BLOCK_SIZE_IN_PIXELS" value="32"/>
+               </field>
+               <field name="clip_bottom" start="0" end="8" type="uint">
+                       <define name="BLOCK_SIZE_IN_PIXELS" value="32"/>
+               </field>
+       </struct>
+
+       <struct name="STATE_TERMINATE1" length="1">
+               <field name="clip_left" start="23" end="31" type="uint">
+                       <define name="BLOCK_SIZE_IN_PIXELS" value="32"/>
+               </field>
+               <field name="render_target" start="0" end="10" type="uint"/>
+       </struct>
+
+       <struct name="STATE_STREAM_OUT1" length="1">
+               <field name="sync" start="10" end="10" type="bool"/>
+               <field name="pds_data_size" start="4" end="9" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="pds_temp_size" start="0" end="3" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+       </struct>
+
+       <struct name="STATE_STREAM_OUT2" length="1">
+               <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+               <field name="pds_data_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="STATE_PDS_SHADERBASE" length="1">
+               <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+               <field name="addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="STATE_PDS_TEXUNICODEBASE" length="1">
+               <field name="addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="STATE_PDS_VARYINGBASE" length="1">
+               <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+               <field name="addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="STATE_PDS_TEXTUREDATABASE" length="1">
+               <field name="addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="STATE_PDS_UNIFORMDATABASE" length="1">
+               <!-- This is an offset actually. Note for when we auto-generate the xmls. -->
+               <field name="addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="STATE_PDS_SIZEINFO1" length="1">
+               <field name="pds_uniformsize" start="23" end="31" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+               <field name="pds_texturestatesize" start="16" end="22" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+               <field name="pds_varyingsize" start="10" end="15" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+               <field name="usc_varyingsize" start="4" end="9" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+                       <define name="MAX_SIZE" value="63"/>
+               </field>
+               <field name="pds_tempsize" start="0" end="3" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+       </struct>
+
+       <struct name="STATE_PDS_SIZEINFO2" length="1">
+               <field name="usc_sharedsize" start="23" end="31" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="pds_tri_merge_disable" start="14" end="14" type="bool"/>
+               <field name="pds_batchnum" start="0" end="13" type="uint"/>
+       </struct>
+
+       <struct name="STATE_PPP_CTRL" length="1">
+               <field name="trp" start="16" end="16" type="bool"/>
+               <field name="prim_msaa" start="15" end="15" type="bool"/>
+               <field name="gs_output_topology" start="13" end="14" type="GS_OUTPUT_TOPOLOGY"/>
+               <field name="pres_prim_id" start="12" end="12" type="bool"/>
+               <field name="clip_mode" start="10" end="11" type="CLIP_MODE"/>
+               <field name="drawclippededges" start="9" end="9" type="bool"/>
+               <field name="flatshade_vtx" start="7" end="8" type="FLATSHADE"/>
+               <field name="pretransform" start="6" end="6" type="bool"/>
+               <field name="wclampen" start="5" end="5" type="bool"/>
+               <field name="wbuffen" start="4" end="4" type="bool"/>
+               <field name="resetbbox" start="3" end="3" type="bool"/>
+               <field name="updatebbox" start="2" end="2" type="bool"/>
+               <field name="cullmode" start="0" end="1" type="CULLMODE"/>
+       </struct>
+
+</csbgen>
diff --git a/src/imagination/csbgen/rogue_texstate.xml b/src/imagination/csbgen/rogue_texstate.xml
new file mode 100644 (file)
index 0000000..f11eab4
--- /dev/null
@@ -0,0 +1,334 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="TEXSTATE">
+
+       <enum name="ADDRMODE">
+               <value name="REPEAT"               value="0"/>
+               <value name="FLIP"                 value="1"/>
+               <value name="CLAMP_TO_EDGE"        value="2"/>
+               <value name="FLIP_ONCE_THEN_CLAMP" value="3"/>
+               <value name="CLAMP_TO_BORDER"      value="4"/>
+               <value name="OGL_CLAMP"            value="5"/>
+       </enum>
+
+       <enum name="ANISOCTL">
+               <value name="DISABLED" value="0"/>
+               <value name="X2"       value="1"/>
+               <value name="X4"       value="2"/>
+               <value name="X8"       value="3"/>
+               <value name="X16"      value="4"/>
+       </enum>
+
+       <enum name="CLAMP">
+               <value name="MIN"             value="0"/>
+               <value name="MAX"             value="959"/>
+               <value name="INTEGER_BITS"    value="4"/>
+               <value name="FRACTIONAL_BITS" value="6"/>
+       </enum>
+
+       <enum name="CMP_MODE">
+               <value name="NEVER"        value="0"/>
+               <value name="LESS"         value="1"/>
+               <value name="EQUAL"        value="2"/>
+               <value name="LESSEQUAL"    value="3"/>
+               <value name="GREATER"      value="4"/>
+               <value name="NOTEQUAL"     value="5"/>
+               <value name="GREATEREQUAL" value="6"/>
+               <value name="ALWAYS"       value="7"/>
+       </enum>
+
+       <enum name="COMPRESSION_LEVEL">
+               <value name="LOSSLESS" value="0"/>
+               <value name="LOSSY_75" value="1"/>
+               <value name="LOSSY_50" value="2"/>
+               <value name="LOSSY_25" value="3"/>
+       </enum>
+
+       <enum name="COMPRESSION_MODE">
+               <value name="NONE"                   value="0"/>
+               <value name="TPU"                    value="1"/>
+               <value name="FB_DIRECT_8X8"          value="2"/>
+               <value name="FB_DIRECT_16X4"         value="3"/>
+               <value name="FB_INDIRECT_1TILE_8X8"  value="4"/>
+               <value name="FB_INDIRECT_1TILE_16X4" value="5"/>
+               <value name="FB_INDIRECT_4TILE_8X8"  value="6"/>
+               <value name="FB_INDIRECT_4TILE_16X4" value="7"/>
+       </enum>
+
+       <enum name="DADJUST">
+               <value name="MIN_UINT"        value="0"/>
+               <value name="ZERO_UINT"       value="4095"/>
+               <value name="MAX_UINT"        value="8191"/>
+               <value name="INTEGER_BITS"    value="5"/>
+               <value name="FRACTIONAL_BITS" value="8"/>
+       </enum>
+
+       <enum name="FILTER">
+               <value name="POINT"   value="0"/>
+               <value name="LINEAR"  value="1"/>
+               <value name="BICUBIC" value="2"/>
+       </enum>
+
+       <enum name="FORMAT">
+               <value name="U8"                            value="0"/>
+               <value name="S8"                            value="1"/>
+               <value name="A4R4G4B4"                      value="2"/>
+               <value name="A8R3G3B2"                      value="3"/>
+               <value name="A1R5G5B5"                      value="4"/>
+               <value name="R5G6B5"                        value="5"/>
+               <value name="R5sG5sB6"                      value="6"/>
+               <value name="U8U8"                          value="7"/>
+               <value name="S8S8"                          value="8"/>
+               <value name="U16"                           value="9"/>
+               <value name="S16"                           value="10"/>
+               <value name="F16"                           value="11"/>
+               <value name="U8U8U8U8"                      value="12"/>
+               <value name="S8S8S8S8"                      value="13"/>
+               <value name="A2R10B10G10"                   value="14"/>
+               <value name="U16U16"                        value="15"/>
+               <value name="S16S16"                        value="16"/>
+               <value name="F16F16"                        value="17"/>
+               <value name="F32"                           value="18"/>
+               <value name="F32_SIGNMASK"                  value="19"/>
+               <value name="X8U8S8S8"                      value="20"/>
+               <value name="X8U24"                         value="21"/>
+               <value name="ST8U24"                        value="22"/>
+               <value name="U8X24"                         value="23"/>
+               <value name="U32"                           value="24"/>
+               <value name="S32"                           value="25"/>
+               <value name="SE9995"                        value="26"/>
+               <value name="F11F11F10"                     value="27"/>
+               <value name="F16F16F16F16"                  value="28"/>
+               <value name="U16U16U16U16"                  value="29"/>
+               <value name="S16S16S16S16"                  value="30"/>
+               <value name="F16F16F16"                     value="31"/>
+               <value name="U16U16U16"                     value="32"/>
+               <value name="S16S16S16"                     value="33"/>
+               <value name="F32F32"                        value="34"/>
+               <value name="U32U32"                        value="35"/>
+               <value name="S32S32"                        value="36"/>
+               <value name="X24U8F32"                      value="37"/>
+               <value name="X24X8F32"                      value="38"/>
+               <value name="X24G8X32"                      value="39"/>
+               <value name="YUV420_2PLANE"                 value="54"/>
+               <value name="YVU420_2PLANE"                 value="55"/>
+               <value name="YUV420_3PLANE"                 value="56"/>
+               <value name="YVU420_3PLANE"                 value="57"/>
+               <value name="U8U8U8"                        value="58"/>
+               <value name="S8S8S8"                        value="59"/>
+               <value name="A2F10F10F10"                   value="60"/>
+               <value name="F32F32F32F32"                  value="61"/>
+               <value name="U32U32U32U32"                  value="62"/>
+               <value name="S32S32S32S32"                  value="63"/>
+               <value name="F32F32F32"                     value="64"/>
+               <value name="U32U32U32"                     value="65"/>
+               <value name="S32S32S32"                     value="66"/>
+               <value name="A2_XRBIAS_U10U10U10"           value="67"/>
+               <value name="O8"                            value="81"/>
+               <value name="O8O8"                          value="82"/>
+               <value name="R5G5B5A1"                      value="83"/>
+               <value name="B6G5sR5s"                      value="84"/>
+               <value name="R10B10G10A2"                   value="85"/>
+               <value name="X8S8S8U8"                      value="86"/>
+               <value name="U24ST8"                        value="87"/>
+               <value name="F10F11F11"                     value="88"/>
+               <value name="VYUY"                          value="89"/>
+               <value name="UYVY"                          value="90"/>
+               <value name="YVYU"                          value="91"/>
+               <value name="YUYV"                          value="92"/>
+               <value name="F10F10F10A2"                   value="93"/>
+               <value name="YUV420_2PLANE_MACRO_BLOCK"     value="94"/>
+               <value name="YVU420_2PLANE_MACRO_BLOCK"     value="95"/>
+               <value name="ARGBV16_XR10"                  value="96"/>
+               <value name="YVU8_422_2PLANE_PACK8"         value="97"/>
+               <value name="YVU8_444_2PLANE_PACK8"         value="98"/>
+               <value name="YVU10_444_1PLANE_PACK10"       value="99"/>
+               <value name="YVU10_422_2PLANE_PACK16"       value="100"/>
+               <value name="YVU10_420_2PLANE_PACK16"       value="101"/>
+               <value name="YVU10_444_2PLANE_PACK16"       value="102"/>
+               <value name="YUV8_422_2PLANE_PACK8"         value="103"/>
+               <value name="YUV8_444_3PLANE_PACK8"         value="104"/>
+               <value name="YUV10_444_3PLANE_PACK16"       value="105"/>
+               <value name="YVU10_420_2PLANE_PACK10"       value="106"/>
+               <value name="YUV10_420_2PLANE_PACK10"       value="107"/>
+               <value name="YVU10_422_2PLANE_PACK10"       value="108"/>
+               <value name="YUV10_422_2PLANE_PACK10"       value="109"/>
+               <value name="YUV10_444_3PLANE_PACK10"       value="110"/>
+               <value name="R8G8_B8G8"                     value="111"/>
+               <value name="G8R8_G8B8"                     value="112"/>
+               <value name="YVYU_IMPLIED_CSC"              value="113"/>
+               <value name="VYUY_IMPLIED_CSC"              value="114"/>
+               <value name="YUV10_420_2PLANE_PACK10_R"     value="115"/>
+               <value name="YVU10_420_2PLANE_PACK10_R"     value="116"/>
+               <value name="YUV10_420_2PLANE_PACK10_T"     value="117"/>
+               <value name="YVU10_420_2PLANE_PACK10_T"     value="118"/>
+               <value name="YUV8_420_2PLANE_PACK8_P"       value="119"/>
+               <value name="YVU8_420_2PLANE_PACK8_P"       value="120"/>
+               <value name="YUV8_420_2PLANE_PACK8_F"       value="121"/>
+               <value name="YVU8_420_2PLANE_PACK8_F"       value="122"/>
+               <value name="COMPRESSED_ASTC_4x4"           value="0"/>
+               <value name="COMPRESSED_ASTC_5x4"           value="1"/>
+               <value name="COMPRESSED_ASTC_5x5"           value="2"/>
+               <value name="COMPRESSED_ASTC_6x5"           value="3"/>
+               <value name="COMPRESSED_ASTC_6x6"           value="4"/>
+               <value name="COMPRESSED_ASTC_8x5"           value="5"/>
+               <value name="COMPRESSED_ASTC_8x6"           value="6"/>
+               <value name="COMPRESSED_ASTC_8x8"           value="7"/>
+               <value name="COMPRESSED_ASTC_10x5"          value="8"/>
+               <value name="COMPRESSED_ASTC_10x6"          value="9"/>
+               <value name="COMPRESSED_ASTC_10x8"          value="10"/>
+               <value name="COMPRESSED_ASTC_10x10"         value="11"/>
+               <value name="COMPRESSED_ASTC_12x10"         value="12"/>
+               <value name="COMPRESSED_ASTC_12x12"         value="13"/>
+               <value name="COMPRESSED_PVRT2BPP"           value="40"/>
+               <value name="COMPRESSED_PVRT4BPP"           value="41"/>
+               <value name="COMPRESSED_PVRTII2BPP"         value="42"/>
+               <value name="COMPRESSED_PVRTII4BPP"         value="43"/>
+               <value name="COMPRESSED_UBC1"               value="44"/>
+               <value name="COMPRESSED_UBC2"               value="45"/>
+               <value name="COMPRESSED_UBC3"               value="46"/>
+               <value name="COMPRESSED_UBC4"               value="47"/>
+               <value name="COMPRESSED_SBC4"               value="48"/>
+               <value name="COMPRESSED_UBC5"               value="49"/>
+               <value name="COMPRESSED_SBC5"               value="50"/>
+               <value name="COMPRESSED_UBC6"               value="51"/>
+               <value name="COMPRESSED_SBC6"               value="52"/>
+               <value name="COMPRESSED_UBC7"               value="53"/>
+               <value name="COMPRESSED_ETC2_RGB"           value="68"/>
+               <value name="COMPRESSED_ETC2A_RGBA"         value="69"/>
+               <value name="COMPRESSED_ETC2_PUNCHTHROUGHA" value="70"/>
+               <value name="COMPRESSED_EAC_R11_UNSIGNED"   value="71"/>
+               <value name="COMPRESSED_EAC_R11_SIGNED"     value="72"/>
+               <value name="COMPRESSED_EAC_RG11_UNSIGNED"  value="73"/>
+               <value name="COMPRESSED_EAC_RG11_SIGNED"    value="74"/>
+               <value name="INVALID"                       value="0xFFFFFFFF"/>
+       </enum>
+
+       <enum name="GAMMA">
+               <value name="OFF" value="0"/>
+               <value name="ON"  value="1"/>
+       </enum>
+
+       <enum name="SWIZ">
+               <value name="SRCCHAN_0" value="0"/>
+               <value name="SRCCHAN_1" value="1"/>
+               <value name="SRCCHAN_2" value="2"/>
+               <value name="SRCCHAN_3" value="3"/>
+               <value name="SRC_ONE"   value="4"/>
+               <value name="SRC_ZERO"  value="5"/>
+       </enum>
+
+       <enum name="TEXTYPE">
+               <value name="1D"            value="0"/>
+               <value name="2D"            value="1"/>
+               <value name="3D"            value="2"/>
+               <value name="CUBE"          value="3"/>
+               <value name="STRIDE"        value="4"/>
+               <value name="PAGETILE"      value="5"/>
+               <value name="BUFFER_LOOKUP" value="6"/>
+       </enum>
+
+       <enum name="TWOCOMP_GAMMA">
+               <value name="OFF" value="0"/>
+               <value name="R"   value="1"/>
+               <value name="RG"  value="3"/>
+       </enum>
+
+       <struct name="IMAGE_WORD0" length="2">
+               <field name="smpcnt" start="62" end="63" type="uint"/>
+               <field name="height" start="48" end="61" type="uint"/>
+               <field name="width" start="34" end="47" type="uint"/>
+               <field name="texformat" start="27" end="33" type="FORMAT"/>
+               <field name="minlod" start="17" end="26" type="uint"/>
+               <field name="swiz0" start="14" end="16" type="SWIZ"/>
+               <field name="swiz1" start="11" end="13" type="SWIZ"/>
+               <field name="swiz2" start="8" end="10" type="SWIZ"/>
+               <field name="swiz3" start="5" end="7" type="SWIZ"/>
+               <field name="twocomp_gamma" start="3" end="4" type="TWOCOMP_GAMMA"/>
+               <field name="gamma" start="3" end="3" type="GAMMA"/>
+               <field name="textype" start="0" end="2" type="TEXTYPE"/>
+       </struct>
+
+       <struct name="IMAGE_WORD1" length="2">
+               <field name="baselevel" start="60" end="63" type="uint"/>
+               <field name="alpha_msb" start="59" end="59" type="bool"/>
+               <field name="border" start="58" end="58" type="bool"/>
+               <condition type="if" check="TPU_IMAGE_STATE_V2"/>
+                       <condition type="if" check="TFBC"/>
+                               <field name="lossy_compression_mode" start="56" end="57" type="COMPRESSION_LEVEL"/>
+                       <condition type="endif" check="TFBC"/>
+                       <field name="tpu_image_state_v2_compression_mode" start="54" end="56" type="COMPRESSION_MODE"/>
+               <condition type="else" check="TPU_IMAGE_STATE_V2"/>
+                       <field name="tile_size" start="57" end="57" type="bool"/>
+                       <field name="index_lookup" start="56" end="56" type="bool"/>
+                       <field name="frame_buffer_compression" start="55" end="55" type="bool"/>
+                       <field name="frame_buffer_compression_addressing_mode" start="54" end="54" type="bool"/>
+               <condition type="endif" check="TPU_IMAGE_STATE_V2"/>
+               <field name="texaddr" start="16" end="53" shift="2" type="address"/>
+               <field name="mipmaps_present" start="15" end="15" type="bool"/>
+               <field name="depth" start="4" end="14" type="uint"/>
+               <field name="num_mip_levels" start="0" end="3" type="uint"/>
+       </struct>
+
+       <struct name="STRIDE_IMAGE_WORD1" length="2">
+               <field name="num_mip_levels" start="60" end="63" type="uint"/>
+               <field name="alpha_msb" start="59" end="59" type="bool"/>
+               <condition type="if" check="TPU_IMAGE_STATE_V2"/>
+                       <condition type="if" check="TFBC"/>
+                               <field name="lossy_compression_mode" start="56" end="57" type="COMPRESSION_LEVEL"/>
+                       <condition type="endif" check="TFBC"/>
+                       <field name="tpu_image_state_v2_compression_mode" start="54" end="56" type="COMPRESSION_MODE"/>
+               <condition type="else" check="TPU_IMAGE_STATE_V2"/>
+                       <field name="tile_size" start="57" end="57" type="bool"/>
+                       <field name="index_lookup" start="56" end="56" type="bool"/>
+                       <field name="frame_buffer_compression" start="55" end="55" type="bool"/>
+                       <field name="frame_buffer_compression_addressing_mode" start="54" end="54" type="bool"/>
+               <condition type="endif" check="TPU_IMAGE_STATE_V2"/>
+               <field name="texaddr" start="16" end="53" shift="2" type="address"/>
+               <field name="mipmaps_present" start="15" end="15" type="bool"/>
+               <field name="stride" start="0" end="14" type="uint"/>
+       </struct>
+
+       <struct name="SAMPLER" length="2">
+               <field name="texaddr_plane2_lo" start="50" end="63" shift="2" type="address"/>
+               <field name="cmp_mode" start="59" end="61" type="CMP_MODE"/>
+               <field name="addrmode_w" start="56" end="58" type="ADDRMODE"/>
+               <field name="bordercolor_index" start="50" end="55" type="uint"/>
+               <field name="non_normalized_coords" start="49" end="49" type="bool"/>
+               <field name="lumakey_alphamult" start="48" end="48" type="bool"/>
+               <field name="lumakey" start="47" end="47" type="bool"/>
+               <field name="addrmode_v" start="44" end="46" type="ADDRMODE"/>
+               <field name="addrmode_u" start="41" end="43" type="ADDRMODE"/>
+               <field name="mipfilter" start="40" end="40" type="bool"/>
+               <field name="minfilter" start="38" end="39" type="FILTER"/>
+               <field name="magfilter" start="36" end="37" type="FILTER"/>
+               <field name="anisoctl" start="33" end="35" type="ANISOCTL"/>
+               <field name="maxlod" start="23" end="32" type="CLAMP"/>
+               <field name="minlod" start="13" end="22" type="CLAMP"/>
+               <field name="dadjust" start="0" end="12" type="DADJUST"/>
+       </struct>
+
+</csbgen>
diff --git a/src/imagination/csbgen/rogue_vdm.xml b/src/imagination/csbgen/rogue_vdm.xml
new file mode 100644 (file)
index 0000000..a0b2376
--- /dev/null
@@ -0,0 +1,256 @@
+<?xml version="1.0" ?>
+
+<!--
+Copyright © 2022 Imagination Technologies Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice (including the next
+paragraph) shall be included in all copies or substantial portions of the
+Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+-->
+
+<csbgen name="ROGUE" prefix="VDMCTRL">
+
+       <enum name="BLOCK_TYPE">
+               <value name="PPP_STATE_UPDATE" value="0"/>
+               <value name="PDS_STATE_UPDATE" value="1"/>
+               <value name="VDM_STATE_UPDATE" value="2"/>
+               <value name="INDEX_LIST"       value="3"/>
+               <value name="STREAM_LINK"      value="4"/>
+               <value name="STREAM_RETURN"    value="5"/>
+               <value name="STREAM_TERMINATE" value="6"/>
+               <value name="CONTROL"          value="7"/>
+       </enum>
+
+       <enum name="DM_TARGET">
+               <value name="VDM" value="0"/>
+               <value name="DDM" value="1"/>
+       </enum>
+
+       <enum name="FLATSHADE_CONTROL">
+               <value name="VERTEX_0" value="0"/>
+               <value name="VERTEX_1" value="1"/>
+               <value name="VERTEX_2" value="2"/>
+       </enum>
+
+       <enum name="INDEX_SIZE">
+               <value name="B8"  value="0"/>
+               <value name="B16" value="1"/>
+               <value name="B32" value="2"/>
+       </enum>
+
+       <enum name="PRIMITIVE_TOPOLOGY">
+               <value name="POINT_LIST"     value="0"/>
+               <value name="LINE_LIST"      value="1"/>
+               <value name="LINE_LIST_ADJ"  value="2"/>
+               <value name="LINE_STRIP"     value="3"/>
+               <value name="LINE_STRIP_ADJ" value="4"/>
+               <value name="LINE_LOOP"      value="5"/>
+               <value name="TRI_LIST"       value="6"/>
+               <value name="TRI_LIST_ADJ"   value="7"/>
+               <value name="TRI_LIST_EDGE"  value="8"/>
+               <value name="TRI_STRIP"      value="9"/>
+               <value name="TRI_STRIP_ADJ"  value="10"/>
+               <value name="TRI_FAN"        value="11"/>
+               <value name="PATCH_LIST"     value="12"/>
+       </enum>
+
+       <enum name="SD_TYPE">
+               <value name="NONE" value="0"/>
+               <value name="PDS"  value="1"/>
+               <value name="USC"  value="2"/>
+       </enum>
+
+       <enum name="USC_TARGET">
+               <value name="ALL" value="0"/>
+               <value name="ANY" value="1"/>
+       </enum>
+
+       <enum name="UVS_SCRATCH_SIZE_SELECT">
+               <value name="FIVE" value="0"/>
+               <value name="ONE"  value="1"/>
+       </enum>
+
+       <struct name="PPP_STATE0" length="1">
+               <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="PPP_STATE_UPDATE"/>
+               <field name="word_count" start="8" end="15" type="uint"/>
+               <field name="addrmsb" start="0" end="7" shift="32" type="address"/>
+       </struct>
+
+       <struct name="PPP_STATE1" length="1">
+               <field name="addrlsb" start="2" end="31" shift="2" type="address"/>
+       </struct>
+
+       <struct name="PDS_STATE0" length="1">
+               <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="PDS_STATE_UPDATE"/>
+               <field name="dm_target" start="28" end="28" type="DM_TARGET"/>
+               <field name="usc_target" start="25" end="25" type="USC_TARGET"/>
+               <field name="usc_common_size" start="16" end="24" type="uint">
+                       <define name="UNIT_SIZE" value="64"/>
+               </field>
+               <field name="usc_unified_size" start="10" end="15" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="pds_temp_size" start="6" end="9" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="pds_data_size" start="0" end="5" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+       </struct>
+
+       <struct name="PDS_STATE1" length="1">
+               <!-- This is actually an offset rather than a full address. Note this for when we auto-generate the XMLs. -->
+               <field name="pds_data_addr" start="4" end="31" shift="4" type="address"/>
+               <field name="sd_type" start="2" end="3" type="SD_TYPE"/>
+               <field name="sd_next_type" start="0" end="1" type="SD_TYPE"/>
+       </struct>
+
+       <struct name="PDS_STATE2" length="1">
+               <!-- This is actually an offset rather than a full address. Note this for when we auto-generate the XMLs. -->
+               <field name="pds_code_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="STREAM_LINK0" length="1">
+               <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="STREAM_LINK"/>
+               <field name="with_return" start="28" end="28" type="bool"/>
+               <field name="compare_present" start="27" end="27" type="bool"/>
+               <field name="compare_mode" start="24" end="26" type="uint"/>
+               <field name="compare_data" start="8" end="23" type="uint"/>
+               <field name="link_addrmsb" start="0" end="7" shift="32" type="address"/>
+       </struct>
+
+       <struct name="STREAM_LINK1" length="1">
+               <field name="link_addrlsb" start="2" end="31" shift="2" type="address"/>
+       </struct>
+
+       <struct name="STREAM_RETURN" length="1">
+               <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="STREAM_RETURN"/>
+       </struct>
+
+       <struct name="STREAM_TERMINATE" length="1">
+               <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="STREAM_TERMINATE"/>
+               <field name="context" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="VDM_STATE0" length="1">
+               <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="VDM_STATE_UPDATE"/>
+               <field name="cut_index_present" start="28" end="28" type="bool"/>
+               <field name="vs_data_addr_present" start="27" end="27" type="bool"/>
+               <field name="vs_other_present" start="26" end="26" type="bool"/>
+               <field name="ds_present" start="24" end="24" type="bool"/>
+               <field name="gs_present" start="23" end="23" type="bool"/>
+               <field name="hs_present" start="22" end="22" type="bool"/>
+               <field name="cam_size" start="7" end="14" type="uint"/>
+               <field name="uvs_scratch_size_select" start="6" end="6" type="UVS_SCRATCH_SIZE_SELECT"/>
+               <field name="cut_index_enable" start="5" end="5" type="bool"/>
+               <field name="tess_enable" start="4" end="4" type="bool"/>
+               <field name="gs_enable" start="3" end="3" type="bool"/>
+               <field name="flatshade_control" start="1" end="2" type="FLATSHADE_CONTROL"/>
+               <field name="generate_primitive_id" start="0" end="0" type="bool"/>
+       </struct>
+
+       <struct name="VDM_STATE1" length="1">
+               <field name="cut_index" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="VDM_STATE2" length="1">
+               <field name="vs_pds_data_base_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="VDM_STATE3" length="1">
+               <field name="vs_pds_code_base_addr" start="4" end="31" shift="4" type="address"/>
+       </struct>
+
+       <struct name="VDM_STATE4" length="1">
+               <field name="vs_output_size" start="0" end="7" type="uint">
+                       <define name="UNIT_SIZE" value="4"/>
+               </field>
+       </struct>
+
+       <struct name="VDM_STATE5" length="1">
+               <field name="vs_max_instances_ext" start="31" end="31" type="bool"/>
+               <field name="vs_max_instances" start="25" end="29" type="uint"/>
+               <field name="vs_usc_common_size" start="16" end="24" type="uint">
+                       <define name="UNIT_SIZE" value="64"/>
+               </field>
+               <field name="vs_usc_unified_size" start="10" end="15" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="vs_pds_temp_size" start="6" end="9" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+               <field name="vs_pds_data_size" start="0" end="5" type="uint">
+                       <define name="UNIT_SIZE" value="16"/>
+               </field>
+       </struct>
+
+       <struct name="INDEX_LIST0" length="1">
+               <field name="block_type" start="29" end="31" type="BLOCK_TYPE" default="INDEX_LIST"/>
+               <field name="index_addr_present" start="28" end="28" type="bool"/>
+               <field name="index_count_present" start="27" end="27" type="bool"/>
+               <field name="index_instance_count_present" start="26" end="26" type="bool"/>
+               <field name="index_offset_present" start="25" end="25" type="bool"/>
+               <field name="start_present" start="24" end="24" type="bool"/>
+               <field name="indirect_addr_present" start="23" end="23" type="bool"/>
+               <field name="split_count_present" start="22" end="22" type="bool"/>
+               <condition type="if" check="VDM_DEGENERATE_CULLING"/>
+                       <field name="degen_cull_enable" start="19" end="19" type="bool"/>
+               <condition type="endif" check="VDM_DEGENERATE_CULLING"/>
+               <field name="index_size" start="17" end="18" type="INDEX_SIZE"/>
+               <field name="patch_count" start="12" end="16" type="uint"/>
+               <field name="primitive_topology" start="8" end="11" type="PRIMITIVE_TOPOLOGY"/>
+               <field name="index_base_addrmsb" start="0" end="7" shift="32" type="address"/>
+       </struct>
+
+       <struct name="INDEX_LIST1" length="1">
+               <field name="index_base_addrlsb" start="0" end="31" shift="0" type="address"/>
+       </struct>
+
+       <struct name="INDEX_LIST2" length="1">
+               <field name="index_count" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="INDEX_LIST3" length="1">
+               <field name="instance_count" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="INDEX_LIST4" length="1">
+               <field name="index_offset" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="INDEX_LIST5" length="1">
+               <field name="start_index" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="INDEX_LIST6" length="1">
+               <field name="start_instance" start="0" end="31" type="uint"/>
+       </struct>
+
+       <struct name="INDEX_LIST7" length="1">
+               <field name="indirect_base_addrmsb" start="0" end="7" shift="32" type="address"/>
+       </struct>
+
+       <struct name="INDEX_LIST8" length="1">
+               <field name="indirect_base_addrlsb" start="2" end="31" shift="2" type="address"/>
+       </struct>
+
+       <struct name="INDEX_LIST9" length="1">
+               <field name="split_count" start="0" end="15" type="uint"/>
+       </struct>
+
+</csbgen>
diff --git a/src/imagination/include/hwdef/rogue_hw_defs.h b/src/imagination/include/hwdef/rogue_hw_defs.h
new file mode 100644 (file)
index 0000000..1ee1eb5
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is based on rgxdefs.h and should only contain object-like macros.
+ * Any function-like macros or inline functions should instead appear in
+ * rogue_hw_utils.h.
+ */
+
+#ifndef ROGUE_HW_DEFS_H
+#define ROGUE_HW_DEFS_H
+
+#include <stdint.h>
+
+#include "util/macros.h"
+
+#define ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT 12U
+#define ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE \
+   BITFIELD_BIT(ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT)
+
+/* ISP triangle merging constants. */
+/* tan(15) (0x3E8930A3) */
+#define ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR 0.267949f
+/* tan(60) (0x3FDDB3D7) */
+#define ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR 1.732051f
+#define ROGUE_ISP_MERGE_SCALE_FACTOR 16.0f
+
+#define ROGUE_MAX_INSTR_BYTES 32U
+
+/* MList entry stride in bytes */
+#define ROGUE_MLIST_ENTRY_STRIDE 4U
+
+/* VCE & TE share virtual space and Alist. */
+#define ROGUE_NUM_PM_ADDRESS_SPACES 2U
+
+/* PM Maximum addressable limit (as determined by the size field of the
+ * PM_*_FSTACK registers).
+ */
+#define ROGUE_PM_MAX_PB_VIRT_ADDR_SPACE UINT64_C(0x400000000)
+
+/* Vheap entry size in bytes. */
+#define ROGUE_PM_VHEAP_ENTRY_SIZE 4U
+
+#define ROGUE_RTC_SIZE_IN_BYTES 256U
+
+#define ROGUE_NUM_VCE 1U
+
+#define ROGUE_NUM_TEAC 1U
+
+#define ROGUE_NUM_TE 1U
+
+/* Tail pointer size in bytes. */
+#define ROGUE_TAIL_POINTER_SIZE 8U
+
+/* Tail pointer cache line size. */
+#define ROGUE_TE_TPC_CACHE_LINE_SIZE 64U
+
+#define ROGUE_MAX_VERTEX_SHARED_REGISTERS 1024U
+
+#define ROGUE_MAX_PIXEL_SHARED_REGISTERS 1024U
+
+/* Number of CR_PDS_BGRND values that need setting up. */
+#define ROGUE_NUM_CR_PDS_BGRND_WORDS 3U
+
+/* Number of PBESTATE_REG_WORD values that need setting up. */
+#define ROGUE_NUM_PBESTATE_REG_WORDS 3U
+
+/* Number of PBESTATE_STATE_WORD values that need setting up. */
+#define ROGUE_NUM_PBESTATE_STATE_WORDS 2U
+
+/* Number of TEXSTATE_IMAGE_WORD values that need setting up. */
+#define ROGUE_NUM_TEXSTATE_IMAGE_WORDS 2U
+
+#define ROGUE_MAX_RENDER_TARGETS 2048U
+
+/* 12 dwords reserved for shared register management. The first dword is the
+ * number of shared register blocks to reload. Should be a multiple of 4 dwords,
+ * size in bytes.
+ */
+#define ROGUE_LLS_SHARED_REGS_RESERVE_SIZE 48U
+
+#define ROGUE_USC_TASK_PROGRAM_SIZE 512U
+
+#define ROGUE_CSRM_LINE_SIZE_IN_DWORDS (64U * 4U * 4U)
+
+/* The maximum amount of local memory which can be allocated by a single kernel
+ * (in dwords/32-bit registers).
+ *
+ * ROGUE_CDMCTRL_USC_COMMON_SIZE_UNIT_SIZE is in bytes so we divide by four.
+ */
+#define ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS        \
+   ((ROGUE_CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE *  \
+     ROGUE_CDMCTRL_KERNEL0_USC_COMMON_SIZE_MAX_SIZE) >> \
+    2)
+
+#define ROGUE_MAX_INSTANCES_PER_TASK \
+   (ROGUE_CDMCTRL_KERNEL8_MAX_INSTANCES_MAX_SIZE + 1U)
+
+/* Optimal number for packing work groups into a slot. */
+#define ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK 8U
+
+/* The maximum number of pixel task instances which might be running overlapped
+ * with compute. Once we have 8 pixel task instances we have a complete set and
+ * task will be able to run and allocations will be freed.
+ */
+#define ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES 7U
+
+#endif /* ROGUE_HW_DEFS_H */
diff --git a/src/imagination/include/hwdef/rogue_hw_utils.h b/src/imagination/include/hwdef/rogue_hw_utils.h
new file mode 100644 (file)
index 0000000..fc1e4e6
--- /dev/null
@@ -0,0 +1,379 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is based on rgxdefs.h and should only contain function-like macros
+ * and inline functions. Any object-like macros should instead appear in
+ * rogue_hw_defs.h.
+ */
+
+#ifndef ROGUE_HW_UTILS_H
+#define ROGUE_HW_UTILS_H
+
+#include <stdint.h>
+
+#include "pvr_winsys.h"
+
+#define __pvr_address_type pvr_dev_addr_t
+#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
+
+#include "csbgen/rogue_cdm.h"
+#include "csbgen/rogue_lls.h"
+
+#undef __pvr_get_address
+#undef __pvr_address_type
+
+#include "rogue_hw_defs.h"
+#include "pvr_device_info.h"
+#include "util/compiler.h"
+#include "util/macros.h"
+
+/* Work out the dimensions of a tile, measured in ISP samples, for the given
+ * per-draw sample count.
+ *
+ * The base dimensions are the core's tile size; they are then scaled up in
+ * x and/or y depending on the requested sample count and on how many samples
+ * per pixel the ISP supports on this core.
+ */
+static inline void
+rogue_get_isp_samples_per_tile_xy(const struct pvr_device_info *dev_info,
+                                  uint32_t samples,
+                                  uint32_t *const x_out,
+                                  uint32_t *const y_out)
+{
+   const uint32_t tile_size_x =
+      PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0U);
+   const uint32_t tile_size_y =
+      PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0U);
+   const uint32_t samples_per_pixel =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0U);
+
+#if !defined(NDEBUG)
+   /* The scaling below only handles 1, 2 or 4 ISP samples per pixel. */
+   switch (samples_per_pixel) {
+   case 1:
+   case 2:
+   case 4:
+      break;
+   default:
+      assert(!"Unsupported ISP samples per pixel");
+   }
+#endif
+
+   /* Start from the tile size and scale per sample count below. */
+   *x_out = tile_size_x;
+   *y_out = tile_size_y;
+
+   switch (samples) {
+   default:
+      assert(!"Unsupported number of samples");
+      FALLTHROUGH;
+   case 1:
+      break;
+   case 2:
+      if (samples_per_pixel == 2 || samples_per_pixel == 4)
+         *y_out *= 2;
+
+      break;
+   case 4:
+      if (samples_per_pixel == 2 || samples_per_pixel == 4)
+         *x_out *= 2;
+
+      if (samples_per_pixel == 2)
+         *y_out *= 2;
+
+      break;
+   case 8:
+      *y_out *= 2;
+      break;
+   }
+}
+
+static inline uint64_t
+rogue_get_min_free_list_size(const struct pvr_device_info *dev_info)
+{
+   /* Minimum PB free list size, expressed in PM physical pages. RogueXE
+    * cores need fewer pages, except those affected by BRN 66011.
+    */
+   const uint64_t min_num_pages =
+      PVR_HAS_FEATURE(dev_info, roguexe)
+         ? (PVR_HAS_QUIRK(dev_info, 66011) ? 40U : 25U)
+         : 50U;
+
+   return min_num_pages << ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+}
+
+static inline uint32_t
+rogue_get_max_num_vdm_pds_tasks(const struct pvr_device_info *dev_info)
+{
+   /* Fall back to the smallest max_usc_tasks value found across existing
+    * cores when the feature value is absent.
+    */
+   const uint32_t max_usc_tasks =
+      PVR_GET_FEATURE_VALUE(dev_info, max_usc_tasks, 24U);
+
+   /* FIXME: Where does the 9 come from? */
+   return max_usc_tasks - 9U;
+}
+
+static inline uint32_t
+rogue_get_max_output_regs_per_pixel(const struct pvr_device_info *dev_info)
+{
+   /* Cores with the eight_output_registers feature double the usual four. */
+   return PVR_HAS_FEATURE(dev_info, eight_output_registers) ? 8U : 4U;
+}
+
+static inline void
+rogue_get_num_macrotiles_xy(const struct pvr_device_info *dev_info,
+                            uint32_t *const x_out,
+                            uint32_t *const y_out)
+{
+   uint32_t version;
+
+   /* Treat a missing simple_parameter_format_version as version 0. */
+   if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
+      version = 0;
+
+   /* Cores using the simple internal parameter format (other than version 2)
+    * have a single macrotile; everything else uses a 4x4 grid.
+    */
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
+       version != 2) {
+      *x_out = 1;
+      *y_out = 1;
+   } else {
+      *x_out = 4;
+      *y_out = 4;
+   }
+}
+
+static inline uint32_t
+rogue_get_macrotile_array_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t macrotiles_x;
+   uint32_t macrotiles_y;
+
+   /* No macrotile array is required with the simple internal parameter
+    * format.
+    */
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format))
+      return 0U;
+
+   rogue_get_num_macrotiles_xy(dev_info, &macrotiles_x, &macrotiles_y);
+
+   /* 8 bytes per macrotile entry. */
+   return macrotiles_x * macrotiles_y * 8U;
+}
+
+/* To get the number of required Bernado/Phantom(s), divide the number of
+ * clusters by 4 and round up.
+ */
+static inline uint32_t
+rogue_get_num_phantoms(const struct pvr_device_info *dev_info)
+{
+   const uint32_t num_clusters =
+      PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
+
+   return DIV_ROUND_UP(num_clusters, 4U);
+}
+
+/* Region header size in bytes. */
+static inline uint32_t
+rogue_get_region_header_size(const struct pvr_device_info *dev_info)
+{
+   uint32_t version;
+
+   /* A missing simple_parameter_format_version reads as version 0. */
+   if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
+      version = 0;
+
+   /* Version 2 of the simple internal parameter format uses a larger region
+    * header.
+    */
+   return (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
+           version == 2)
+             ? 6
+             : 5;
+}
+
+/* Return the total reserved size of partition in dwords. */
+static inline uint32_t
+rogue_get_total_reserved_partition_size(const struct pvr_device_info *dev_info)
+{
+   const uint32_t tile_size_x =
+      PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
+   const uint32_t tile_size_y =
+      PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
+   const uint32_t max_partitions =
+      PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
+
+   /* 16x16 tile cores size each partition from the minimum USC output
+    * registers per pixel; others use a fixed 1024 dwords per partition.
+    */
+   if (tile_size_x == 16U && tile_size_y == 16U) {
+      const uint32_t min_output_regs =
+         PVR_GET_FEATURE_VALUE(dev_info,
+                               usc_min_output_registers_per_pix,
+                               0);
+
+      return tile_size_x * tile_size_y * max_partitions * min_output_regs;
+   }
+
+   return max_partitions * 1024U;
+}
+
+static inline uint32_t
+rogue_get_render_size_max(const struct pvr_device_info *dev_info)
+{
+   /* Simple internal parameter format cores without 8K screen support are
+    * limited to 4K renders.
+    */
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
+       !PVR_HAS_FEATURE(dev_info, screen_size8K)) {
+      return 4096U;
+   }
+
+   return 8192U;
+}
+
+#define rogue_get_render_size_max_x(dev_info) \
+   rogue_get_render_size_max(dev_info)
+
+#define rogue_get_render_size_max_y(dev_info) \
+   rogue_get_render_size_max(dev_info)
+
+static inline uint32_t
+rogue_get_slc_cache_line_size(const struct pvr_device_info *dev_info)
+{
+   /* The feature value is in bits (default 8); convert to bytes. */
+   const uint32_t line_size_bits =
+      PVR_GET_FEATURE_VALUE(dev_info, slc_cache_line_size_bits, 8U);
+
+   return line_size_bits / 8U;
+}
+
+static inline uint32_t pvr_get_max_user_vertex_output_components(
+   const struct pvr_device_info *dev_info)
+{
+   const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 0U);
+   const uint32_t uvs_pba_entries =
+      PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 0U);
+
+   /* Larger UVS configurations support 128 output components. */
+   if (uvs_banks > 8U || uvs_pba_entries != 160U)
+      return 128U;
+
+   return 64U;
+}
+
+/* Return the size, in dwords, of the common store region reserved for shared
+ * register allocation.
+ */
+static inline uint32_t
+rogue_get_reserved_shared_size(const struct pvr_device_info *dev_info)
+{
+   /* Default common store size when the feature value is absent:
+    * 512 * 4 * 4 = 8192 dwords.
+    */
+   uint32_t common_store_size_in_dwords =
+      PVR_GET_FEATURE_VALUE(dev_info,
+                            common_store_size_in_dwords,
+                            512U * 4U * 4U);
+   /* NOTE(review): 256U * 4U looks like a fixed common store reservation --
+    * confirm what it accounts for against the hardware documentation.
+    */
+   uint32_t reserved_shared_size =
+      common_store_size_in_dwords - (256U * 4U) -
+      rogue_get_total_reserved_partition_size(dev_info);
+
+   if (PVR_HAS_QUIRK(dev_info, 44079)) {
+      /* BRN 44079: cap the usable size at the common store split point. */
+      uint32_t common_store_split_point = (768U * 4U * 4U);
+
+      return MIN2(common_store_split_point - (256U * 4U), reserved_shared_size);
+   }
+
+   return reserved_shared_size;
+}
+
+static inline uint32_t
+rogue_max_compute_shared_registers(const struct pvr_device_info *dev_info)
+{
+   /* 2K shared registers are available to compute-capable cores. */
+   return PVR_HAS_FEATURE(dev_info, compute) ? 2048U : 0U;
+}
+
+/* Return the number of coefficient registers, in dwords, available for
+ * allocation.
+ *
+ * Starts from the reserved shared size and subtracts the portions set aside
+ * for pixel and vertex (and, where applicable, compute) work, adjusting for
+ * core-specific quirks and enhancements.
+ */
+static inline uint32_t
+rogue_get_max_coeffs(const struct pvr_device_info *dev_info)
+{
+   /* Portion reserved for vertex shared registers; grown by the compute
+    * shared registers on cores with ERN 38020 (see below).
+    */
+   uint32_t max_coeff_additional_portion = ROGUE_MAX_VERTEX_SHARED_REGISTERS;
+   /* Zeroed on cores with ERN 38748 (see below). */
+   uint32_t pending_allocation_shared_regs = 2U * 1024U;
+   uint32_t pending_allocation_coeff_regs = 0U;
+   uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
+   uint32_t tiles_in_flight =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
+   /* Per-phantom share of the tiles in flight, rounded up. */
+   uint32_t max_coeff_pixel_portion =
+      DIV_ROUND_UP(tiles_in_flight, num_phantoms);
+
+   max_coeff_pixel_portion *= ROGUE_MAX_PIXEL_SHARED_REGISTERS;
+
+   /* Compute tasks on cores with BRN48492 and without compute overlap may lock
+    * up without two additional lines of coeffs.
+    */
+   if (PVR_HAS_QUIRK(dev_info, 48492) &&
+       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
+      pending_allocation_coeff_regs = 2U * 1024U;
+   }
+
+   if (PVR_HAS_ERN(dev_info, 38748))
+      pending_allocation_shared_regs = 0U;
+
+   if (PVR_HAS_ERN(dev_info, 38020)) {
+      max_coeff_additional_portion +=
+         rogue_max_compute_shared_registers(dev_info);
+   }
+
+   /* What remains of the reserved shared size (plus the extra coeff lines)
+    * after the pixel, vertex/compute and pending-allocation portions.
+    */
+   return rogue_get_reserved_shared_size(dev_info) +
+          pending_allocation_coeff_regs -
+          (max_coeff_pixel_portion + max_coeff_additional_portion +
+           pending_allocation_shared_regs);
+}
+
+static inline uint32_t
+rogue_get_cdm_context_resume_buffer_size(const struct pvr_device_info *dev_info)
+{
+   /* Single-core parts only need one resume buffer. */
+   if (!PVR_HAS_FEATURE(dev_info, gpu_multicore_support))
+      return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE;
+
+   /* Multicore parts get one cache-line-aligned buffer per core. */
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   const uint32_t buffer_stride =
+      ALIGN_POT(ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_SIZE, cache_line_size);
+   const uint32_t max_num_cores =
+      PVR_GET_FEATURE_VALUE(dev_info, xpu_max_slaves, 0U) + 1U;
+
+   return buffer_stride * max_num_cores;
+}
+
+static inline uint32_t rogue_get_cdm_context_resume_buffer_alignment(
+   const struct pvr_device_info *dev_info)
+{
+   /* Multicore parts align the per-core buffers to an SLC cache line. */
+   if (!PVR_HAS_FEATURE(dev_info, gpu_multicore_support))
+      return ROGUE_LLS_CDM_CONTEXT_RESUME_BUFFER_ALIGNMENT;
+
+   return rogue_get_slc_cache_line_size(dev_info);
+}
+
+/* Return the maximum amount of local memory, in dwords/registers, which the
+ * CDM can allocate for a single kernel.
+ */
+static inline uint32_t
+rogue_get_cdm_max_local_mem_size_regs(const struct pvr_device_info *dev_info)
+{
+   uint32_t available_coeffs_in_dwords = rogue_get_max_coeffs(dev_info);
+
+   if (PVR_HAS_QUIRK(dev_info, 48492) && PVR_HAS_FEATURE(dev_info, roguexe) &&
+       !PVR_HAS_FEATURE(dev_info, compute_overlap)) {
+      /* Driver must not use the 2 reserved lines. */
+      available_coeffs_in_dwords -= ROGUE_CSRM_LINE_SIZE_IN_DWORDS * 2;
+   }
+
+   /* The maximum amount of local memory available to a kernel is the minimum
+    * of the total number of coefficient registers available and the max common
+    * store allocation size which can be made by the CDM.
+    *
+    * If any coeff lines are reserved for tessellation or pixel then we need to
+    * subtract those too.
+    */
+   return MIN2(available_coeffs_in_dwords,
+               ROGUE_MAX_PER_KERNEL_LOCAL_MEM_SIZE_REGS);
+}
+
+static inline uint32_t
+rogue_get_compute_max_work_group_size(const struct pvr_device_info *dev_info)
+{
+   /* The number of tasks which can be executed per USC - Limited to 16U by
+    * the CDM.
+    */
+   const uint32_t max_tasks_per_usc = 16U;
+
+   /* With ERN 35421 a work-group may span multiple tasks; without it,
+    * barriers on work-groups > 32 instances aren't supported, so a
+    * work-group is limited to a single task.
+    */
+   if (PVR_HAS_ERN(dev_info, 35421))
+      return ROGUE_MAX_INSTANCES_PER_TASK * max_tasks_per_usc;
+
+   return ROGUE_MAX_INSTANCES_PER_TASK;
+}
+
+#endif /* ROGUE_HW_UTILS_H */
diff --git a/src/imagination/include/pvr_rogue_fw.h b/src/imagination/include/pvr_rogue_fw.h
new file mode 100644 (file)
index 0000000..a737ac6
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* This file is based on rgx_fwif_shared.h and rgx_fwif_client.h. It contains
+ * information about the firmware that is needed by the driver.
+ */
+
+#ifndef PVR_ROGUE_FW_H
+#define PVR_ROGUE_FW_H
+
+/**
+ * Maximum PB free list size supported by RGX and Services.
+ *
+ * Maximum PB free list size must ensure that no PM address space can be fully
+ * used, because if the full address space was used it would wrap and corrupt
+ * itself. Since there are two freelists (local is always minimum sized) this
+ * can be described as following three conditions being met:
+ *
+ *  Minimum PB + Maximum PB < ALIST PM address space size (16GB)
+ *  Minimum PB + Maximum PB < TE PM address space size (16GB) / NUM_TE_PIPES
+ *  Minimum PB + Maximum PB < VCE PM address space size (16GB) / NUM_VCE_PIPES
+ *
+ * Since the max of NUM_TE_PIPES and NUM_VCE_PIPES is 4, we have a hard limit
+ * of 4GB minus the Minimum PB. For convenience we take the smaller power-of-2
+ * value of 2GB. This is far more than any normal application would request
+ * or use.
+ */
+#define ROGUE_FREE_LIST_MAX_SIZE (2ULL * 1024ULL * 1024ULL * 1024ULL)
+
+/* FIXME: This will change based on the firmware configuration, which will vary
+ * depending on the BVNC and firmware version. The powervr KM driver allows this
+ * information to be queried, but the pvrsrvkm KM driver doesn't. This
+ * information should really be queried from the winsys.
+ */
+/* Indicates the number of Render Target Datas in a Render Target Dataset. */
+#define ROGUE_NUM_RTDATAS 2U
+
+#endif /* PVR_ROGUE_FW_H */
diff --git a/src/imagination/meson.build b/src/imagination/meson.build
new file mode 100644 (file)
index 0000000..626a03a
--- /dev/null
@@ -0,0 +1,33 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Include directories shared by all Imagination subprojects.
+inc_imagination = include_directories([
+  '.',
+  'common',
+  'include',
+])
+
+# Only descend into the driver subdirectories when the PowerVR Vulkan driver
+# was requested at configure time.
+if with_imagination_vk
+  subdir('common')
+  subdir('csbgen')
+  subdir('rogue')
+  subdir('vulkan')
+endif
diff --git a/src/imagination/rogue/meson.build b/src/imagination/rogue/meson.build
new file mode 100644 (file)
index 0000000..240dfa2
--- /dev/null
@@ -0,0 +1,77 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+# Sources for the Rogue compiler library: NIR lowering passes first, then the
+# core compiler (IR, encoders, register allocation, validation).
+libpowervr_rogue_files = files(
+  'nir/rogue_nir_constreg.c',
+  'nir/rogue_nir_lower_io.c',
+  'nir/rogue_nir_pfo.c',
+
+  'rogue.c',
+  'rogue_build_data.c',
+  'rogue_compiler.c',
+  'rogue_constreg.c',
+  'rogue_dump.c',
+  'rogue_encode.c',
+  'rogue_encoders.c',
+  'rogue_instr.c',
+  'rogue_nir.c',
+  'rogue_operand.c',
+  'rogue_regalloc.c',
+  'rogue_shader.c',
+  'rogue_util.c',
+  'rogue_validate.c',
+)
+
+# The Rogue compiler as an installed shared library, linked by the Vulkan
+# driver and by the offline compiler tool below.
+libpowervr_rogue = shared_library(
+  'powervr_rogue',
+  libpowervr_rogue_files,
+  include_directories : [
+    inc_imagination,
+    inc_include,
+    inc_compiler,
+    inc_src,
+    inc_mapi,
+    inc_mesa,
+    inc_gallium,
+    inc_gallium_aux,
+  ],
+  c_args : [c_msvc_compat_args, no_override_init_args],
+  gnu_symbol_visibility : 'hidden',
+  dependencies : [idep_mesautil, idep_nir, dep_csbgen],
+  install : true,
+)
+
+# Standalone offline compiler tool; a development aid, so not installed.
+rogue_compiler = executable(
+  'rogue_compiler',
+  'tools/offline_compiler.c',
+  link_with : [libpowervr_rogue],
+  dependencies : [idep_mesautil, idep_nir],
+  include_directories : [
+    inc_mesa,
+    inc_include,
+    inc_src,
+    inc_mapi,
+    inc_gallium,
+    inc_gallium_aux,
+    inc_compiler,
+  ],
+  install : false,
+)
diff --git a/src/imagination/rogue/nir/rogue_nir_constreg.c b/src/imagination/rogue/nir/rogue_nir_constreg.c
new file mode 100644 (file)
index 0000000..30bda0c
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "nir/nir_search_helpers.h"
+#include "rogue_constreg.h"
+#include "rogue_nir.h"
+
+/* TODO: optimize: if value is in const regs, replace, else, use shared regs and
+ * notify driver they need to be populated?
+ */
+
+/* Replaces multiple ssa uses from load_const with a single use -> a register.
+ *
+ * For each load_const whose value is NOT available in a hardware constant
+ * register, a single mov is inserted after it and all other (non-intrinsic)
+ * uses are redirected to the mov. This funnels repeated uses of the same
+ * immediate through one virtual register.
+ */
+void rogue_nir_constreg(nir_shader *shader)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+
+   /* Find load_const instructions. */
+   nir_foreach_block (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         if (instr->type != nir_instr_type_load_const)
+            continue;
+
+         nir_load_const_instr *load_const = nir_instr_as_load_const(instr);
+
+         /* Skip values that can be pulled from constant registers. */
+         /* NOTE(review): only value[0] at 32 bits is inspected — assumes all
+          * load_consts here are scalar 32-bit; confirm against earlier passes.
+          */
+         uint32_t value = nir_const_value_as_uint(load_const->value[0], 32);
+         size_t const_reg = rogue_constreg_lookup(value);
+         if (const_reg != ROGUE_NO_CONST_REG)
+            continue;
+
+         b.cursor = nir_after_instr(&load_const->instr);
+         nir_ssa_def *mov = nir_mov(&b, &load_const->def);
+
+         nir_foreach_use_safe (use_src, &load_const->def) {
+            /* Don't rewrite the mov's own source back to itself. */
+            if (use_src->parent_instr == mov->parent_instr)
+               continue;
+
+            /* Skip when used as an index for intrinsics, as we want to
+             * access that value directly.
+             */
+            if (use_src->parent_instr->type == nir_instr_type_intrinsic)
+               continue;
+
+            nir_instr_rewrite_src_ssa(use_src->parent_instr, use_src, mov);
+         }
+      }
+   }
+}
diff --git a/src/imagination/rogue/nir/rogue_nir_lower_io.c b/src/imagination/rogue/nir/rogue_nir_lower_io.c
new file mode 100644 (file)
index 0000000..63cbae1
--- /dev/null
@@ -0,0 +1,171 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "nir/nir_search_helpers.h"
+#include "rogue_nir.h"
+#include "rogue_nir_helpers.h"
+
+/* Lowers vulkan_resource_index to an immediate (desc_set, binding, 0) vec3.
+ * The pipeline_layout argument is currently unused; the descriptor is
+ * resolved later from the set/binding pair.
+ */
+static void lower_vulkan_resource_index(nir_builder *b,
+                                        nir_intrinsic_instr *intr,
+                                        void *pipeline_layout)
+{
+   unsigned desc_set = nir_intrinsic_desc_set(intr);
+   unsigned binding = nir_intrinsic_binding(intr);
+
+   nir_ssa_def *def = nir_vec3(b,
+                               nir_imm_int(b, desc_set),
+                               nir_imm_int(b, binding),
+                               nir_imm_int(b, 0));
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, def);
+   nir_instr_remove(&intr->instr);
+}
+
+/* Forwards the resource-index source through load_vulkan_descriptor and
+ * removes the intrinsic.
+ */
+static void lower_load_vulkan_descriptor(nir_builder *b,
+                                         nir_intrinsic_instr *intr)
+{
+   /* Loading the descriptor happens as part of the load/store instruction so
+    * this is a no-op.
+    */
+
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa, intr->src[0].ssa);
+   nir_instr_remove(&intr->instr);
+}
+
+/* Splits a vector load_ubo into one scalar load per component.
+ *
+ * Each scalar load keeps the original (desc_set, binding) source, carries a
+ * zeroed offset source, and encodes its per-channel position via range_base.
+ * The original vector destination is rewritten to a vec of the scalar loads.
+ */
+static void lower_load_ubo_to_scalar(nir_builder *b, nir_intrinsic_instr *intr)
+{
+   /* Scalarize the load_ubo. */
+   b->cursor = nir_before_instr(&intr->instr);
+
+   assert(intr->dest.is_ssa);
+   assert(intr->num_components > 1);
+
+   nir_ssa_def *loads[NIR_MAX_VEC_COMPONENTS];
+
+   /* Byte size of a single scalar channel. */
+   const unsigned chan_size_bytes = intr->dest.ssa.bit_size / 8;
+
+   for (uint8_t i = 0; i < intr->num_components; i++) {
+      size_t scaled_range = nir_intrinsic_range(intr) / intr->num_components;
+      nir_intrinsic_instr *chan_intr =
+         nir_intrinsic_instr_create(b->shader, intr->intrinsic);
+      nir_ssa_dest_init(&chan_intr->instr,
+                        &chan_intr->dest,
+                        1,
+                        intr->dest.ssa.bit_size,
+                        NULL);
+      chan_intr->num_components = 1;
+
+      nir_intrinsic_set_access(chan_intr, nir_intrinsic_access(intr));
+      nir_intrinsic_set_align_mul(chan_intr, nir_intrinsic_align_mul(intr));
+      nir_intrinsic_set_align_offset(chan_intr,
+                                     nir_intrinsic_align_offset(intr));
+      /* range_base is in bytes, so each channel advances by its byte size.
+       * (Advancing by num_components was only correct for 32-bit vec4.)
+       */
+      nir_intrinsic_set_range_base(chan_intr,
+                                   nir_intrinsic_range_base(intr) +
+                                      (i * chan_size_bytes));
+      nir_intrinsic_set_range(chan_intr, scaled_range);
+
+      /* Base (desc_set, binding). */
+      nir_src_copy(&chan_intr->src[0], &intr->src[0]);
+
+      /* Offset (unused). */
+      chan_intr->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+
+      nir_builder_instr_insert(b, &chan_intr->instr);
+
+      loads[i] = &chan_intr->dest.ssa;
+   }
+
+   nir_ssa_def_rewrite_uses(&intr->dest.ssa,
+                            nir_vec(b, loads, intr->num_components));
+   nir_instr_remove(&intr->instr);
+}
+
+/* Dispatches a single intrinsic to its lowering; returns true on progress. */
+static bool
+lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr, void *layout)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_vulkan_descriptor:
+      lower_load_vulkan_descriptor(b, instr);
+      return true;
+
+   case nir_intrinsic_vulkan_resource_index:
+      lower_vulkan_resource_index(b, instr, layout);
+      return true;
+
+   case nir_intrinsic_load_ubo:
+      lower_load_ubo_to_scalar(b, instr);
+      return true;
+
+   default:
+      break;
+   }
+
+   return false;
+}
+
+/* Runs the intrinsic lowerings over one function impl; returns whether any
+ * instruction was changed and updates metadata accordingly.
+ */
+static bool lower_impl(nir_function_impl *impl, void *layout)
+{
+   bool progress = false;
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         b.cursor = nir_before_instr(instr);
+         switch (instr->type) {
+         case nir_instr_type_intrinsic:
+            progress |=
+               lower_intrinsic(&b, nir_instr_as_intrinsic(instr), layout);
+            break;
+
+         default:
+            break;
+         }
+      }
+   }
+
+   /* Lowering rewrites/removes instructions, so no metadata survives. */
+   if (progress)
+      nir_metadata_preserve(impl, nir_metadata_none);
+   else
+      nir_metadata_preserve(impl, nir_metadata_all);
+
+   return progress;
+}
+
+/* Lowers Vulkan resource/UBO I/O intrinsics across the whole shader.
+ * Runs DCE afterwards to clean up the now-dead instructions.
+ * Returns true if any function made progress.
+ */
+bool rogue_nir_lower_io(nir_shader *shader, void *layout)
+{
+   bool progress = false;
+
+   nir_foreach_function (function, shader) {
+      if (function->impl)
+         progress |= lower_impl(function->impl, layout);
+   }
+
+   if (progress)
+      nir_opt_dce(shader);
+
+   return progress;
+}
diff --git a/src/imagination/rogue/nir/rogue_nir_pfo.c b/src/imagination/rogue/nir/rogue_nir_pfo.c
new file mode 100644 (file)
index 0000000..d14d11b
--- /dev/null
@@ -0,0 +1,86 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "nir/nir_search_helpers.h"
+#include "rogue_nir.h"
+
+/* Inserts pixel fragment output packing before a store_output: the vec4
+ * color is packed to unorm 8888 and the store is narrowed to a single
+ * 32-bit uint component.
+ */
+static void insert_pfo(nir_builder *b,
+                       nir_intrinsic_instr *store_output,
+                       nir_src *output_src)
+{
+   /* TODO: Support complex PFO with blending. */
+   /* TODO: Verify type is vec4. */
+
+   /* Pack the output color components into U8888 format. */
+   nir_ssa_def *new_output_src_ssa = nir_pack_unorm_4x8(b, output_src->ssa);
+   nir_src new_output_src = nir_src_for_ssa(new_output_src_ssa);
+
+   /* Update the store_output intrinsic. */
+   nir_instr_rewrite_src(&store_output->instr, output_src, new_output_src);
+   nir_intrinsic_set_write_mask(store_output, 1);
+   store_output->num_components = 1;
+   nir_intrinsic_set_src_type(store_output, nir_type_uint32);
+}
+
+/* Pixel fragment output pass: packs fragment shader color outputs into a
+ * single packed 32-bit value and retypes the corresponding output variables
+ * and derefs to match. No-op for non-fragment stages.
+ */
+void rogue_nir_pfo(nir_shader *shader)
+{
+   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
+   nir_builder b;
+
+   /* Only apply to fragment shaders. */
+   if (shader->info.stage != MESA_SHADER_FRAGMENT)
+      return;
+
+   nir_builder_init(&b, impl);
+
+   nir_foreach_block (block, impl) {
+      nir_foreach_instr_safe (instr, block) {
+         if (instr->type == nir_instr_type_intrinsic) {
+            /* Find the store_output intrinsic and pack the output value. */
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+            if (intr->intrinsic != nir_intrinsic_store_output)
+               continue;
+
+            b.cursor = nir_before_instr(&intr->instr);
+            insert_pfo(&b, intr, &intr->src[0]);
+         } else if (instr->type == nir_instr_type_deref) {
+            /* Find variable derefs and update their type. */
+            nir_deref_instr *deref = nir_instr_as_deref(instr);
+
+            if (!nir_deref_mode_is(deref, nir_var_shader_out))
+               continue;
+
+            if (deref->deref_type != nir_deref_type_var)
+               continue;
+
+            nir_variable *out = nir_deref_instr_get_variable(deref);
+
+            /* Retype both the deref and the variable to uint32 to match the
+             * packed store emitted by insert_pfo().
+             */
+            deref->type = glsl_uintN_t_type(32);
+            out->type = glsl_uintN_t_type(32);
+         }
+      }
+   }
+}
diff --git a/src/imagination/rogue/rogue.c b/src/imagination/rogue/rogue.c
new file mode 100644 (file)
index 0000000..5481bc9
--- /dev/null
@@ -0,0 +1,789 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "compiler/shader_enums.h"
+#include "compiler/spirv/nir_spirv.h"
+#include "nir/nir.h"
+#include "rogue.h"
+#include "rogue_build_data.h"
+#include "rogue_compiler.h"
+#include "rogue_constreg.h"
+#include "rogue_encode.h"
+#include "rogue_nir.h"
+#include "rogue_nir_helpers.h"
+#include "rogue_operand.h"
+#include "rogue_regalloc.h"
+#include "rogue_shader.h"
+#include "rogue_validate.h"
+#include "util/macros.h"
+#include "util/memstream.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue.c
+ *
+ * \brief Contains the top-level Rogue compiler interface for Vulkan driver and
+ * the offline compiler.
+ */
+
+/**
+ * \brief Converts a SPIR-V shader to NIR.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] stage Shader stage.
+ * \param[in] entry Entry point name.
+ * \param[in] spirv_size SPIR-V data length in DWORDs.
+ * \param[in] spirv_data SPIR-V data.
+ * \param[in] num_spec Number of SPIR-V specializations.
+ * \param[in] spec SPIR-V specializations.
+ * \return A nir_shader* if successful, or NULL if unsuccessful.
+ */
+nir_shader *rogue_spirv_to_nir(struct rogue_build_ctx *ctx,
+                               gl_shader_stage stage,
+                               const char *entry,
+                               size_t spirv_size,
+                               const uint32_t *spirv_data,
+                               unsigned num_spec,
+                               struct nir_spirv_specialization *spec)
+{
+   nir_shader *nir;
+
+   nir = spirv_to_nir(spirv_data,
+                      spirv_size,
+                      spec,
+                      num_spec,
+                      stage,
+                      entry,
+                      rogue_get_spirv_options(ctx->compiler),
+                      rogue_get_compiler_options(ctx->compiler));
+   if (!nir)
+      return NULL;
+
+   /* Parent the shader to the build context so it is freed with it. */
+   ralloc_steal(ctx, nir);
+
+   /* Apply passes. */
+   if (!rogue_nir_passes(ctx, nir, stage)) {
+      ralloc_free(nir);
+      return NULL;
+   }
+
+   /* Collect I/O data to pass back to the driver. */
+   if (!rogue_collect_io_data(ctx, nir)) {
+      ralloc_free(nir);
+      return NULL;
+   }
+
+   return nir;
+}
+
+/**
+ * \brief Converts a Rogue shader to binary.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] shader Rogue shader.
+ * \return A rogue_shader_binary* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader_binary *rogue_to_binary(struct rogue_build_ctx *ctx,
+                                            const struct rogue_shader *shader)
+{
+   struct rogue_shader_binary *binary;
+   struct u_memstream mem;
+   size_t buf_size;
+   char *buf;
+
+   /* Refuse to encode a shader that fails validation. */
+   if (!rogue_validate_shader(shader))
+      return NULL;
+
+   /* Encode into a growable in-memory stream; buf/buf_size are owned by us
+    * once the stream is closed and must be freed on every path.
+    */
+   if (!u_memstream_open(&mem, &buf, &buf_size))
+      return NULL;
+
+   if (!rogue_encode_shader(shader, u_memstream_get(&mem))) {
+      u_memstream_close(&mem);
+      free(buf);
+      return NULL;
+   }
+
+   u_memstream_close(&mem);
+
+   /* Binary is ralloc'd off ctx: freed with the build context. */
+   binary = rzalloc_size(ctx, sizeof(*binary) + buf_size);
+   if (!binary) {
+      free(buf);
+      return NULL;
+   }
+
+   binary->size = buf_size;
+   memcpy(binary->data, buf, buf_size);
+
+   free(buf);
+
+   return binary;
+}
+
+/* Sets up instruction operand dest_index as the destination of a NIR ALU
+ * instruction: scalar dests map to a single vreg, vec4 dests to a vreg
+ * vector component. Returns true on success.
+ */
+static bool
+setup_alu_dest(struct rogue_instr *instr, size_t dest_index, nir_alu_instr *alu)
+{
+   assert(dest_index == 0);
+
+   /* Dest validation. */
+   assert(nir_dest_num_components(alu->dest.dest) == 1 ||
+          nir_dest_num_components(alu->dest.dest) == 4);
+   assert(nir_dest_bit_size(alu->dest.dest) == 32);
+
+   size_t nir_dest_reg = nir_alu_dest_regindex(alu);
+
+   if (nir_dest_num_components(alu->dest.dest) == 1) {
+      CHECK(rogue_instr_set_operand_vreg(instr, dest_index, nir_dest_reg));
+   } else {
+      size_t comp = nir_alu_dest_comp(alu);
+      CHECK(rogue_instr_set_operand_vreg_vec(instr,
+                                             dest_index,
+                                             comp,
+                                             nir_dest_reg));
+   }
+
+   return true;
+}
+
+/* Sets up instruction operand operand_index to source const_value from a
+ * hardware constant register. Returns true on success.
+ */
+static bool trans_constreg_operand(struct rogue_instr *instr,
+                                   size_t operand_index,
+                                   uint32_t const_value)
+{
+   size_t const_reg = rogue_constreg_lookup(const_value);
+
+   /* Only values that can be sourced from const regs should be left from the
+    * rogue_nir_constreg pass.
+    */
+   assert(const_reg != ROGUE_NO_CONST_REG);
+
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     operand_index,
+                                     ROGUE_OPERAND_TYPE_REG_CONST,
+                                     const_reg));
+
+   return true;
+}
+
+/* Translates a NIR fmax into a Rogue MAX instruction. Constant sources come
+ * from const regs, others from vregs.
+ */
+static bool trans_nir_alu_fmax(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   /* Src validation. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   assert(nir_src_num_components(alu->src[1].src) == 1);
+   assert(nir_src_bit_size(alu->src[1].src) == 32);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MAX);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+
+   for (size_t u = 0; u < nir_op_infos[nir_op_fmax].num_inputs; ++u) {
+      /* Handle values that can be pulled from const regs. */
+      if (nir_alu_src_is_const(alu, u)) {
+         CHECK(trans_constreg_operand(instr, u + 1, nir_alu_src_const(alu, u)));
+         continue;
+      }
+
+      size_t nir_src_reg = nir_alu_src_regindex(alu, u);
+
+      /* Operand 0 is the dest, so sources start at operand 1. */
+      CHECK(rogue_instr_set_operand_vreg(instr, u + 1, nir_src_reg));
+   }
+
+   return true;
+}
+
+/* Translates a NIR fmin into a Rogue MIN instruction. Mirrors
+ * trans_nir_alu_fmax().
+ */
+static bool trans_nir_alu_fmin(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   /* Src validation. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   assert(nir_src_num_components(alu->src[1].src) == 1);
+   assert(nir_src_bit_size(alu->src[1].src) == 32);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MIN);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+
+   for (size_t u = 0; u < nir_op_infos[nir_op_fmin].num_inputs; ++u) {
+      /* Handle values that can be pulled from const regs. */
+      if (nir_alu_src_is_const(alu, u)) {
+         CHECK(trans_constreg_operand(instr, u + 1, nir_alu_src_const(alu, u)));
+         continue;
+      }
+
+      size_t nir_src_reg = nir_alu_src_regindex(alu, u);
+
+      /* Operand 0 is the dest, so sources start at operand 1. */
+      CHECK(rogue_instr_set_operand_vreg(instr, u + 1, nir_src_reg));
+   }
+
+   return true;
+}
+
+/* Translates a mov of a constant that is not available in const regs into a
+ * Rogue MOV_IMM with an immediate operand.
+ */
+static bool trans_nir_alu_mov_imm(struct rogue_shader *shader,
+                                  nir_alu_instr *alu)
+{
+   /* Src validation. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   uint32_t value = nir_alu_src_const(alu, 0);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV_IMM);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+   CHECK(rogue_instr_set_operand_imm(instr, 1, value));
+
+   return true;
+}
+
+/* Translates a NIR mov into a Rogue MOV/MOV_IMM. Scalar constant sources not
+ * in const regs become MOV_IMM; const-reg values and vregs become MOV.
+ */
+static bool trans_nir_alu_mov(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   /* Constant value that isn't in constregs. */
+   if (nir_alu_src_is_const(alu, 0) &&
+       nir_dest_num_components(alu->dest.dest) == 1)
+      return trans_nir_alu_mov_imm(shader, alu);
+
+   /* Src validation. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV);
+
+   /* Handle values that can be pulled from const regs. */
+   if (nir_alu_src_is_const(alu, 0)) {
+      return trans_constreg_operand(instr, 1, nir_alu_src_const(alu, 0));
+   }
+
+   size_t nir_src_reg = nir_alu_src_regindex(alu, 0);
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, nir_src_reg));
+
+   return true;
+}
+
+/* Translates a NIR pack_unorm_4x8 into a Rogue PACK_U8888, packing a vec4
+ * of 32-bit values into a single 32-bit destination.
+ */
+static bool trans_nir_alu_pack_unorm_4x8(struct rogue_shader *shader,
+                                         nir_alu_instr *alu)
+{
+   /* Src/dest validation. */
+   assert(nir_dest_num_components(alu->dest.dest) == 1);
+   assert(nir_dest_bit_size(alu->dest.dest) == 32);
+
+   assert(nir_src_num_components(alu->src[0].src) == 4);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   size_t nir_src_reg = nir_alu_src_regindex(alu, 0);
+   size_t nir_dest_reg = nir_alu_dest_regindex(alu);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_PACK_U8888);
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg));
+
+   /* Ensure all 4 components are being sourced in order. */
+   for (size_t u = 0; u < nir_src_num_components(alu->src[0].src); ++u)
+      assert(alu->src->swizzle[u] == u);
+
+   CHECK(rogue_instr_set_operand_vreg_vec(instr,
+                                          1,
+                                          ROGUE_COMPONENT_ALL,
+                                          nir_src_reg));
+
+   return true;
+}
+
+/* Translates a NIR fmul into a Rogue MUL instruction. */
+static bool trans_nir_alu_fmul(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   /* Src validation. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   assert(nir_src_num_components(alu->src[1].src) == 1);
+   assert(nir_src_bit_size(alu->src[1].src) == 32);
+
+   size_t nir_in_reg_a = nir_alu_src_regindex(alu, 0);
+   size_t nir_in_reg_b = nir_alu_src_regindex(alu, 1);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MUL);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, nir_in_reg_a));
+   CHECK(rogue_instr_set_operand_vreg(instr, 2, nir_in_reg_b));
+
+   return true;
+}
+
+/* Translates a NIR ffma (a * b + c) into a Rogue FMA instruction. */
+static bool trans_nir_alu_ffma(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   /* Src validation. */
+   assert(nir_src_num_components(alu->src[0].src) == 1);
+   assert(nir_src_bit_size(alu->src[0].src) == 32);
+
+   assert(nir_src_num_components(alu->src[1].src) == 1);
+   assert(nir_src_bit_size(alu->src[1].src) == 32);
+
+   assert(nir_src_num_components(alu->src[2].src) == 1);
+   assert(nir_src_bit_size(alu->src[2].src) == 32);
+
+   size_t nir_in_reg_a = nir_alu_src_regindex(alu, 0);
+   size_t nir_in_reg_b = nir_alu_src_regindex(alu, 1);
+   size_t nir_in_reg_c = nir_alu_src_regindex(alu, 2);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_FMA);
+
+   CHECK(setup_alu_dest(instr, 0, alu));
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, nir_in_reg_a));
+   CHECK(rogue_instr_set_operand_vreg(instr, 2, nir_in_reg_b));
+   CHECK(rogue_instr_set_operand_vreg(instr, 3, nir_in_reg_c));
+
+   return true;
+}
+
+/* Dispatches a NIR ALU instruction to the matching Rogue translation.
+ * Aborts on unsupported ops (compilation should have lowered them away).
+ */
+static bool trans_nir_alu(struct rogue_shader *shader, nir_alu_instr *alu)
+{
+   switch (alu->op) {
+   case nir_op_fmax:
+      return trans_nir_alu_fmax(shader, alu);
+
+   case nir_op_fmin:
+      return trans_nir_alu_fmin(shader, alu);
+
+   case nir_op_pack_unorm_4x8:
+      return trans_nir_alu_pack_unorm_4x8(shader, alu);
+
+   case nir_op_mov:
+      return trans_nir_alu_mov(shader, alu);
+
+   case nir_op_fmul:
+      return trans_nir_alu_fmul(shader, alu);
+
+   case nir_op_ffma:
+      return trans_nir_alu_ffma(shader, alu);
+
+   default:
+      break;
+   }
+
+   unreachable("Unimplemented NIR ALU instruction.");
+}
+
+/**
+ * \brief Translates a fragment-shader load_input intrinsic into Rogue
+ * instructions.
+ *
+ * Emits a ROGUE_OP_PIX_ITER_W that writes the varying's iterated coefficient
+ * data into the destination vreg, followed by a ROGUE_OP_WDF that waits on
+ * the DRC used by the iteration.
+ *
+ * \param[in] shader The shader being built.
+ * \param[in] intr The NIR load_input intrinsic.
+ * \return true if successful, otherwise false.
+ */
+static bool trans_nir_intrinsic_load_input_fs(struct rogue_shader *shader,
+                                              nir_intrinsic_instr *intr)
+{
+   struct rogue_fs_build_data *fs_data = &shader->ctx->stage_data.fs;
+
+   /* Src/dest validation. */
+   assert(nir_dest_num_components(intr->dest) == 1);
+   assert(nir_dest_bit_size(intr->dest) == 32);
+
+   assert(nir_src_num_components(intr->src[0]) == 1);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(nir_intr_src_is_const(intr, 0));
+
+   /* Intrinsic index validation. */
+   assert(nir_intrinsic_dest_type(intr) == nir_type_float32);
+
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   size_t component = nir_intrinsic_component(intr);
+   /* Coefficient registers for this varying component, and for the W
+    * coefficient (location ~0 selects W in rogue_coeff_index_fs).
+    */
+   size_t coeff_index = rogue_coeff_index_fs(&fs_data->iterator_args,
+                                             io_semantics.location,
+                                             component);
+   size_t wcoeff_index = rogue_coeff_index_fs(&fs_data->iterator_args, ~0, 0);
+   size_t drc_num = rogue_acquire_drc(shader);
+   uint64_t source_count = nir_dest_num_components(intr->dest);
+
+   size_t nir_dest_reg = nir_intr_dest_regindex(intr);
+
+   /* pixiter.w instruction. */
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_PIX_ITER_W);
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg));
+   CHECK(rogue_instr_set_operand_drc(instr, 1, drc_num));
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     2,
+                                     ROGUE_OPERAND_TYPE_REG_COEFF,
+                                     coeff_index));
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     3,
+                                     ROGUE_OPERAND_TYPE_REG_COEFF,
+                                     wcoeff_index));
+   CHECK(rogue_instr_set_operand_imm(instr, 4, source_count));
+
+   /* wdf instruction must follow the pixiter.w. */
+   instr = rogue_shader_insert(shader, ROGUE_OP_WDF);
+
+   CHECK(rogue_instr_set_operand_drc(instr, 0, drc_num));
+   rogue_release_drc(shader, drc_num);
+
+   return true;
+}
+
+/**
+ * \brief Translates a vertex-shader load_input intrinsic into a Rogue MOV
+ * from a vertex-input register to the destination vreg.
+ *
+ * \param[in] shader The shader being built.
+ * \param[in] intr The NIR load_input intrinsic.
+ * \return true if successful, otherwise false.
+ */
+static bool trans_nir_intrinsic_load_input_vs(struct rogue_shader *shader,
+                                              nir_intrinsic_instr *intr)
+{
+   /* Src/dest validation. */
+   assert(nir_dest_num_components(intr->dest) == 1);
+   assert(nir_dest_bit_size(intr->dest) == 32);
+
+   assert(nir_src_num_components(intr->src[0]) == 1);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(nir_intr_src_is_const(intr, 0));
+
+   /* Intrinsic index validation. */
+   assert(nir_intrinsic_dest_type(intr) == nir_type_float32);
+
+   size_t component = nir_intrinsic_component(intr);
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   /* NOTE(review): the hard-coded stride of 3 assumes every vertex input
+    * occupies 3 registers — confirm against the input allocation in
+    * rogue_build_data (which uses per-input component counts).
+    */
+   size_t vi_reg_index = ((io_semantics.location - VERT_ATTRIB_GENERIC0) * 3) +
+                         component; /* TODO: get these properly with the
+                                     * intrinsic index (ssa argument)
+                                     */
+
+   size_t nir_dest_reg = nir_intr_dest_regindex(intr);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV);
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg));
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     1,
+                                     ROGUE_OPERAND_TYPE_REG_VERTEX_IN,
+                                     vi_reg_index));
+
+   return true;
+}
+
+/* Selects the per-stage handler for a NIR load_input intrinsic. */
+static bool trans_nir_intrinsic_load_input(struct rogue_shader *shader,
+                                           nir_intrinsic_instr *intr)
+{
+   if (shader->stage == MESA_SHADER_FRAGMENT)
+      return trans_nir_intrinsic_load_input_fs(shader, intr);
+
+   if (shader->stage == MESA_SHADER_VERTEX)
+      return trans_nir_intrinsic_load_input_vs(shader, intr);
+
+   unreachable("Unimplemented NIR load_input variant.");
+}
+
+/**
+ * \brief Translates a fragment-shader store_output intrinsic into a Rogue
+ * MOV to a pixel-output register, with the overlap-check (olchk) flag set.
+ *
+ * \param[in] shader The shader being built.
+ * \param[in] intr The NIR store_output intrinsic.
+ * \return true if successful, otherwise false.
+ */
+static bool trans_nir_intrinsic_store_output_fs(struct rogue_shader *shader,
+                                                nir_intrinsic_instr *intr)
+{
+   /* Src/dest validation. */
+   assert(nir_src_num_components(intr->src[0]) == 1);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(!nir_intr_src_is_const(intr, 0));
+
+   assert(nir_src_num_components(intr->src[1]) == 1);
+   assert(nir_src_bit_size(intr->src[1]) == 32);
+   assert(nir_intr_src_is_const(intr, 1));
+
+   /* Intrinsic index validation. */
+   assert(nir_intrinsic_src_type(intr) == nir_type_uint32);
+
+   /* Fetch the output offset. */
+   /* TODO: Is this really the right value to use for pixel out reg. num? */
+   size_t offset = nir_intr_src_const(intr, 1);
+
+   /* Fetch the components. */
+   size_t src_reg = nir_intr_src_regindex(intr, 0);
+
+   /* mov.olchk instruction. */
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV);
+
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     0,
+                                     ROGUE_OPERAND_TYPE_REG_PIXEL_OUT,
+                                     offset));
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, src_reg));
+   CHECK(rogue_instr_set_flag(instr, ROGUE_INSTR_FLAG_OLCHK));
+
+   return true;
+}
+
+/**
+ * \brief Translates a vertex-shader store_output intrinsic into a Rogue
+ * VTXOUT instruction targeting the allocated vertex-output slot.
+ *
+ * \param[in] shader The shader being built.
+ * \param[in] intr The NIR store_output intrinsic.
+ * \return true if successful, otherwise false.
+ */
+static bool trans_nir_intrinsic_store_output_vs(struct rogue_shader *shader,
+                                                nir_intrinsic_instr *intr)
+{
+   struct rogue_vs_build_data *vs_data = &shader->ctx->stage_data.vs;
+
+   /* Src/dest validation. */
+   assert(nir_src_num_components(intr->src[0]) == 1);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(!nir_intr_src_is_const(intr, 0));
+
+   assert(nir_src_num_components(intr->src[1]) == 1);
+   assert(nir_src_bit_size(intr->src[1]) == 32);
+   assert(nir_intr_src_is_const(intr, 1));
+
+   /* Intrinsic index validation. */
+   assert(nir_intrinsic_src_type(intr) == nir_type_float32);
+   /* Only single-component writes are supported. */
+   assert(util_bitcount(nir_intrinsic_write_mask(intr)) == 1);
+
+   struct nir_io_semantics io_semantics = nir_intrinsic_io_semantics(intr);
+   size_t component = nir_intrinsic_component(intr);
+   /* Map the varying location/component to its allocated output index. */
+   size_t vo_index = rogue_output_index_vs(&vs_data->outputs,
+                                           io_semantics.location,
+                                           component);
+
+   size_t src_reg = nir_intr_src_regindex(intr, 0);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_VTXOUT);
+
+   CHECK(rogue_instr_set_operand_imm(instr, 0, vo_index));
+   CHECK(rogue_instr_set_operand_vreg(instr, 1, src_reg));
+
+   return true;
+}
+
+/* Selects the per-stage handler for a NIR store_output intrinsic. */
+static bool trans_nir_intrinsic_store_output(struct rogue_shader *shader,
+                                             nir_intrinsic_instr *intr)
+{
+   if (shader->stage == MESA_SHADER_FRAGMENT)
+      return trans_nir_intrinsic_store_output_fs(shader, intr);
+
+   if (shader->stage == MESA_SHADER_VERTEX)
+      return trans_nir_intrinsic_store_output_vs(shader, intr);
+
+   unreachable("Unimplemented NIR store_output variant.");
+}
+
+/**
+ * \brief Translates a NIR load_ubo intrinsic into a Rogue MOV from the
+ * shared register that was allocated for the UBO member.
+ *
+ * The (descriptor set, binding) pair comes from the two components of
+ * src[0]; the dword offset within the UBO comes from the range base.
+ *
+ * \param[in] shader The shader being built.
+ * \param[in] intr The NIR load_ubo intrinsic.
+ * \return true if successful, otherwise false.
+ */
+static bool trans_nir_intrinsic_load_ubo(struct rogue_shader *shader,
+                                         nir_intrinsic_instr *intr)
+{
+   struct rogue_ubo_data *ubo_data =
+      &shader->ctx->common_data[shader->stage].ubo_data;
+
+   /* Src/dest validation. */
+   assert(nir_dest_num_components(intr->dest) == 1);
+   assert(nir_dest_bit_size(intr->dest) == 32);
+
+   assert(nir_src_num_components(intr->src[0]) == 2);
+   assert(nir_src_bit_size(intr->src[0]) == 32);
+   assert(nir_intr_src_is_const(intr, 0));
+
+   assert(nir_src_num_components(intr->src[1]) == 1);
+   assert(nir_src_bit_size(intr->src[1]) == 32);
+   assert(nir_intr_src_is_const(intr, 1));
+
+   /* Intrinsic index validation. */
+   /* Loads must be register-aligned and exactly one register wide. */
+   assert((nir_intrinsic_range_base(intr) % ROGUE_REG_SIZE_BYTES) == 0);
+   assert(nir_intrinsic_range(intr) == ROGUE_REG_SIZE_BYTES);
+
+   size_t nir_dest_reg = nir_intr_dest_regindex(intr);
+
+   size_t desc_set = nir_intr_src_comp_const(intr, 0, 0);
+   size_t binding = nir_intr_src_comp_const(intr, 0, 1);
+   size_t offset = nir_intrinsic_range_base(intr);
+
+   /* Look up the shared register holding this UBO dword. */
+   size_t sh_num = rogue_ubo_reg(ubo_data, desc_set, binding, offset);
+
+   struct rogue_instr *instr = rogue_shader_insert(shader, ROGUE_OP_MOV);
+
+   CHECK(rogue_instr_set_operand_vreg(instr, 0, nir_dest_reg));
+   CHECK(rogue_instr_set_operand_reg(instr,
+                                     1,
+                                     ROGUE_OPERAND_TYPE_REG_SHARED,
+                                     sh_num));
+   return true;
+}
+
+/* Dispatches a NIR intrinsic instruction to its handler. */
+static bool trans_nir_intrinsic(struct rogue_shader *shader,
+                                nir_intrinsic_instr *intr)
+{
+   if (intr->intrinsic == nir_intrinsic_load_input)
+      return trans_nir_intrinsic_load_input(shader, intr);
+
+   if (intr->intrinsic == nir_intrinsic_store_output)
+      return trans_nir_intrinsic_store_output(shader, intr);
+
+   if (intr->intrinsic == nir_intrinsic_load_ubo)
+      return trans_nir_intrinsic_load_ubo(shader, intr);
+
+   unreachable("Unimplemented NIR intrinsic instruction.");
+}
+
+/**
+ * \brief Validates a NIR load_const instruction; no code is emitted here.
+ *
+ * \param[in] shader The shader being built (unused for now).
+ * \param[in] load_const The NIR load_const instruction.
+ * \return true if successful, otherwise false.
+ */
+static bool trans_nir_load_const(struct rogue_shader *shader,
+                                 nir_load_const_instr *load_const)
+{
+   /* Src/dest validation. */
+   assert(load_const->def.bit_size == 32);
+
+   /* Ensure that two-component load_consts are used only by load_ubos. */
+   if (load_const->def.num_components == 2) {
+      nir_foreach_use (use_src, &load_const->def) {
+         nir_instr *instr = use_src->parent_instr;
+         assert(instr->type == nir_instr_type_intrinsic);
+
+         ASSERTED nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         assert(intr->intrinsic == nir_intrinsic_load_ubo);
+      }
+   } else {
+      /* Otherwise only scalar constants are supported. */
+      assert(load_const->def.num_components == 1);
+   }
+
+   /* TODO: This is currently done in MOV_IMM, but instead now would be the
+    * time to lookup the constant value, see if it lives in const regs, or if
+    * it needs to generate a MOV_IMM (or be constant calc-ed).
+    */
+   return true;
+}
+
+/* Emits the stage-specific end-of-shader instruction for a NIR return. */
+static bool trans_nir_jump_return(struct rogue_shader *shader,
+                                  nir_jump_instr *jump)
+{
+   enum rogue_opcode end_op;
+
+   if (shader->stage == MESA_SHADER_FRAGMENT)
+      end_op = ROGUE_OP_END_FRAG;
+   else if (shader->stage == MESA_SHADER_VERTEX)
+      end_op = ROGUE_OP_END_VERT;
+   else
+      unreachable("Unimplemented NIR return instruction type.");
+
+   rogue_shader_insert(shader, end_op);
+
+   return true;
+}
+
+/* Dispatches a NIR jump instruction to its handler. */
+static bool trans_nir_jump(struct rogue_shader *shader, nir_jump_instr *jump)
+{
+   if (jump->type == nir_jump_return)
+      return trans_nir_jump_return(shader, jump);
+
+   unreachable("Unimplemented NIR jump instruction type.");
+}
+
+/**
+ * \brief Converts a NIR shader to Rogue.
+ *
+ * Walks every instruction of the (single) entrypoint, translating each one
+ * to Rogue instructions, then runs register allocation on the result.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return A rogue_shader* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader *rogue_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                        const nir_shader *nir)
+{
+   gl_shader_stage stage = nir->info.stage;
+   struct rogue_shader *shader = rogue_shader_create(ctx, stage);
+   if (!shader)
+      return NULL;
+
+   /* Make sure we only have a single function. */
+   assert(exec_list_length(&nir->functions) == 1);
+
+   /* Translate shader entrypoint. */
+   /* Cast away const: nir_shader_get_entrypoint takes a non-const shader. */
+   nir_function_impl *entry = nir_shader_get_entrypoint((nir_shader *)nir);
+   nir_foreach_block (block, entry) {
+      nir_foreach_instr (instr, block) {
+         switch (instr->type) {
+         case nir_instr_type_alu:
+            /* TODO: Cleanup on failure. */
+            CHECKF(trans_nir_alu(shader, nir_instr_as_alu(instr)),
+                   "Failed to translate NIR ALU instruction.");
+            break;
+
+         case nir_instr_type_intrinsic:
+            CHECKF(trans_nir_intrinsic(shader, nir_instr_as_intrinsic(instr)),
+                   "Failed to translate NIR intrinsic instruction.");
+            break;
+
+         case nir_instr_type_load_const:
+            CHECKF(trans_nir_load_const(shader, nir_instr_as_load_const(instr)),
+                   "Failed to translate NIR load_const instruction.");
+            break;
+
+         case nir_instr_type_jump:
+            CHECKF(trans_nir_jump(shader, nir_instr_as_jump(instr)),
+                   "Failed to translate NIR jump instruction.");
+            break;
+
+         default:
+            unreachable("Unimplemented NIR instruction type.");
+         }
+      }
+   }
+
+   /* Perform register allocation. */
+   /* TODO: handle failure. */
+   /* Feeds temp/internal register usage back into the shared build data. */
+   if (!rogue_ra_alloc(&shader->instr_list,
+                       shader->ra,
+                       &ctx->common_data[stage].temps,
+                       &ctx->common_data[stage].internals))
+      return NULL;
+
+   return shader;
+}
+
+/**
+ * \brief Creates and sets up a shared multi-stage build context.
+ *
+ * \param[in] compiler The compiler context.
+ * \return A pointer to the new build context, or NULL on failure.
+ */
+struct rogue_build_ctx *
+rogue_create_build_context(struct rogue_compiler *compiler)
+{
+   /* rzalloc_size zero-initializes the allocation, which also provides the
+    * default-zeroed nir/rogue/binary shader state the context needs.
+    */
+   struct rogue_build_ctx *ctx = rzalloc_size(compiler, sizeof(*ctx));
+   if (!ctx)
+      return NULL;
+
+   ctx->compiler = compiler;
+
+   /* Setup non-zero defaults. */
+   ctx->stage_data.fs.msaa_mode = ROGUE_MSAA_MODE_PIXEL;
+
+   return ctx;
+}
diff --git a/src/imagination/rogue/rogue.h b/src/imagination/rogue/rogue.h
new file mode 100644 (file)
index 0000000..ca9ff8c
--- /dev/null
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_H
+#define ROGUE_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+
+/* All registers are 32-bit in size. */
+#define ROGUE_REG_SIZE_BYTES 4
+#define ROGUE_REG_UNUSED UINT32_MAX
+
+struct nir_spirv_specialization;
+struct rogue_build_ctx;
+struct rogue_shader;
+
+/* MSAA execution mode for fragment tasks. */
+enum rogue_msaa_mode {
+   ROGUE_MSAA_MODE_UNDEF = 0, /* explicitly treat 0 as undefined */
+   /* One task for all samples. */
+   ROGUE_MSAA_MODE_PIXEL,
+   /* For on-edge pixels only: separate tasks for each sample. */
+   ROGUE_MSAA_MODE_SELECTIVE,
+   /* For all pixels: separate tasks for each sample. */
+   ROGUE_MSAA_MODE_FULL,
+};
+
+/**
+ * \brief Shader binary.
+ */
+struct rogue_shader_binary {
+   size_t size; /* Presumably the byte size of \p data — confirm at producer. */
+   uint8_t data[]; /* Flexible array member holding the binary contents. */
+};
+
+PUBLIC
+nir_shader *rogue_spirv_to_nir(struct rogue_build_ctx *ctx,
+                               gl_shader_stage stage,
+                               const char *entry,
+                               size_t spirv_size,
+                               const uint32_t *spirv_data,
+                               unsigned num_spec,
+                               struct nir_spirv_specialization *spec);
+
+PUBLIC
+struct rogue_shader_binary *rogue_to_binary(struct rogue_build_ctx *ctx,
+                                            const struct rogue_shader *shader);
+
+PUBLIC
+struct rogue_shader *rogue_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                        const nir_shader *nir);
+#endif /* ROGUE_H */
diff --git a/src/imagination/rogue/rogue_build_data.c b/src/imagination/rogue/rogue_build_data.c
new file mode 100644 (file)
index 0000000..010a109
--- /dev/null
@@ -0,0 +1,602 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "rogue_build_data.h"
+#include "rogue_nir_helpers.h"
+#include "rogue_operand.h"
+#include "util/macros.h"
+
+#define __pvr_address_type uint64_t
+#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr)
+
+#include "csbgen/rogue_pds.h"
+
+#undef __pvr_get_address
+#undef __pvr_address_type
+
+/**
+ * \brief Allocates the coefficient registers that will contain the iterator
+ * data for the fragment shader input varyings.
+ *
+ * \param[in] args The iterator argument data.
+ * \return The total number of coefficient registers required by the iterators.
+ */
+static size_t alloc_iterator_regs(struct rogue_iterator_args *args)
+{
+   size_t next_coeff = 0;
+
+   for (size_t i = 0; i < args->num_fpu_iterators; ++i) {
+      /* Each entry must still be unallocated; ensures there are no gaps. */
+      assert(args->base[i] == ~0);
+
+      args->base[i] = next_coeff;
+      next_coeff += ROGUE_COEFF_ALIGN * args->components[i];
+   }
+
+   return next_coeff;
+}
+
+/**
+ * \brief Reserves an iterator for a fragment shader input varying,
+ * and calculates its setup data.
+ *
+ * \param[in] args The iterator argument data.
+ * \param[in] i The iterator index.
+ * \param[in] type The interpolation type of the varying.
+ * \param[in] f16 Whether the data type is F16 or F32.
+ * \param[in] components The number of components in the varying.
+ */
+static void reserve_iterator(struct rogue_iterator_args *args,
+                             size_t i,
+                             enum glsl_interp_mode type,
+                             bool f16,
+                             size_t components)
+{
+   struct ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC data = { 0 };
+
+   assert(components >= 1 && components <= 4);
+
+   /* The first iterator (W) *must* be INTERP_MODE_NOPERSPECTIVE. */
+   assert(i > 0 || type == INTERP_MODE_NOPERSPECTIVE);
+   assert(i < ARRAY_SIZE(args->fpu_iterators));
+
+   switch (type) {
+   /* Default interpolation is smooth. */
+   case INTERP_MODE_NONE:
+      data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
+      data.perspective = true;
+      break;
+
+   case INTERP_MODE_NOPERSPECTIVE:
+      data.shademodel = ROGUE_PDSINST_DOUTI_SHADEMODEL_GOURUAD;
+      data.perspective = false;
+      break;
+
+   default:
+      unreachable("Unimplemented interpolation type.");
+   }
+
+   /* Number of components in this varying
+    * (corresponds to ROGUE_PDSINST_DOUTI_SIZE_1..4D).
+    */
+   data.size = (components - 1);
+
+   /* TODO: Investigate F16 support. */
+   assert(!f16);
+   data.f16 = f16;
+
+   /* Offsets within the vertex. */
+   /* NOTE(review): the factor of 2 presumably reflects 2 dwords per varying
+    * slot in the vertex — confirm against the PDS DOUTI documentation.
+    */
+   data.f32_offset = 2 * i;
+   data.f16_offset = data.f32_offset;
+
+   /* Pack the hardware-format iterator descriptor and record the bookkeeping
+    * entry; base is allocated later by alloc_iterator_regs().
+    */
+   ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_pack(&args->fpu_iterators[i], &data);
+   args->destination[i] = i;
+   args->base[i] = ~0;
+   args->components[i] = components;
+   ++args->num_fpu_iterators;
+}
+
+/**
+ * \brief Collects the fragment shader I/O data to feed-back to the driver.
+ *
+ * \sa #collect_io_data()
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] fs_data Fragment-specific build data.
+ * \param[in] nir NIR fragment shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_io_data_fs(struct rogue_common_build_data *common_data,
+                               struct rogue_fs_build_data *fs_data,
+                               nir_shader *nir)
+{
+   size_t num_inputs = nir_count_variables_with_modes(nir, nir_var_shader_in);
+   /* Reserve one iterator slot for the W component in addition to the
+    * inputs.
+    */
+   assert(num_inputs < (ARRAY_SIZE(fs_data->iterator_args.fpu_iterators) - 1));
+
+   /* Process inputs (if present). */
+   if (num_inputs) {
+      /* If the fragment shader has inputs, the first iterator
+       * must be used for the W component.
+       */
+      reserve_iterator(&fs_data->iterator_args,
+                       0,
+                       INTERP_MODE_NOPERSPECTIVE,
+                       false,
+                       1);
+
+      nir_foreach_shader_in_variable (var, nir) {
+         /* Iterator 0 is W, so varyings start at index 1. */
+         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
+         size_t components = glsl_get_components(var->type);
+         enum glsl_interp_mode interp = var->data.interpolation;
+         bool f16 = glsl_type_is_16bit(var->type);
+
+         /* Check that arguments are either F16 or F32. */
+         assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+         assert(f16 || glsl_type_is_32bit(var->type));
+
+         /* Check input location. */
+         assert(var->data.location >= VARYING_SLOT_VAR0 &&
+                var->data.location <= VARYING_SLOT_VAR31);
+
+         reserve_iterator(&fs_data->iterator_args, i, interp, f16, components);
+      }
+
+      /* Allocate coefficient registers for all reserved iterators. */
+      common_data->coeffs = alloc_iterator_regs(&fs_data->iterator_args);
+      assert(common_data->coeffs);
+      assert(common_data->coeffs < ROGUE_MAX_REG_COEFF);
+   }
+
+   /* TODO: Process outputs. */
+
+   return true;
+}
+
+/**
+ * \brief Allocates the vertex shader input registers.
+ *
+ * \param[in] inputs The vertex shader input data.
+ * \return The total number of vertex input registers required.
+ */
+static size_t alloc_vs_inputs(struct rogue_vertex_inputs *inputs)
+{
+   size_t next_reg = 0;
+
+   for (size_t i = 0; i < inputs->num_input_vars; ++i) {
+      /* Each entry must still be unallocated; ensures there are no gaps. */
+      assert(inputs->base[i] == ~0);
+
+      inputs->base[i] = next_reg;
+      next_reg += inputs->components[i];
+   }
+
+   return next_reg;
+}
+
+/**
+ * \brief Allocates the vertex shader outputs.
+ *
+ * \param[in] outputs The vertex shader output data.
+ * \return The total number of vertex outputs required.
+ */
+static size_t alloc_vs_outputs(struct rogue_vertex_outputs *outputs)
+{
+   size_t next_output = 0;
+
+   for (size_t i = 0; i < outputs->num_output_vars; ++i) {
+      /* Each entry must still be unallocated; ensures there are no gaps. */
+      assert(outputs->base[i] == ~0);
+
+      outputs->base[i] = next_output;
+      next_output += outputs->components[i];
+   }
+
+   return next_output;
+}
+
+/**
+ * \brief Counts the varyings used by the vertex shader.
+ *
+ * \param[in] outputs The vertex shader output data.
+ * \return The number of varyings used.
+ */
+static size_t count_vs_varyings(struct rogue_vertex_outputs *outputs)
+{
+   size_t total = 0;
+
+   /* Entry 0 is the position output; varyings start at index 1. */
+   for (size_t i = 1; i < outputs->num_output_vars; ++i)
+      total += outputs->components[i];
+
+   return total;
+}
+
+/**
+ * \brief Reserves space for a vertex shader input.
+ *
+ * \param[in] inputs The vertex input data.
+ * \param[in] i The vertex input index.
+ * \param[in] components The number of components in the input.
+ */
+static void reserve_vs_input(struct rogue_vertex_inputs *inputs,
+                             size_t i,
+                             size_t components)
+{
+   assert(i < ARRAY_SIZE(inputs->base));
+   assert(components >= 1 && components <= 4);
+
+   inputs->base[i] = ~0; /* Allocated later by alloc_vs_inputs(). */
+   inputs->components[i] = components;
+   ++inputs->num_input_vars;
+}
+
+/**
+ * \brief Reserves space for a vertex shader output.
+ *
+ * \param[in] outputs The vertex output data.
+ * \param[in] i The vertex output index.
+ * \param[in] components The number of components in the output.
+ */
+static void reserve_vs_output(struct rogue_vertex_outputs *outputs,
+                              size_t i,
+                              size_t components)
+{
+   assert(i < ARRAY_SIZE(outputs->base));
+   assert(components >= 1 && components <= 4);
+
+   outputs->base[i] = ~0; /* Allocated later by alloc_vs_outputs(). */
+   outputs->components[i] = components;
+   ++outputs->num_output_vars;
+}
+
+/**
+ * \brief Collects the vertex shader I/O data to feed-back to the driver.
+ *
+ * \sa #collect_io_data()
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] vs_data Vertex-specific build data.
+ * \param[in] nir NIR vertex shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_io_data_vs(struct rogue_common_build_data *common_data,
+                               struct rogue_vs_build_data *vs_data,
+                               nir_shader *nir)
+{
+   ASSERTED bool out_pos_present = false;
+   ASSERTED size_t num_outputs =
+      nir_count_variables_with_modes(nir, nir_var_shader_out);
+
+   /* Process inputs. */
+   nir_foreach_shader_in_variable (var, nir) {
+      size_t components = glsl_get_components(var->type);
+      size_t i = var->data.location - VERT_ATTRIB_GENERIC0;
+
+      /* Check that inputs are F32. */
+      /* TODO: Support other types. */
+      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+      assert(glsl_type_is_32bit(var->type));
+
+      /* Check input location. */
+      assert(var->data.location >= VERT_ATTRIB_GENERIC0 &&
+             var->data.location <= VERT_ATTRIB_GENERIC15);
+
+      reserve_vs_input(&vs_data->inputs, i, components);
+   }
+
+   vs_data->num_vertex_input_regs = alloc_vs_inputs(&vs_data->inputs);
+   assert(vs_data->num_vertex_input_regs);
+   assert(vs_data->num_vertex_input_regs < ROGUE_MAX_REG_VERTEX_IN);
+
+   /* Process outputs. */
+
+   /* We should always have at least a position variable. */
+   assert(num_outputs > 0 && "Invalid number of vertex shader outputs.");
+
+   nir_foreach_shader_out_variable (var, nir) {
+      size_t components = glsl_get_components(var->type);
+
+      /* Check that outputs are F32. */
+      /* TODO: Support other types. */
+      assert(glsl_get_base_type(var->type) == GLSL_TYPE_FLOAT);
+      assert(glsl_type_is_32bit(var->type));
+
+      if (var->data.location == VARYING_SLOT_POS) {
+         /* Position is always vec4 and takes output slot 0. */
+         assert(components == 4);
+         out_pos_present = true;
+
+         reserve_vs_output(&vs_data->outputs, 0, components);
+      } else if ((var->data.location >= VARYING_SLOT_VAR0) &&
+                 (var->data.location <= VARYING_SLOT_VAR31)) {
+         /* Generic varyings start at output slot 1 (after position). */
+         size_t i = (var->data.location - VARYING_SLOT_VAR0) + 1;
+         reserve_vs_output(&vs_data->outputs, i, components);
+      } else {
+         unreachable("Unsupported vertex output type.");
+      }
+   }
+
+   /* Always need the output position to be present. */
+   assert(out_pos_present);
+
+   vs_data->num_vertex_outputs = alloc_vs_outputs(&vs_data->outputs);
+   assert(vs_data->num_vertex_outputs);
+   assert(vs_data->num_vertex_outputs < ROGUE_MAX_VERTEX_OUTPUTS);
+
+   vs_data->num_varyings = count_vs_varyings(&vs_data->outputs);
+
+   return true;
+}
+
+/**
+ * \brief Allocates the shared registers that will contain the UBOs.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \return The total number of shared registers required by the UBOs.
+ */
+static size_t alloc_ubos(struct rogue_ubo_data *ubo_data)
+{
+   size_t next_shared = 0;
+
+   for (size_t i = 0; i < ubo_data->num_ubo_entries; ++i) {
+      /* Each entry must still be unallocated; ensures there are no gaps. */
+      assert(ubo_data->dest[i] == ~0);
+
+      ubo_data->dest[i] = next_shared;
+      next_shared += ubo_data->size[i];
+   }
+
+   return next_shared;
+}
+
+/**
+ * \brief Reserves a UBO and calculates its data.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \param[in] desc_set The UBO descriptor set.
+ * \param[in] binding The UBO binding.
+ * \param[in] size The size required by the UBO (in dwords).
+ */
+static void reserve_ubo(struct rogue_ubo_data *ubo_data,
+                        size_t desc_set,
+                        size_t binding,
+                        size_t size)
+{
+   size_t entry = ubo_data->num_ubo_entries;
+   assert(entry < ARRAY_SIZE(ubo_data->desc_set));
+
+   ubo_data->desc_set[entry] = desc_set;
+   ubo_data->binding[entry] = binding;
+   ubo_data->dest[entry] = ~0; /* Allocated later by alloc_ubos(). */
+   ubo_data->size[entry] = size;
+   ++ubo_data->num_ubo_entries;
+}
+
+/**
+ * \brief Collects UBO data to feed-back to the driver.
+ *
+ * For each UBO variable, scans the entrypoint for load_ubo intrinsics that
+ * reference it, sizes the UBO as the largest load offset + 1 register, then
+ * allocates shared registers for all reserved UBOs.
+ *
+ * \param[in] common_data Common build data.
+ * \param[in] nir NIR shader.
+ * \return true if successful, otherwise false.
+ */
+static bool collect_ubo_data(struct rogue_common_build_data *common_data,
+                             nir_shader *nir)
+{
+   /* Iterate over each UBO. */
+   nir_foreach_variable_with_modes (var, nir, nir_var_mem_ubo) {
+      size_t desc_set = var->data.driver_location;
+      size_t binding = var->data.binding;
+      size_t ubo_size_regs = 0;
+
+      nir_function_impl *entry = nir_shader_get_entrypoint(nir);
+      /* Iterate over each load_ubo that uses this UBO. */
+      nir_foreach_block (block, entry) {
+         nir_foreach_instr (instr, block) {
+            if (instr->type != nir_instr_type_intrinsic)
+               continue;
+
+            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+            if (intr->intrinsic != nir_intrinsic_load_ubo)
+               continue;
+
+            assert(nir_src_num_components(intr->src[0]) == 2);
+            assert(nir_intr_src_is_const(intr, 0));
+
+            /* src[0] components are (descriptor set, binding). */
+            size_t load_desc_set = nir_intr_src_comp_const(intr, 0, 0);
+            size_t load_binding = nir_intr_src_comp_const(intr, 0, 1);
+
+            if (load_desc_set != desc_set || load_binding != binding)
+               continue;
+
+            ASSERTED size_t size_bytes = nir_intrinsic_range(intr);
+            assert(size_bytes == ROGUE_REG_SIZE_BYTES);
+
+            size_t offset_bytes = nir_intrinsic_range_base(intr);
+            assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
+
+            size_t offset_regs = offset_bytes / ROGUE_REG_SIZE_BYTES;
+
+            /* TODO: Put offsets in a BITSET_DECLARE and check for gaps. */
+
+            /* Find the largest load offset. */
+            ubo_size_regs = MAX2(ubo_size_regs, offset_regs);
+         }
+      }
+
+      /* UBO size = largest offset + 1. */
+      /* NOTE(review): a UBO with no loads still reserves one register here —
+       * confirm whether that is intentional.
+       */
+      ++ubo_size_regs;
+
+      reserve_ubo(&common_data->ubo_data, desc_set, binding, ubo_size_regs);
+   }
+
+   common_data->shareds = alloc_ubos(&common_data->ubo_data);
+   assert(common_data->shareds < ROGUE_MAX_REG_SHARED);
+
+   return true;
+}
+
+/**
+ * \brief Collects I/O data to feed-back to the driver.
+ *
+ * Collects the inputs/outputs/memory required, and feeds that back to the
+ * driver. Done at this stage rather than at the start of rogue_to_binary, so
+ * that all the I/O of all the shader stages is known before backend
+ * compilation, which would let us do things like cull unused inputs.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir)
+{
+   gl_shader_stage stage = nir->info.stage;
+   struct rogue_common_build_data *common_data = &ctx->common_data[stage];
+
+   /* Collect stage-agnostic data. */
+   if (!collect_ubo_data(common_data, nir))
+      return false;
+
+   /* Collect stage-specific data. */
+   if (stage == MESA_SHADER_FRAGMENT)
+      return collect_io_data_fs(common_data, &ctx->stage_data.fs, nir);
+
+   if (stage == MESA_SHADER_VERTEX)
+      return collect_io_data_vs(common_data, &ctx->stage_data.vs, nir);
+
+   /* Unsupported shader stage. */
+   return false;
+}
+
+/**
+ * \brief Returns the allocated coefficient register index for a component of an
+ * input varying location.
+ *
+ * Entry 0 of the iterator arguments is reserved for the W coefficient, so
+ * generic varyings (VARYING_SLOT_VAR0..VAR31) occupy entries 1 onwards.
+ *
+ * \param[in] args The allocated iterator argument data.
+ * \param[in] location The input varying location, or ~0 for the W coefficient.
+ * \param[in] component The requested component.
+ * \return The coefficient register index.
+ */
+size_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
+                            gl_varying_slot location,
+                            size_t component)
+{
+   size_t i;
+
+   /* Special case: W coefficient. */
+   if (location == ~0) {
+      /* The W component shouldn't be the only one. */
+      assert(args->num_fpu_iterators > 1);
+      assert(args->destination[0] == 0);
+      return 0;
+   }
+
+   /* Validate the location *before* deriving an index from it; a location
+    * below VARYING_SLOT_VAR0 would otherwise wrap to a huge size_t index.
+    */
+   assert(location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31);
+   i = (location - VARYING_SLOT_VAR0) + 1;
+
+   assert(i < args->num_fpu_iterators);
+   assert(component < args->components[i]);
+   assert(args->base[i] != ~0);
+
+   return args->base[i] + (ROGUE_COEFF_ALIGN * component);
+}
+
+/**
+ * \brief Returns the allocated vertex output index for a component of an input
+ * varying location.
+ *
+ * The position output always occupies entry 0; generic varyings
+ * (VARYING_SLOT_VAR0..VAR31) occupy entries 1 onwards.
+ *
+ * \param[in] outputs The vertex output data.
+ * \param[in] location The output varying location.
+ * \param[in] component The requested component.
+ * \return The vertex output index.
+ */
+size_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
+                             gl_varying_slot location,
+                             size_t component)
+{
+   size_t index;
+
+   if (location == VARYING_SLOT_POS) {
+      /* The position slot is always allocated at index 0. */
+      assert(outputs->base[0] == 0);
+      index = 0;
+   } else if (location >= VARYING_SLOT_VAR0 && location <= VARYING_SLOT_VAR31) {
+      /* Generic varyings follow the position slot. */
+      index = (location - VARYING_SLOT_VAR0) + 1;
+   } else {
+      unreachable("Unsupported vertex output type.");
+   }
+
+   assert(index < outputs->num_output_vars);
+   assert(component < outputs->components[index]);
+   assert(outputs->base[index] != ~0);
+
+   return outputs->base[index] + component;
+}
+
+/**
+ * \brief Returns the allocated shared register index for a given UBO offset.
+ *
+ * \param[in] ubo_data The UBO data.
+ * \param[in] desc_set The UBO descriptor set.
+ * \param[in] binding The UBO binding.
+ * \param[in] offset_bytes The UBO offset in bytes.
+ * \return The UBO offset shared register index.
+ */
+size_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
+                     size_t desc_set,
+                     size_t binding,
+                     size_t offset_bytes)
+{
+   size_t ubo_index = ~0;
+
+   /* Locate the allocated UBO entry matching (desc_set, binding). */
+   for (size_t i = 0; i < ubo_data->num_ubo_entries; ++i) {
+      /* Skip entries without a register allocation. */
+      if (ubo_data->dest[i] == ~0)
+         continue;
+
+      if (ubo_data->desc_set[i] == desc_set &&
+          ubo_data->binding[i] == binding) {
+         ubo_index = i;
+         break;
+      }
+   }
+
+   assert(ubo_index != ~0);
+
+   /* Offsets must be register-aligned. */
+   assert(!(offset_bytes % ROGUE_REG_SIZE_BYTES));
+
+   return ubo_data->dest[ubo_index] + (offset_bytes / ROGUE_REG_SIZE_BYTES);
+}
diff --git a/src/imagination/rogue/rogue_build_data.h b/src/imagination/rogue/rogue_build_data.h
new file mode 100644 (file)
index 0000000..b1d178a
--- /dev/null
@@ -0,0 +1,163 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_BUILD_DATA_H
+#define ROGUE_BUILD_DATA_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "rogue.h"
+
+/* Max number of I/O varying variables.
+ * Fragment shader: MAX_VARYING + 1 (W coefficient).
+ * Vertex shader: MAX_VARYING + 1 (position slot).
+ */
+#define ROGUE_MAX_IO_VARYING_VARS (MAX_VARYING + 1)
+
+/* VERT_ATTRIB_GENERIC0-15 */
+#define ROGUE_MAX_IO_ATTRIB_VARS 16
+
+/* Max buffers entries that can be used. */
+/* TODO: Currently UBOs are the only supported buffers. */
+#define ROGUE_MAX_BUFFERS 24
+
+struct rogue_compiler;
+struct rogue_shader;
+struct rogue_shader_binary;
+
+/**
+ * \brief UBO data.
+ *
+ * Parallel arrays describing each UBO used by a shader; entries with
+ * dest == ~0 have no shared-register allocation.
+ */
+struct rogue_ubo_data {
+   size_t num_ubo_entries; /* Number of valid entries in the arrays below. */
+   size_t desc_set[ROGUE_MAX_BUFFERS]; /* Descriptor set of each UBO. */
+   size_t binding[ROGUE_MAX_BUFFERS]; /* Binding of each UBO. */
+   size_t dest[ROGUE_MAX_BUFFERS]; /* First shared register, or ~0 if unallocated. */
+   size_t size[ROGUE_MAX_BUFFERS]; /* UBO size in registers. */
+};
+
+/**
+ * \brief Per-stage common build data.
+ */
+struct rogue_common_build_data {
+   size_t temps;
+   size_t internals;
+   size_t coeffs;
+   size_t shareds;
+
+   struct rogue_ubo_data ubo_data;
+};
+
+/**
+ * \brief Arguments for the FPU iterator(s)
+ * (produces varyings for the fragment shader).
+ *
+ * Entry 0 is reserved for the W coefficient; generic varyings start at
+ * entry 1 (see rogue_coeff_index_fs()).
+ */
+struct rogue_iterator_args {
+   uint32_t num_fpu_iterators; /* Number of iterators in use. */
+   uint32_t fpu_iterators[ROGUE_MAX_IO_VARYING_VARS];
+   uint32_t destination[ROGUE_MAX_IO_VARYING_VARS];
+   size_t base[ROGUE_MAX_IO_VARYING_VARS]; /* Coeff register base, or ~0 if unallocated. */
+   size_t components[ROGUE_MAX_IO_VARYING_VARS]; /* Components used per entry. */
+};
+
+/**
+ * \brief Vertex input register allocations.
+ */
+struct rogue_vertex_inputs {
+   size_t num_input_vars;
+   size_t base[ROGUE_MAX_IO_ATTRIB_VARS];
+   size_t components[ROGUE_MAX_IO_ATTRIB_VARS];
+};
+
+/**
+ * \brief Vertex output allocations.
+ */
+struct rogue_vertex_outputs {
+   size_t num_output_vars;
+   size_t base[ROGUE_MAX_IO_VARYING_VARS];
+   size_t components[ROGUE_MAX_IO_VARYING_VARS];
+};
+
+/**
+ * \brief Stage-specific build data.
+ */
+struct rogue_build_data {
+   struct rogue_fs_build_data {
+      struct rogue_iterator_args iterator_args;
+      enum rogue_msaa_mode msaa_mode;
+      bool phas; /* Indicates the presence of PHAS instruction. */
+   } fs;
+   struct rogue_vs_build_data {
+      struct rogue_vertex_inputs inputs;
+      size_t num_vertex_input_regs; /* Final number of inputs. */
+
+      struct rogue_vertex_outputs outputs;
+      size_t num_vertex_outputs; /* Final number of outputs. */
+
+      size_t num_varyings; /* Final number of varyings. */
+   } vs;
+};
+
+/**
+ * \brief Shared multi-stage build context.
+ *
+ * The per-stage arrays are indexed by gl_shader_stage, covering stages up to
+ * and including MESA_SHADER_FRAGMENT.
+ */
+struct rogue_build_ctx {
+   struct rogue_compiler *compiler;
+
+   /* Shaders in various stages of compilation. */
+   nir_shader *nir[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_shader *rogue[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_shader_binary *binary[MESA_SHADER_FRAGMENT + 1];
+
+   struct rogue_common_build_data common_data[MESA_SHADER_FRAGMENT + 1];
+   struct rogue_build_data stage_data; /* Stage-specific data (fs/vs). */
+};
+
+PUBLIC
+struct rogue_build_ctx *
+rogue_create_build_context(struct rogue_compiler *compiler);
+
+PUBLIC
+bool rogue_collect_io_data(struct rogue_build_ctx *ctx, nir_shader *nir);
+
+PUBLIC
+size_t rogue_coeff_index_fs(struct rogue_iterator_args *args,
+                            gl_varying_slot location,
+                            size_t component);
+
+PUBLIC
+size_t rogue_output_index_vs(struct rogue_vertex_outputs *outputs,
+                             gl_varying_slot location,
+                             size_t component);
+
+PUBLIC
+size_t rogue_ubo_reg(struct rogue_ubo_data *ubo_data,
+                     size_t desc_set,
+                     size_t binding,
+                     size_t offset_bytes);
+
+#endif /* ROGUE_BUILD_DATA_H */
diff --git a/src/imagination/rogue/rogue_compiler.c b/src/imagination/rogue/rogue_compiler.c
new file mode 100644 (file)
index 0000000..9ae95b0
--- /dev/null
@@ -0,0 +1,72 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stddef.h>
+
+#include "compiler/glsl_types.h"
+#include "rogue_compiler.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue_compiler.c
+ *
+ * \brief Contains the Rogue compiler interface.
+ */
+
+/**
+ * \brief Creates and sets up a Rogue compiler context.
+ *
+ * Also takes a reference on the GLSL type singleton, released by
+ * rogue_compiler_destroy().
+ *
+ * \param[in] dev_info Device info pointer.
+ * \return A pointer to the new compiler context, or NULL on failure.
+ */
+struct rogue_compiler *
+rogue_compiler_create(const struct pvr_device_info *dev_info)
+{
+   struct rogue_compiler *compiler = rzalloc_size(NULL, sizeof(*compiler));
+
+   if (!compiler)
+      return NULL;
+
+   compiler->dev_info = dev_info;
+
+   /* TODO: Additional compiler setup (allocators? error message output
+    * location?).
+    */
+
+   glsl_type_singleton_init_or_ref();
+
+   return compiler;
+}
+
+/**
+ * \brief Destroys and frees a compiler context.
+ *
+ * Drops the GLSL type singleton reference taken in rogue_compiler_create()
+ * before freeing the context itself. The dev_info pointer is not owned by the
+ * compiler and is not freed here.
+ *
+ * \param[in] compiler The compiler context.
+ */
+void rogue_compiler_destroy(struct rogue_compiler *compiler)
+{
+   glsl_type_singleton_decref();
+
+   ralloc_free(compiler);
+}
diff --git a/src/imagination/rogue/rogue_compiler.h b/src/imagination/rogue/rogue_compiler.h
new file mode 100644 (file)
index 0000000..ed7b6fc
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_COMPILER_H
+#define ROGUE_COMPILER_H
+
+#include "util/macros.h"
+
+struct pvr_device_info;
+
+/**
+ * \brief Compiler context.
+ */
+struct rogue_compiler {
+   /* Target device info; borrowed from the caller, not freed on destroy. */
+   const struct pvr_device_info *dev_info;
+};
+
+PUBLIC
+struct rogue_compiler *
+rogue_compiler_create(const struct pvr_device_info *dev_info);
+
+PUBLIC
+void rogue_compiler_destroy(struct rogue_compiler *compiler);
+
+#endif /* ROGUE_COMPILER_H */
diff --git a/src/imagination/rogue/rogue_constreg.c b/src/imagination/rogue/rogue_constreg.c
new file mode 100644 (file)
index 0000000..14c193c
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "rogue_constreg.h"
+#include "util/macros.h"
+
+/**
+ * \file rogue_constreg.c
+ *
+ * \brief Contains functions to find and allocate constant register values.
+ */
+
+/**
+ * \brief Mapping of constant register values and their indices.
+ */
+struct rogue_constreg {
+   uint32_t value;
+   size_t index;
+};
+
+#define CONSTREG(VALUE, INDEX)            \
+   {                                      \
+      .value = (VALUE), .index = (INDEX), \
+   }
+
+/**
+ * \brief Constant register values (sorted for bsearch).
+ */
+static const struct rogue_constreg const_regs[] = {
+   CONSTREG(0x00000000U, 0U), /* 0   (INT32) / 0.0 (Float) */
+   CONSTREG(0x00000001U, 1U), /* 1   (INT32) */
+   CONSTREG(0x00000002U, 2U), /* 2   (INT32) */
+   CONSTREG(0x00000003U, 3U), /* 3   (INT32) */
+   CONSTREG(0x00000004U, 4U), /* 4   (INT32) */
+   CONSTREG(0x00000005U, 5U), /* 5   (INT32) */
+   CONSTREG(0x00000006U, 6U), /* 6   (INT32) */
+   CONSTREG(0x00000007U, 7U), /* 7   (INT32) */
+   CONSTREG(0x00000008U, 8U), /* 8   (INT32) */
+   CONSTREG(0x00000009U, 9U), /* 9   (INT32) */
+   CONSTREG(0x0000000aU, 10U), /* 10  (INT32) */
+   CONSTREG(0x0000000bU, 11U), /* 11  (INT32) */
+   CONSTREG(0x0000000cU, 12U), /* 12  (INT32) */
+   CONSTREG(0x0000000dU, 13U), /* 13  (INT32) */
+   CONSTREG(0x0000000eU, 14U), /* 14  (INT32) */
+   CONSTREG(0x0000000fU, 15U), /* 15  (INT32) */
+   CONSTREG(0x00000010U, 16U), /* 16  (INT32) */
+   CONSTREG(0x00000011U, 17U), /* 17  (INT32) */
+   CONSTREG(0x00000012U, 18U), /* 18  (INT32) */
+   CONSTREG(0x00000013U, 19U), /* 19  (INT32) */
+   CONSTREG(0x00000014U, 20U), /* 20  (INT32) */
+   CONSTREG(0x00000015U, 21U), /* 21  (INT32) */
+   CONSTREG(0x00000016U, 22U), /* 22  (INT32) */
+   CONSTREG(0x00000017U, 23U), /* 23  (INT32) */
+   CONSTREG(0x00000018U, 24U), /* 24  (INT32) */
+   CONSTREG(0x00000019U, 25U), /* 25  (INT32) */
+   CONSTREG(0x0000001aU, 26U), /* 26  (INT32) */
+   CONSTREG(0x0000001bU, 27U), /* 27  (INT32) */
+   CONSTREG(0x0000001cU, 28U), /* 28  (INT32) */
+   CONSTREG(0x0000001dU, 29U), /* 29  (INT32) */
+   CONSTREG(0x0000001eU, 30U), /* 30  (INT32) */
+   CONSTREG(0x0000001fU, 31U), /* 31  (INT32) */
+   CONSTREG(0x0000007fU, 147U), /* 127 (INT32) */
+
+   CONSTREG(0x37800000U, 134U), /* 1.0f/65536f */
+   CONSTREG(0x38000000U, 135U), /* 1.0f/32768f */
+   CONSTREG(0x38800000U, 88U), /* float(2^-14) */
+   CONSTREG(0x39000000U, 87U), /* float(2^-13) */
+   CONSTREG(0x39800000U, 86U), /* float(2^-12) */
+   CONSTREG(0x3a000000U, 85U), /* float(2^-11) */
+   CONSTREG(0x3a800000U, 84U), /* float(2^-10) */
+   CONSTREG(0x3b000000U, 83U), /* float(2^-9) */
+   CONSTREG(0x3b4d2e1cU, 136U), /* 0.0031308f */
+   CONSTREG(0x3b800000U, 82U), /* float(2^-8) */
+   CONSTREG(0x3c000000U, 81U), /* float(2^-7) */
+   CONSTREG(0x3c800000U, 80U), /* float(2^-6) */
+   CONSTREG(0x3d000000U, 79U), /* float(2^-5) */
+   CONSTREG(0x3d25aee6U, 156U), /* 0.04045f */
+   CONSTREG(0x3d6147aeU, 140U), /* 0.055f */
+   CONSTREG(0x3d800000U, 78U), /* float(2^-4) */
+   CONSTREG(0x3d9e8391U, 157U), /* 1.0f/12.92f */
+   CONSTREG(0x3e000000U, 77U), /* float(2^-3) */
+   CONSTREG(0x3e2aaaabU, 153U), /* 1/6 */
+   CONSTREG(0x3e800000U, 76U), /* float(2^-2) */
+   CONSTREG(0x3e9a209bU, 145U), /* Log_10(2) */
+   CONSTREG(0x3ea2f983U, 128U), /* Float 1/PI */
+   CONSTREG(0x3eaaaaabU, 152U), /* 1/3 */
+   CONSTREG(0x3ebc5ab2U, 90U), /* 1/e */
+   CONSTREG(0x3ed55555U, 138U), /* 1.0f/2.4f */
+   CONSTREG(0x3f000000U, 75U), /* float(2^-1) */
+   CONSTREG(0x3f22f983U, 129U), /* Float 2/PI */
+   CONSTREG(0x3f317218U, 146U), /* Log_e(2) */
+   CONSTREG(0x3f3504f3U, 92U), /* Float 1/SQRT(2) */
+   CONSTREG(0x3f490fdbU, 93U), /* Float PI/4 */
+   CONSTREG(0x3f72a76fU, 158U), /* 1.0f/1.055f */
+   CONSTREG(0x3f800000U, 64U), /* 1.0f */
+   CONSTREG(0x3f860a92U, 151U), /* Pi/3 */
+   CONSTREG(0x3f870a3dU, 139U), /* 1.055f */
+   CONSTREG(0x3fa2f983U, 130U), /* Float 4/PI */
+   CONSTREG(0x3fb504f3U, 91U), /* Float SQRT(2) */
+   CONSTREG(0x3fb8aa3bU, 155U), /* Log_2(e) */
+   CONSTREG(0x3fc90fdbU, 94U), /* Float PI/2 */
+   CONSTREG(0x40000000U, 65U), /* float(2^1) */
+   CONSTREG(0x4019999aU, 159U), /* 2.4f */
+   CONSTREG(0x402df854U, 89U), /* e */
+   CONSTREG(0x40490fdbU, 95U), /* Float PI */
+   CONSTREG(0x40549a78U, 154U), /* Log_2(10) */
+   CONSTREG(0x40800000U, 66U), /* float(2^2) */
+   CONSTREG(0x40c90fdbU, 131U), /* Float 2*PI */
+   CONSTREG(0x41000000U, 67U), /* float(2^3) */
+   CONSTREG(0x41490fdbU, 132U), /* Float 4*PI */
+   CONSTREG(0x414eb852U, 137U), /* 12.92f */
+   CONSTREG(0x41800000U, 68U), /* float(2^4) */
+   CONSTREG(0x41c90fdbU, 133U), /* Float 8*PI */
+   CONSTREG(0x42000000U, 69U), /* float(2^5) */
+   CONSTREG(0x42800000U, 70U), /* float(2^6) */
+   CONSTREG(0x43000000U, 71U), /* float(2^7) */
+   CONSTREG(0x43800000U, 72U), /* float(2^8) */
+   CONSTREG(0x44000000U, 73U), /* float(2^9) */
+   CONSTREG(0x44800000U, 74U), /* float(2^10) */
+   CONSTREG(0x4b000000U, 149U), /* 2^23 */
+   CONSTREG(0x4b800000U, 150U), /* 2^24 */
+   CONSTREG(0x7f7fffffU, 148U), /* FLT_MAX */
+   CONSTREG(0x7f800000U, 142U), /* Infinity */
+   CONSTREG(0x7fff7fffU, 144U), /* ARGB1555 mask */
+   CONSTREG(0x80000000U, 141U), /* -0.0f */
+   CONSTREG(0xffffffffU, 143U), /* -1 */
+};
+
+#undef CONSTREG
+
+/**
+ * \brief Comparison function for bsearch() to support struct rogue_constreg.
+ *
+ * \param[in] lhs The left hand side of the comparison.
+ * \param[in] rhs The right hand side of the comparison.
+ * \return 0 if (lhs == rhs), -1 if (lhs < rhs), 1 if (lhs > rhs).
+ */
+static int constreg_cmp(const void *lhs, const void *rhs)
+{
+   const uint32_t a = ((const struct rogue_constreg *)lhs)->value;
+   const uint32_t b = ((const struct rogue_constreg *)rhs)->value;
+
+   /* Branchless three-way comparison. */
+   return (a > b) - (a < b);
+}
+
+/**
+ * \brief Determines whether a given integer value exists in a constant
+ * register.
+ *
+ * Performs a binary search over the sorted const_regs table.
+ *
+ * \param[in] value The value required.
+ * \return The index of the constant register containing the value, or
+ * ROGUE_NO_CONST_REG if the value is not found.
+ */
+size_t rogue_constreg_lookup(uint32_t value)
+{
+   const struct rogue_constreg key = {
+      .value = value,
+   };
+   const struct rogue_constreg *found = bsearch(&key,
+                                                const_regs,
+                                                ARRAY_SIZE(const_regs),
+                                                sizeof(*const_regs),
+                                                constreg_cmp);
+
+   return found ? found->index : ROGUE_NO_CONST_REG;
+}
diff --git a/src/imagination/rogue/rogue_constreg.h b/src/imagination/rogue/rogue_constreg.h
new file mode 100644 (file)
index 0000000..1d67857
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_CONSTREGS_H
+#define ROGUE_CONSTREGS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "util/macros.h"
+#include "util/u_math.h"
+
+#define ROGUE_NO_CONST_REG SIZE_MAX
+
+PUBLIC
+size_t rogue_constreg_lookup(uint32_t value);
+
+/**
+ * \brief Determines whether a given floating point value exists in a constant
+ * register.
+ *
+ * \param[in] value The value required.
+ * \return The index of the constant register containing the value, or
+ * ROGUE_NO_CONST_REG if the value is not found.
+ */
+static inline size_t rogue_constreg_lookup_float(float value)
+{
+   return rogue_constreg_lookup(fui(value));
+}
+
+#endif /* ROGUE_CONSTREGS_H */
diff --git a/src/imagination/rogue/rogue_dump.c b/src/imagination/rogue/rogue_dump.c
new file mode 100644 (file)
index 0000000..ee21be0
--- /dev/null
@@ -0,0 +1,170 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "rogue_dump.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+
+/**
+ * \file rogue_dump.c
+ *
+ * \brief Contains functions to dump Rogue data structures into a textual
+ * format.
+ */
+
+static const char *const rogue_operand_string[ROGUE_OPERAND_TYPE_COUNT] = {
+   [ROGUE_OPERAND_TYPE_REG_TEMP] = "r",
+   [ROGUE_OPERAND_TYPE_REG_COEFF] = "cf",
+   [ROGUE_OPERAND_TYPE_REG_CONST] = "c",
+   [ROGUE_OPERAND_TYPE_REG_SHARED] = "sh",
+   [ROGUE_OPERAND_TYPE_REG_PIXEL_OUT] = "po",
+   [ROGUE_OPERAND_TYPE_REG_VERTEX_IN] = "vi",
+   [ROGUE_OPERAND_TYPE_REG_INTERNAL] = "i",
+   [ROGUE_OPERAND_TYPE_IMMEDIATE] = "#",
+   [ROGUE_OPERAND_TYPE_DRC] = "drc",
+   [ROGUE_OPERAND_TYPE_VREG] = "V",
+};
+
+static const char *const rogue_opcode_string[ROGUE_OP_COUNT] = {
+   [ROGUE_OP_NOP] = "nop",
+   [ROGUE_OP_END_FRAG] = "end.frag",
+   [ROGUE_OP_END_VERT] = "end.vert",
+   [ROGUE_OP_WDF] = "wdf",
+   [ROGUE_OP_PIX_ITER_W] = "pixiter.w",
+   [ROGUE_OP_MAX] = "max",
+   [ROGUE_OP_MIN] = "min",
+   [ROGUE_OP_PACK_U8888] = "pack.u8888",
+   [ROGUE_OP_MOV] = "mov",
+   [ROGUE_OP_MOV_IMM] = "mov.imm",
+   [ROGUE_OP_FMA] = "fma",
+   [ROGUE_OP_MUL] = "mul",
+   [ROGUE_OP_VTXOUT] = "vtxout",
+};
+
+static const char *const rogue_instr_flag_string[ROGUE_INSTR_FLAG_COUNT] = {
+   [ROGUE_INSTR_FLAG_SAT] = "sat",
+   [ROGUE_INSTR_FLAG_LP] = "lp",
+   [ROGUE_INSTR_FLAG_OLCHK] = "olchk",
+};
+
+static const char rogue_vector_string[4] = {
+   'x',
+   'y',
+   'z',
+   'w',
+};
+
+/**
+ * \brief Dumps an operand as text to a file pointer.
+ *
+ * Prints the operand-type prefix followed by its number/value; register
+ * operands are recognized via the ROGUE_MASK_ANY_REG bitset.
+ *
+ * \param[in] operand The operand.
+ * \param[in] fp The file pointer.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_dump_operand(const struct rogue_operand *operand, FILE *fp)
+{
+   ASSERT_OPERAND_RANGE(operand->type);
+
+   fprintf(fp, "%s", rogue_operand_string[operand->type]);
+
+   switch (operand->type) {
+   case ROGUE_OPERAND_TYPE_IMMEDIATE:
+      fprintf(fp, "%" PRIu64, operand->immediate.value);
+      break;
+
+   case ROGUE_OPERAND_TYPE_DRC:
+      fprintf(fp, "%zu", operand->drc.number);
+      break;
+
+   case ROGUE_OPERAND_TYPE_VREG:
+      fprintf(fp, "%zu", operand->vreg.number);
+      if (operand->vreg.is_vector)
+         fprintf(fp, ".%c", rogue_vector_string[operand->vreg.component]);
+      break;
+
+   default:
+      if (rogue_check_bitset(rogue_onehot(operand->type), ROGUE_MASK_ANY_REG))
+         fprintf(fp, "%zu", operand->reg.number);
+      break;
+   }
+
+   return true;
+}
+
+/**
+ * \brief Dumps an instruction as text to a file pointer.
+ *
+ * Emits the opcode, any instruction flags as ".flag" suffixes, then the
+ * comma-separated operand list, terminated with a semicolon.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] fp The file pointer.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_dump_instr(const struct rogue_instr *instr, FILE *fp)
+{
+   uint64_t remaining_flags;
+
+   ASSERT_OPCODE_RANGE(instr->opcode);
+
+   fprintf(fp, "%s", rogue_opcode_string[instr->opcode]);
+
+   /* Print each set flag bit in its string form. */
+   remaining_flags = instr->flags;
+   while (remaining_flags) {
+      uint64_t flag = u_bit_scan64(&remaining_flags);
+      ASSERT_INSTR_FLAG_RANGE(flag);
+      fprintf(fp, ".%s", rogue_instr_flag_string[flag]);
+   }
+
+   /* Dump each operand, space-separated from the opcode and
+    * comma-separated from each other.
+    */
+   for (size_t i = 0U; i < instr->num_operands; ++i) {
+      fprintf(fp, "%s", i ? ", " : " ");
+      CHECKF(rogue_dump_operand(&instr->operands[i], fp),
+             "Failed to dump operand.");
+   }
+
+   fprintf(fp, ";");
+
+   return true;
+}
+
+/**
+ * \brief Dumps a shader as text to a file pointer.
+ *
+ * Output format: a "# <stage> shader" header line, then one instruction per
+ * line, followed by a trailing blank line.
+ *
+ * \param[in] shader The shader.
+ * \param[in] fp The file pointer.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_dump_shader(const struct rogue_shader *shader, FILE *fp)
+{
+   /* Dump the shader stage. */
+   fprintf(fp, "# %s shader\n", _mesa_shader_stage_to_string(shader->stage));
+
+   /* Dump each instruction. */
+   /* NOTE(review): CHECKF appears to log the message and bail out on failure
+    * — confirm against its definition in rogue_util.h.
+    */
+   foreach_instr (instr, &shader->instr_list) {
+      CHECKF(rogue_dump_instr(instr, fp), "Failed to dump instruction.");
+      fprintf(fp, "\n");
+   }
+   fprintf(fp, "\n");
+
+   return true;
+}
diff --git a/src/imagination/rogue/rogue_dump.h b/src/imagination/rogue/rogue_dump.h
new file mode 100644 (file)
index 0000000..537f923
--- /dev/null
@@ -0,0 +1,44 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_DUMP_H
+#define ROGUE_DUMP_H
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "util/macros.h"
+
+PUBLIC
+bool rogue_dump_operand(const struct rogue_operand *operand, FILE *fp);
+
+PUBLIC
+bool rogue_dump_instr(const struct rogue_instr *instr, FILE *fp);
+
+PUBLIC
+bool rogue_dump_shader(const struct rogue_shader *shader, FILE *fp);
+
+#endif /* ROGUE_DUMP_H */
diff --git a/src/imagination/rogue/rogue_encode.c b/src/imagination/rogue/rogue_encode.c
new file mode 100644 (file)
index 0000000..4550dda
--- /dev/null
@@ -0,0 +1,851 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "rogue_encode.h"
+#include "rogue_encoders.h"
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+#include "util/macros.h"
+
+static size_t rogue_encode_reg_bank(const struct rogue_operand *operand)
+{
+   /* Translate a register operand type into its hardware bank number. */
+   switch (operand->type) {
+   case ROGUE_OPERAND_TYPE_REG_TEMP:
+      return 1;
+   case ROGUE_OPERAND_TYPE_REG_VERTEX_IN:
+      return 2;
+   case ROGUE_OPERAND_TYPE_REG_COEFF:
+      return 3;
+   case ROGUE_OPERAND_TYPE_REG_SHARED:
+      return 4;
+   /* Internal, pixel-out and constant registers all share bank 0. */
+   case ROGUE_OPERAND_TYPE_REG_INTERNAL:
+   case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT:
+   case ROGUE_OPERAND_TYPE_REG_CONST:
+      return 0;
+   default:
+      unreachable("Unimplemented register bank.");
+   }
+}
+
+/**
+ * \brief Field mapping type.
+ *
+ * Selects where a mapped field's source value comes from: an
+ * instruction-level flag, an operand-level flag, or an operand itself.
+ */
+enum rogue_map_type {
+   ROGUE_MAP_TYPE_INSTR_FLAG = 0,
+   ROGUE_MAP_TYPE_OPERAND_FLAG,
+   ROGUE_MAP_TYPE_OPERAND,
+
+   /* Number of mapping types; not a valid mapping type itself. */
+   ROGUE_MAP_TYPE_COUNT,
+};
+
+/**
+ * \brief Field mapping rule description.
+ *
+ * Describes how a single input (an instruction flag, operand flag or
+ * operand) is encoded and scattered into bit ranges of an instruction's
+ * byte pattern.
+ */
+struct rogue_field_mapping {
+   /* Type of mapping being performed. */
+   enum rogue_map_type type;
+
+   /* Index of the source operand/flag being mapped.
+    * For flag mappings this is a ROGUE_INSTR_FLAG_* value; for operand
+    * mappings it is the operand's position.
+    */
+   size_t index;
+
+   /* List of (possibly non-contiguous) bit ranges to perform mapping. */
+   struct rogue_rangelist rangelist;
+
+   /* Function used to encode the input into the value to be mapped.
+    * NULL for instruction-flag mappings (no encoding step; see the flag
+    * entries in instr_encodings).
+    */
+   field_encoder_t encoder_fn;
+};
+
+/**
+ * \brief Instruction encoding rule description.
+ *
+ * A base byte pattern for one opcode plus the field mappings that patch
+ * per-instruction values (flags and operands) into that pattern.
+ */
+struct rogue_instr_encoding {
+   /* Number of bytes making up the base mask. */
+   size_t num_bytes;
+   /* Base mask bytes (fixed opcode bits; variable fields filled in via
+    * the mappings below).
+    */
+   uint8_t *bytes;
+
+   /* Number of field mappings for this instruction. */
+   size_t num_mappings;
+   /* Field mappings; may be NULL when num_mappings is 0 (e.g. NOP). */
+   struct rogue_field_mapping *mappings;
+};
+
+static const
+struct rogue_instr_encoding instr_encodings[ROGUE_OP_COUNT] = {
+       [ROGUE_OP_NOP] = {
+               .num_bytes = 8,
+               .bytes = (uint8_t []) { 0x04, 0x80, 0x6e, 0x00, 0xf2, 0xff, 0xff, 0xff },
+       },
+
+       [ROGUE_OP_END_FRAG] = {
+               .num_bytes = 8,
+               .bytes = (uint8_t []) { 0x04, 0x80, 0xee, 0x00, 0xf2, 0xff, 0xff, 0xff },
+       },
+
+       [ROGUE_OP_END_VERT] = {
+               .num_bytes = 8,
+               .bytes = (uint8_t []) { 0x44, 0xa0, 0x80, 0x05, 0x00, 0x00, 0x00, 0xff },
+       },
+
+       [ROGUE_OP_WDF] = {
+               .num_bytes = 8,
+               .bytes = (uint8_t []) { 0x04, 0x80, 0x6a, 0xff, 0xf2, 0xff, 0xff, 0xff },
+               .num_mappings = 1,
+               .mappings = (struct rogue_field_mapping []) {
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 47, .num = 1, },
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_drc,
+                       },
+               },
+       },
+
+       [ROGUE_OP_PIX_ITER_W] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x48, 0x20, 0xb0, 0x01, 0x80, 0x40, 0x80, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0xff, 0xf1, 0xff },
+               .num_mappings = 6,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Instruction flag mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_INSTR_FLAG,
+                               .index = ROGUE_INSTR_FLAG_SAT,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 100, .num = 1, },
+                                       },
+                               },
+                               .encoder_fn = NULL,
+                       },
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 43, .num = 2, }, /* SB3(2..1) */
+                                               { .start = 54, .num = 1, }, /* SB3(0) */
+                                               { .start = 34, .num = 3, }, /* S3(10..8) */
+                                               { .start = 41, .num = 2, }, /* S3(7..6) */
+                                               { .start = 53, .num = 6, }, /* S3(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 59, .num = 1, },
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_drc,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 2,
+                               .rangelist = {
+                                       .num_ranges = 6,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 59, .num = 1, }, /* SB0(2) */
+                                               { .start = 76, .num = 1, }, /* SB0(1) */
+                                               { .start = 94, .num = 1, }, /* SB0(0) */
+                                               { .start = 57, .num = 1, }, /* S0(7) */
+                                               { .start = 74, .num = 1, }, /* S0(6) */
+                                               { .start = 93, .num = 6, }, /* S0(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_8,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 3,
+                               .rangelist = {
+                                       .num_ranges = 4,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 63, .num = 1, }, /* SB2(2) */
+                                               { .start = 71, .num = 2, }, /* SB2(1..0) */
+                                               { .start = 62, .num = 2, }, /* S2(7..6) */
+                                               { .start = 69, .num = 6, }, /* S2(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_8,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 4,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 99, .num = 4, },
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_ls_1_16,
+                       },
+               },
+       },
+
+       [ROGUE_OP_MAX] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x68, 0x42, 0xd0, 0x3c, 0xfa, 0x10, 0x87, 0x80, 0xc0, 0x80, 0x10, 0x00, 0x32, 0x80, 0x00, 0xff },
+               .num_mappings = 3,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 11, .num = 2, }, /* DBn(2..1) */
+                                               { .start = 22, .num = 1, }, /* DBn(0) */
+                                               { .start = 14, .num = 3, }, /* Dn(10..8) */
+                                               { .start = 9, .num = 2, }, /* Dn(7..6) */
+                                               { .start = 21, .num = 6, }, /* Dn(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 7,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 43, .num = 1, }, /* SB0(2) */
+                                               { .start = 52, .num = 1, }, /* SB0(1) */
+                                               { .start = 70, .num = 1, }, /* SB0(0) */
+                                               { .start = 47, .num = 3, }, /* S0(10..8) */
+                                               { .start = 41, .num = 1, }, /* S0(7) */
+                                               { .start = 50, .num = 1, }, /* S0(6) */
+                                               { .start = 69, .num = 6, }, /* S0(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 2,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 51, .num = 1, }, /* SB1(1) */
+                                               { .start = 61, .num = 1, }, /* SB1(0) */
+                                               { .start = 40, .num = 1, }, /* S1(7) */
+                                               { .start = 49, .num = 2, }, /* S1(6..5) */
+                                               { .start = 60, .num = 5, }, /* S1(4..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_2_8,
+                       },
+               },
+       },
+
+       [ROGUE_OP_MIN] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x68, 0x42, 0xd0, 0x3c, 0xf0, 0x11, 0x87, 0x80, 0xc0, 0x80, 0x10, 0x00, 0x32, 0x80, 0x00, 0xff },
+               .num_mappings = 3,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 11, .num = 2, }, /* DBn(2..1) */
+                                               { .start = 22, .num = 1, }, /* DBn(0) */
+                                               { .start = 14, .num = 3, }, /* Dn(10..8) */
+                                               { .start = 9, .num = 2, }, /* Dn(7..6) */
+                                               { .start = 21, .num = 6, }, /* Dn(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 7,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 43, .num = 1, }, /* SB0(2) */
+                                               { .start = 52, .num = 1, }, /* SB0(1) */
+                                               { .start = 70, .num = 1, }, /* SB0(0) */
+                                               { .start = 47, .num = 3, }, /* S0(10..8) */
+                                               { .start = 41, .num = 1, }, /* S0(7) */
+                                               { .start = 50, .num = 1, }, /* S0(6) */
+                                               { .start = 69, .num = 6, }, /* S0(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 2,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 51, .num = 1, }, /* SB1(1) */
+                                               { .start = 61, .num = 1, }, /* SB1(0) */
+                                               { .start = 40, .num = 1, }, /* S1(7) */
+                                               { .start = 49, .num = 2, }, /* S1(6..5) */
+                                               { .start = 60, .num = 5, }, /* S1(4..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_2_8,
+                       },
+               },
+       },
+
+       [ROGUE_OP_PACK_U8888] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x58, 0x92, 0x06, 0x9c, 0x20, 0x80, 0x00, 0x00, 0x00, 0x2c, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff },
+               .num_mappings = 2,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 35, .num = 2, }, /* DBn(2..1) */
+                                               { .start = 46, .num = 1, }, /* DBn(0) */
+                                               { .start = 38, .num = 3, }, /* Dn(10..8) */
+                                               { .start = 33, .num = 2, }, /* Dn(7..6) */
+                                               { .start = 45, .num = 6, }, /* Dn(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 75, .num = 2, }, /* SB0(2..1) */
+                                               { .start = 86, .num = 1, }, /* SB0(0) */
+                                               { .start = 66, .num = 3, }, /* S0(10..8) */
+                                               { .start = 73, .num = 2, }, /* S0(7..6) */
+                                               { .start = 85, .num = 6, }, /* S0(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+               },
+       },
+
+       [ROGUE_OP_MOV] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x48, 0x42, 0xd0, 0x3f, 0x87, 0x80, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff },
+               .num_mappings = 3,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Instruction flag mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_INSTR_FLAG,
+                               .index = ROGUE_INSTR_FLAG_OLCHK,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 115, .num = 1, },
+                                       },
+                               },
+                               .encoder_fn = NULL,
+                       },
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 35, .num = 2, }, /* DBn(2..1) */
+                                               { .start = 46, .num = 1, }, /* DBn(0) */
+                                               { .start = 38, .num = 3, }, /* Dn(10..8) */
+                                               { .start = 33, .num = 2, }, /* Dn(7..6) */
+                                               { .start = 45, .num = 6, }, /* Dn(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 75, .num = 2, }, /* SB0(2..1) */
+                                               { .start = 86, .num = 1, }, /* SB0(0) */
+                                               { .start = 66, .num = 3, }, /* S0(10..8) */
+                                               { .start = 73, .num = 2, }, /* S0(7..6) */
+                                               { .start = 85, .num = 6, }, /* S0(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+               },
+       },
+
+       [ROGUE_OP_MOV_IMM] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x88, 0x92, 0x40, 0x91, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xf2, 0xff, 0xff, 0xff },
+               .num_mappings = 2,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 35, .num = 2, }, /* DBn(2..1) */
+                                               { .start = 46, .num = 1, }, /* DBn(0) */
+                                               { .start = 38, .num = 3, }, /* Dn(10..8) */
+                                               { .start = 33, .num = 2, }, /* Dn(7..6) */
+                                               { .start = 45, .num = 6, }, /* Dn(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 4,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 71, .num = 8, }, /* imm(31:24) */
+                                               { .start = 79, .num = 8, }, /* imm(23:16) */
+                                               { .start = 87, .num = 8, }, /* imm(15:8) */
+                                               { .start = 95, .num = 8, }, /* imm(7:0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_imm,
+                       },
+               },
+       },
+
+       [ROGUE_OP_FMA] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x28, 0x02, 0xd0, 0x00, 0x80, 0x40, 0x80, 0x00, 0x00, 0x00, 0x00, 0x80, 0x00, 0xff, 0xf1, 0xff },
+               .num_mappings = 6,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Instruction flag mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_INSTR_FLAG,
+                               .index = ROGUE_INSTR_FLAG_SAT,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 104, .num = 1, },
+                                       },
+                               },
+                               .encoder_fn = NULL,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_INSTR_FLAG,
+                               .index = ROGUE_INSTR_FLAG_LP,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 100, .num = 1, },
+                                       },
+                               },
+                               .encoder_fn = NULL,
+                       },
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 27, .num = 2, }, /* DBn(2..1) */
+                                               { .start = 38, .num = 1, }, /* DBn(0) */
+                                               { .start = 30, .num = 3, }, /* Dn(10..8) */
+                                               { .start = 25, .num = 2, }, /* Dn(7..6) */
+                                               { .start = 37, .num = 6, }, /* Dn(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 6,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 59, .num = 1, }, /* SB0(2) */
+                                               { .start = 76, .num = 1, }, /* SB0(1) */
+                                               { .start = 94, .num = 1, }, /* SB0(0) */
+                                               { .start = 57, .num = 1, }, /* S0(7) */
+                                               { .start = 74, .num = 1, }, /* S0(6) */
+                                               { .start = 93, .num = 6, }, /* S0(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_8,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 2,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 75, .num = 1, }, /* SB1(1) */
+                                               { .start = 85, .num = 1, }, /* SB1(0) */
+                                               { .start = 56, .num = 1, }, /* S1(7) */
+                                               { .start = 73, .num = 2, }, /* S1(6..5) */
+                                               { .start = 84, .num = 5, }, /* S1(4..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_2_8,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 3,
+                               .rangelist = {
+                                       .num_ranges = 4,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 63, .num = 1, }, /* SB2(2) */
+                                               { .start = 71, .num = 2, }, /* SB2(1..0) */
+                                               { .start = 62, .num = 2, }, /* S2(7..6) */
+                                               { .start = 69, .num = 6, }, /* S2(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_8,
+                       },
+               },
+       },
+
+       [ROGUE_OP_MUL] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x28, 0x02, 0x40, 0x80, 0xc0, 0x80, 0x00, 0x00, 0x00, 0x80, 0x00, 0xff, 0xf2, 0xff, 0xff, 0xff },
+               .num_mappings = 5,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Instruction flag mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_INSTR_FLAG,
+                               .index = ROGUE_INSTR_FLAG_SAT,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 108, .num = 1, },
+                                       },
+                               },
+                               .encoder_fn = NULL,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_INSTR_FLAG,
+                               .index = ROGUE_INSTR_FLAG_LP,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 109, .num = 1, },
+                                       },
+                               },
+                               .encoder_fn = NULL,
+                       },
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 43, .num = 2, }, /* DBn(2..1) */
+                                               { .start = 54, .num = 1, }, /* DBn(0) */
+                                               { .start = 46, .num = 3, }, /* Dn(10..8) */
+                                               { .start = 41, .num = 2, }, /* Dn(7..6) */
+                                               { .start = 53, .num = 6, }, /* Dn(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 7,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 75, .num = 1, }, /* SB0(2) */
+                                               { .start = 84, .num = 1, }, /* SB0(1) */
+                                               { .start = 102, .num = 1, }, /* SB0(0) */
+                                               { .start = 79, .num = 3, }, /* S0(10..8) */
+                                               { .start = 73, .num = 1, }, /* S0(7) */
+                                               { .start = 82, .num = 1, }, /* S0(6) */
+                                               { .start = 101, .num = 6, }, /* S0(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 2,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 83, .num = 1, }, /* SB1(1) */
+                                               { .start = 93, .num = 1, }, /* SB1(0) */
+                                               { .start = 72, .num = 1, }, /* S1(7) */
+                                               { .start = 81, .num = 2, }, /* S1(6..5) */
+                                               { .start = 92, .num = 5, }, /* S1(4..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_2_8,
+                       },
+               },
+       },
+
+       [ROGUE_OP_VTXOUT] = {
+               .num_bytes = 16,
+               .bytes = (uint8_t []) { 0x48, 0x20, 0x08, 0x00, 0x80, 0x00, 0x00, 0x00, 0x30, 0xff, 0xf3, 0xff, 0xff, 0xff, 0xff, 0xff },
+               .num_mappings = 2,
+               .mappings = (struct rogue_field_mapping []) {
+                       /* Operand mappings. */
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 0,
+                               .rangelist = {
+                                       .num_ranges = 1,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 103, .num = 8, }, /* Immediate address. */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_imm,
+                       },
+                       {
+                               .type = ROGUE_MAP_TYPE_OPERAND,
+                               .index = 1,
+                               .rangelist = {
+                                       .num_ranges = 5,
+                                       .ranges = (struct rogue_bitrange []) {
+                                               { .start = 83, .num = 2, }, /* SB0(2..1) */
+                                               { .start = 94, .num = 1, }, /* SB0(0) */
+                                               { .start = 74, .num = 3, }, /* S0(10..8) */
+                                               { .start = 81, .num = 2, }, /* S0(7..6) */
+                                               { .start = 93, .num = 6, }, /* S0(5..0) */
+                                       },
+                               },
+                               .encoder_fn = &rogue_encoder_reg_3_11,
+                       },
+               },
+       },
+};
+
+/**
+ * \brief Encodes an instruction flag bit into an instruction mask.
+ *
+ * The flag's presence is turned into a 0/1 value and scattered into the
+ * instruction bytes according to the mapping's bit range list.
+ *
+ * \param[in] set Whether the flag is present on the instruction.
+ * \param[in] mapping The field mapping describing where the flag lives.
+ * \param[in] instr_size The size of the instruction mask in bytes.
+ * \param[in] instr_bytes The instruction mask.
+ * \return true if encoding was successful.
+ */
+static bool rogue_encode_flag(bool set,
+                              const struct rogue_field_mapping *mapping,
+                              size_t instr_size,
+                              uint8_t instr_bytes[instr_size])
+{
+   const uint64_t flag_value = set ? 1ULL : 0ULL;
+
+   return rogue_distribute_value(flag_value,
+                                 &mapping->rangelist,
+                                 instr_size,
+                                 instr_bytes);
+}
+
+/**
+ * \brief Applies an operand encoding onto an instruction mask.
+ *
+ * The operand type selects how the raw field value is produced via the
+ * mapping's encoder function; the resulting value is then scattered into
+ * "instr_bytes" according to the mapping's bit ranges.
+ *
+ * NOTE(review): CHECKF presumably logs its message and returns false from
+ * the enclosing function when the expression fails — confirm against
+ * rogue_util.h.
+ *
+ * \param[in] operand The operand to apply.
+ * \param[in] mapping The field mapping to apply.
+ * \param[in] instr_size The size of the instruction mask in bytes.
+ * \param[in] instr_bytes The instruction mask.
+ * \return true if encoding was successful.
+ */
+static bool rogue_encode_operand(const struct rogue_operand *operand,
+                                 const struct rogue_field_mapping *mapping,
+                                 size_t instr_size,
+                                 uint8_t instr_bytes[instr_size])
+{
+   uint64_t value = 0U;
+
+   switch (operand->type) {
+   /* Pixel-output registers: the register number is biased by
+    * ROGUE_PIXEL_OUT_REG_OFFSET before encoding.
+    */
+   case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT:
+      /* The "2" is the number of variadic inputs: bank, register number. */
+      CHECKF(
+         mapping->encoder_fn(&value,
+                             2,
+                             rogue_encode_reg_bank(operand),
+                             operand->reg.number + ROGUE_PIXEL_OUT_REG_OFFSET),
+         "Failed to encode pixel output register operand.");
+      break;
+   /* Internal registers: as above, biased by ROGUE_INTERNAL_REG_OFFSET. */
+   case ROGUE_OPERAND_TYPE_REG_INTERNAL:
+      CHECKF(
+         mapping->encoder_fn(&value,
+                             2,
+                             rogue_encode_reg_bank(operand),
+                             operand->reg.number + ROGUE_INTERNAL_REG_OFFSET),
+         "Failed to encode internal register operand.");
+      break;
+   /* Plain register banks: bank and register number encoded as-is. */
+   case ROGUE_OPERAND_TYPE_REG_TEMP:
+   case ROGUE_OPERAND_TYPE_REG_COEFF:
+   case ROGUE_OPERAND_TYPE_REG_CONST:
+   case ROGUE_OPERAND_TYPE_REG_SHARED:
+   case ROGUE_OPERAND_TYPE_REG_VERTEX_IN:
+      CHECKF(mapping->encoder_fn(&value,
+                                 2,
+                                 rogue_encode_reg_bank(operand),
+                                 operand->reg.number),
+             "Failed to encode register operand.");
+      break;
+
+   /* Immediates and DRC numbers are single-input pass-throughs. */
+   case ROGUE_OPERAND_TYPE_IMMEDIATE:
+      CHECKF(mapping->encoder_fn(&value, 1, operand->immediate.value),
+             "Failed to encode immediate operand.");
+      break;
+
+   case ROGUE_OPERAND_TYPE_DRC:
+      CHECKF(mapping->encoder_fn(&value, 1, (uint64_t)operand->drc.number),
+             "Failed to encode DRC operand.");
+      break;
+
+   /* Unsupported operand type for direct field encoding. */
+   default:
+      return false;
+   }
+
+   /* Scatter the encoded value across the mapping's bit ranges. */
+   CHECKF(rogue_distribute_value(value,
+                                 &mapping->rangelist,
+                                 instr_size,
+                                 instr_bytes),
+          "Failed to distribute value.");
+
+   return true;
+}
+
+/**
+ * \brief Encodes "instr" and writes the resulting bytes to "fp".
+ *
+ * The opcode's base byte pattern is copied first, then every field
+ * mapping (instruction flags and operands) is applied on top of it
+ * before the bytes are written out.
+ *
+ * \param[in] instr The instruction to be encoded.
+ * \param[in] fp The file pointer.
+ * \return true if encoding was successful.
+ */
+bool rogue_encode_instr(const struct rogue_instr *instr, FILE *fp)
+{
+   const struct rogue_instr_encoding *instr_encoding;
+   uint8_t instr_bytes[ROGUE_MAX_INSTR_BYTES];
+   size_t instr_size;
+
+   ASSERT_OPCODE_RANGE(instr->opcode);
+
+   instr_encoding = &instr_encodings[instr->opcode];
+
+   /* Start from the base byte pattern for this opcode. */
+   instr_size = instr_encoding->num_bytes;
+   assert(instr_size <= ARRAY_SIZE(instr_bytes));
+   memcpy(instr_bytes, instr_encoding->bytes, instr_size);
+
+   /* Apply each flag/operand field mapping in turn. */
+   for (size_t i = 0U; i < instr_encoding->num_mappings; ++i) {
+      const struct rogue_field_mapping *mapping = &instr_encoding->mappings[i];
+
+      switch (mapping->type) {
+      case ROGUE_MAP_TYPE_INSTR_FLAG: {
+         bool set = !!(instr->flags & rogue_onehot(mapping->index));
+         CHECKF(rogue_encode_flag(set, mapping, instr_size, instr_bytes),
+                "Failed to encode instruction flag.");
+         break;
+      }
+
+      /* Operand flags are not supported here. */
+      case ROGUE_MAP_TYPE_OPERAND_FLAG:
+         return false;
+
+      case ROGUE_MAP_TYPE_OPERAND:
+         CHECKF(rogue_encode_operand(&instr->operands[mapping->index],
+                                     mapping,
+                                     instr_size,
+                                     instr_bytes),
+                "Failed to encode instruction operand.");
+         break;
+
+      default:
+         return false;
+      }
+   }
+
+   CHECKF(fwrite(instr_bytes, 1, instr_size, fp) == instr_size,
+          "Failed to write encoded instruction bytes.");
+   fflush(fp);
+
+   return true;
+}
+
+/**
+ * \brief Encodes each instruction in "shader", writing the output to "fp".
+ *
+ * The output is padded with 0xff bytes so that its total size is a
+ * multiple of 16 bytes.
+ *
+ * \param[in] shader The shader to be encoded.
+ * \param[in] fp The file pointer.
+ * \return true if encoding was successful.
+ */
+bool rogue_encode_shader(const struct rogue_shader *shader, FILE *fp)
+{
+   long bytes_written;
+   size_t pad_bytes;
+
+   /* Encode each instruction. */
+   foreach_instr (instr, &shader->instr_list)
+      CHECKF(rogue_encode_instr(instr, fp), "Failed to encode instruction.");
+
+   /* Pad end of shader if required. */
+   bytes_written = ftell(fp);
+   if (bytes_written <= 0)
+      return false;
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   /* Round the size up to the next multiple of 16. The previous code wrote
+    * (bytes_written % 16) bytes, which only produces a 16-byte-aligned
+    * total when the remainder happens to be 0 or 8.
+    */
+   pad_bytes = (16 - (size_t)(bytes_written % 16)) % 16;
+   for (size_t u = 0; u < pad_bytes; ++u)
+      fputc(0xff, fp);
+
+   return true;
+}
diff --git a/src/imagination/rogue/rogue_encode.h b/src/imagination/rogue/rogue_encode.h
new file mode 100644 (file)
index 0000000..0305e37
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_ENCODE_H
+#define ROGUE_ENCODE_H
+
+#include <stdbool.h>
+#include <stdio.h>
+
+#include "util/macros.h"
+
+struct rogue_instr;
+struct rogue_shader;
+
+PUBLIC
+bool rogue_encode_instr(const struct rogue_instr *instr, FILE *fp);
+
+PUBLIC
+bool rogue_encode_shader(const struct rogue_shader *shader, FILE *fp);
+
+#endif /* ROGUE_ENCODE_H */
diff --git a/src/imagination/rogue/rogue_encoders.c b/src/imagination/rogue/rogue_encoders.c
new file mode 100644 (file)
index 0000000..1da7990
--- /dev/null
@@ -0,0 +1,164 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_encoders.h"
+#include "rogue_util.h"
+#include "util/bitscan.h"
+
+/**
+ * \brief Forwards a single input value to the destination unchanged.
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] inputs Number of inputs provided.
+ * \param[in] ... Input value(s).
+ * \return true if encoding was successful.
+ */
+bool rogue_encoder_pass(uint64_t *value, size_t inputs, ...)
+{
+   va_list args;
+   uint64_t input;
+
+   assert(inputs == 1);
+
+   va_start(args, inputs);
+   input = va_arg(args, uint64_t);
+   va_end(args);
+
+   *value = input;
+
+   return true;
+}
+
+/**
+ * \brief Encoder for DRC values.
+ *
+ * \sa #rogue_encoder_pass()
+ *
+ * NOTE(review): the "alias" attribute is a GNU extension (GCC/Clang);
+ * this symbol resolves to the same definition as rogue_encoder_pass().
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] inputs Number of inputs provided.
+ * \param[in] ... Input value(s).
+ * \return true if encoding was successful.
+ */
+bool rogue_encoder_drc(uint64_t *value, size_t inputs, ...)
+   __attribute__((alias("rogue_encoder_pass")));
+
+/**
+ * \brief Encoder for immediate values.
+ *
+ * \sa #rogue_encoder_pass()
+ *
+ * NOTE(review): aliased to rogue_encoder_pass(), same as
+ * rogue_encoder_drc() above.
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] inputs Number of inputs provided.
+ * \param[in] ... Input value(s).
+ * \return true if encoding was successful.
+ */
+bool rogue_encoder_imm(uint64_t *value, size_t inputs, ...)
+   __attribute__((alias("rogue_encoder_pass")));
+
+/**
+ * \brief Encodes a value in the range 1-16, where 16 wraps to 0.
+ *
+ * Inputs 1-15 encode as themselves and an input of 16 encodes as 0; any
+ * other input is rejected and the destination is set to UINT64_MAX.
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] inputs Number of inputs provided.
+ * \param[in] ... Input value(s).
+ * \return true if encoding was successful.
+ */
+bool rogue_encoder_ls_1_16(uint64_t *value, size_t inputs, ...)
+{
+   va_list args;
+   uint64_t input;
+
+   assert(inputs == 1);
+
+   va_start(args, inputs);
+   input = va_arg(args, uint64_t);
+   va_end(args);
+
+   /* Reject anything outside 1-16. */
+   if (input < 1 || input > 16) {
+      *value = UINT64_MAX;
+      return false;
+   }
+
+   /* 1-15 map to themselves; 16 is represented by 0. */
+   *value = (input == 16) ? 0 : input;
+
+   return true;
+}
+
+/**
+ * \brief Packs a register bank and register number into a single value.
+ *
+ * The register number occupies the low "num_bits" bits and the bank is
+ * placed immediately above it.
+ *
+ * \param[in] value Pointer to the destination value.
+ * \param[in] bank_bits The number of bits used to represent the register bank.
+ * \param[in] bank The register bank.
+ * \param[in] num_bits The number of bits used to represent the register number.
+ * \param[in] num The register number.
+ * \return true if encoding was successful.
+ */
+static bool rogue_encoder_reg(uint64_t *value,
+                              size_t bank_bits,
+                              size_t bank,
+                              size_t num_bits,
+                              size_t num)
+{
+   /* Verify "num" fits in "num_bits" and "bank" fits in "bank_bits". */
+   assert(util_last_bit64(num) <= num_bits);
+   assert(util_last_bit64(bank) <= bank_bits);
+
+   *value = ((uint64_t)bank << num_bits) | num;
+
+   return true;
+}
+
+/**
+ * \brief Macro to define the rogue_encoder_reg variants.
+ *
+ * Each expansion defines rogue_encoder_reg_<bank_bits>_<num_bits>(): a
+ * variadic wrapper that expects exactly two size_t inputs (register bank,
+ * then register number) and forwards them, together with the bit widths
+ * baked in by the macro arguments, to rogue_encoder_reg().
+ */
+#define ROGUE_ENCODER_REG_VARIANT(bank_bits, num_bits)                 \
+   bool rogue_encoder_reg_##bank_bits##_##num_bits(uint64_t *value,    \
+                                                   size_t inputs,      \
+                                                   ...)                \
+   {                                                                   \
+      va_list args;                                                    \
+      size_t bank;                                                     \
+      size_t num;                                                      \
+      assert(inputs == 2);                                             \
+      va_start(args, inputs);                                          \
+      bank = va_arg(args, size_t);                                     \
+      num = va_arg(args, size_t);                                      \
+      va_end(args);                                                    \
+      return rogue_encoder_reg(value, bank_bits, bank, num_bits, num); \
+   }
+
+/* The (bank_bits, num_bits) combinations used by the field mappings. */
+ROGUE_ENCODER_REG_VARIANT(2, 8)
+ROGUE_ENCODER_REG_VARIANT(3, 8)
+ROGUE_ENCODER_REG_VARIANT(3, 11)
+
+#undef ROGUE_ENCODER_REG_VARIANT
diff --git a/src/imagination/rogue/rogue_encoders.h b/src/imagination/rogue/rogue_encoders.h
new file mode 100644 (file)
index 0000000..ea5c94d
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_ENCODERS_H
+#define ROGUE_ENCODERS_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "util/macros.h"
+
+/* Returns false if input was invalid. */
+typedef bool (*field_encoder_t)(uint64_t *value, size_t inputs, ...);
+
+bool rogue_encoder_pass(uint64_t *value, size_t inputs, ...);
+bool rogue_encoder_drc(uint64_t *value, size_t inputs, ...);
+bool rogue_encoder_imm(uint64_t *value, size_t inputs, ...);
+bool rogue_encoder_ls_1_16(uint64_t *value, size_t inputs, ...);
+
+/**
+ * \brief Macro to declare the rogue_encoder_reg variants.
+ */
+#define ROGUE_ENCODER_REG_VARIANT(bank_bits, num_bits)              \
+   bool rogue_encoder_reg_##bank_bits##_##num_bits(uint64_t *value, \
+                                                   size_t inputs,   \
+                                                   ...);
+ROGUE_ENCODER_REG_VARIANT(2, 8)
+ROGUE_ENCODER_REG_VARIANT(3, 8)
+ROGUE_ENCODER_REG_VARIANT(3, 11)
+#undef ROGUE_ENCODER_REG_VARIANT
+
+#endif /* ROGUE_ENCODERS_H */
diff --git a/src/imagination/rogue/rogue_instr.c b/src/imagination/rogue/rogue_instr.c
new file mode 100644 (file)
index 0000000..a698802
--- /dev/null
@@ -0,0 +1,227 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_util.h"
+#include "util/ralloc.h"
+
+/**
+ * \file rogue_instr.c
+ *
+ * \brief Contains functions to manipulate Rogue instructions.
+ */
+
+/* clang-format off */
+
+/* Number of operands taken by each opcode. Designated initializers are
+ * used, so any opcode not listed here would default to 0.
+ */
+static const size_t instr_operand_count[ROGUE_OP_COUNT] = {
+   [ROGUE_OP_NOP] = 0,
+   [ROGUE_OP_END_FRAG] = 0,
+   [ROGUE_OP_END_VERT] = 0,
+   [ROGUE_OP_WDF] = 1,
+   [ROGUE_OP_PIX_ITER_W] = 5,
+   [ROGUE_OP_MAX] = 3,
+   [ROGUE_OP_MIN] = 3,
+   [ROGUE_OP_PACK_U8888] = 2,
+   [ROGUE_OP_MOV] = 2,
+   [ROGUE_OP_MOV_IMM] = 2,
+   [ROGUE_OP_FMA] = 4,
+   [ROGUE_OP_MUL] = 3,
+   [ROGUE_OP_VTXOUT] = 2,
+};
+
+/* clang-format on */
+
+/**
+ * \brief Looks up how many operands an instruction opcode requires.
+ *
+ * \param[in] opcode The instruction opcode.
+ * \return The number of operands.
+ */
+static inline size_t rogue_instr_num_operands(enum rogue_opcode opcode)
+{
+   ASSERT_OPCODE_RANGE(opcode);
+
+   const size_t num_operands = instr_operand_count[opcode];
+
+   return num_operands;
+}
+
+/**
+ * \brief Allocates and sets up a Rogue instruction.
+ *
+ * The instruction (and its operand array, if any) is ralloc-parented to
+ * "mem_ctx", so freeing the context frees the instruction too.
+ *
+ * \param[in] mem_ctx The memory context for the instruction.
+ * \param[in] opcode The instruction opcode.
+ * \return A rogue_instr* if successful, or NULL if unsuccessful.
+ */
+struct rogue_instr *rogue_instr_create(void *mem_ctx, enum rogue_opcode opcode)
+{
+   struct rogue_instr *instr;
+   size_t num_operands;
+
+   ASSERT_OPCODE_RANGE(opcode);
+
+   instr = rzalloc_size(mem_ctx, sizeof(*instr));
+   if (!instr)
+      return NULL;
+
+   num_operands = rogue_instr_num_operands(opcode);
+
+   instr->opcode = opcode;
+   instr->num_operands = num_operands;
+
+   /* Operand-less instructions skip the array allocation entirely. */
+   if (num_operands > 0) {
+      instr->operands =
+         rzalloc_array_size(instr, sizeof(*instr->operands), num_operands);
+      if (!instr->operands) {
+         ralloc_free(instr);
+         return NULL;
+      }
+   }
+
+   return instr;
+}
+
+/**
+ * \brief Sets a Rogue instruction flag.
+ *
+ * "instr->flags" is a bitmask of one-hot #rogue_instr_flag values (see
+ * rogue_encode_instr(), which tests it with a per-flag mask), so the new
+ * flag is OR-ed in; plain assignment would drop any flag set by an
+ * earlier call.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] flag The flag to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_flag(struct rogue_instr *instr, enum rogue_instr_flag flag)
+{
+   instr->flags |= ROH(flag);
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to an immediate value.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] value The value to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_imm(struct rogue_instr *instr,
+                                 size_t index,
+                                 uint64_t value)
+{
+   struct rogue_operand *operand;
+
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+
+   operand = &instr->operands[index];
+   operand->type = ROGUE_OPERAND_TYPE_IMMEDIATE;
+   operand->immediate.value = value;
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to a DRC number.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] number The DRC number to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_drc(struct rogue_instr *instr,
+                                 size_t index,
+                                 size_t number)
+{
+   struct rogue_operand *operand;
+
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+
+   operand = &instr->operands[index];
+   operand->type = ROGUE_OPERAND_TYPE_DRC;
+   operand->drc.number = number;
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to a register.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] type The register type to set.
+ * \param[in] number The register number to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_reg(struct rogue_instr *instr,
+                                 size_t index,
+                                 enum rogue_operand_type type,
+                                 size_t number)
+{
+   struct rogue_operand *operand;
+
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+   ASSERT_OPERAND_REG(type);
+
+   operand = &instr->operands[index];
+   operand->type = type;
+   operand->reg.number = number;
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to a (scalar) virtual register.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] number The register number to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_vreg(struct rogue_instr *instr,
+                                  size_t index,
+                                  size_t number)
+{
+   struct rogue_operand *operand;
+
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+
+   operand = &instr->operands[index];
+   operand->type = ROGUE_OPERAND_TYPE_VREG;
+   operand->vreg.number = number;
+   operand->vreg.is_vector = false;
+
+   return true;
+}
+
+/**
+ * \brief Sets a Rogue instruction operand to a vector-typed virtual
+ * register component.
+ *
+ * \param[in] instr The instruction.
+ * \param[in] index The operand index.
+ * \param[in] component The vector component.
+ * \param[in] number The register number to set.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_instr_set_operand_vreg_vec(struct rogue_instr *instr,
+                                      size_t index,
+                                      size_t component,
+                                      size_t number)
+{
+   struct rogue_operand *operand;
+
+   ASSERT_INSTR_OPERAND_INDEX(instr, index);
+
+   operand = &instr->operands[index];
+   operand->type = ROGUE_OPERAND_TYPE_VREG;
+   operand->vreg.number = number;
+   operand->vreg.is_vector = true;
+   operand->vreg.component = component;
+
+   return true;
+}
diff --git a/src/imagination/rogue/rogue_instr.h b/src/imagination/rogue/rogue_instr.h
new file mode 100644 (file)
index 0000000..5b6efa5
--- /dev/null
@@ -0,0 +1,113 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_INSTR_H
+#define ROGUE_INSTR_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_operand.h"
+#include "util/list.h"
+
+/**
+ * \brief Instruction opcodes.
+ */
+enum rogue_opcode {
+   ROGUE_OP_NOP = 0, /**< No-operation. */
+   ROGUE_OP_END_FRAG, /**< Fragment shader end. */
+   ROGUE_OP_END_VERT, /**< Vertex shader end. */
+   ROGUE_OP_WDF, /**< Write data fence. */
+
+   ROGUE_OP_PIX_ITER_W, /**< Pixel iteration with coefficients. */
+
+   ROGUE_OP_MAX, /**< Returns the largest out of two floats. */
+   ROGUE_OP_MIN, /**< Returns the smallest out of two floats. */
+
+   ROGUE_OP_PACK_U8888, /**< Scales the four input floats:
+                         * [0.0f, 1.0f] -> [0, 255] and packs them
+                         * into a 32-bit unsigned integer.
+                         */
+
+   ROGUE_OP_MOV, /**< Register move instruction. */
+   ROGUE_OP_MOV_IMM, /**< Move immediate instruction. */
+
+   ROGUE_OP_FMA, /**< Fused-multiply-add (float). */
+   ROGUE_OP_MUL, /**< Multiply (float). */
+
+   ROGUE_OP_VTXOUT, /**< Writes the input register
+                     * to the given vertex output index.
+                     */
+
+   ROGUE_OP_COUNT,
+};
+
+/**
+ * \brief Instruction flags.
+ */
+enum rogue_instr_flag {
+   ROGUE_INSTR_FLAG_SAT = 0, /**< Saturate values to 0.0 ... 1.0. */
+   ROGUE_INSTR_FLAG_LP, /**< Low-precision modifier. */
+   ROGUE_INSTR_FLAG_OLCHK, /**< Overlap check (pixel write). */
+
+   ROGUE_INSTR_FLAG_COUNT,
+};
+
+/**
+ * \brief Instruction description.
+ */
+struct rogue_instr {
+   enum rogue_opcode opcode;
+
+   size_t num_operands; /**< Number of elements in "operands". */
+   struct rogue_operand *operands;
+
+   uint64_t flags; /**< A mask of #rogue_instr_flag values. */
+
+   struct list_head node; /**< Linked list node. */
+};
+
+struct rogue_instr *rogue_instr_create(void *mem_ctx, enum rogue_opcode opcode);
+
+bool rogue_instr_set_flag(struct rogue_instr *instr,
+                          enum rogue_instr_flag flag);
+
+bool rogue_instr_set_operand_imm(struct rogue_instr *instr,
+                                 size_t index,
+                                 uint64_t value);
+bool rogue_instr_set_operand_drc(struct rogue_instr *instr,
+                                 size_t index,
+                                 size_t number);
+bool rogue_instr_set_operand_reg(struct rogue_instr *instr,
+                                 size_t index,
+                                 enum rogue_operand_type type,
+                                 size_t number);
+bool rogue_instr_set_operand_vreg(struct rogue_instr *instr,
+                                  size_t index,
+                                  size_t number);
+bool rogue_instr_set_operand_vreg_vec(struct rogue_instr *instr,
+                                      size_t index,
+                                      size_t component,
+                                      size_t number);
+#endif /* ROGUE_INSTR_H */
diff --git a/src/imagination/rogue/rogue_nir.c b/src/imagination/rogue/rogue_nir.c
new file mode 100644 (file)
index 0000000..3fe90b4
--- /dev/null
@@ -0,0 +1,191 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/spirv/nir_spirv.h"
+#include "nir/nir.h"
+#include "nir/nir_schedule.h"
+#include "rogue_nir.h"
+#include "rogue_operand.h"
+
+/**
+ * \file rogue_nir.c
+ *
+ * \brief Contains NIR-specific functions.
+ */
+
+/**
+ * \brief SPIR-V to NIR compilation options.
+ *
+ * Also consumed by rogue_nir_passes() when lowering explicit UBO access
+ * (see the nir_lower_explicit_io call there).
+ */
+static const struct spirv_to_nir_options spirv_options = {
+   .environment = NIR_SPIRV_VULKAN,
+
+   /* Buffer address: (descriptor_set, binding), offset. */
+   .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
+};
+
+/**
+ * \brief NIR shader compiler options for the Rogue backend.
+ */
+static const nir_shader_compiler_options nir_options = {
+   .lower_fsat = true,
+   .fuse_ffma32 = true,
+};
+
+/**
+ * \brief Returns the SPIR-V to NIR compilation options.
+ *
+ * \param[in] compiler The Rogue compiler context (currently unused).
+ * \return The static SPIR-V to NIR options.
+ */
+const struct spirv_to_nir_options *
+rogue_get_spirv_options(const struct rogue_compiler *compiler)
+{
+   return &spirv_options;
+}
+
+/**
+ * \brief Returns the NIR shader compiler options.
+ *
+ * \param[in] compiler The Rogue compiler context (currently unused).
+ * \return The static NIR compiler options.
+ */
+const nir_shader_compiler_options *
+rogue_get_compiler_options(const struct rogue_compiler *compiler)
+{
+   return &nir_options;
+}
+
+/* Type-size callback for nir_lower_io: the number of attribute slots
+ * occupied by \p type (the bindless flag is ignored).
+ */
+static int rogue_glsl_type_size(const struct glsl_type *type, bool bindless)
+{
+   return glsl_count_attribute_slots(type, false);
+}
+
+/**
+ * \brief Applies optimizations and passes required to lower the NIR shader into
+ * a form suitable for lowering to Rogue IR.
+ *
+ * \param[in] ctx Shared multi-stage build context (currently unused here).
+ * \param[in] nir The NIR shader; lowered and optimized in place.
+ * \param[in] stage Shader stage (currently unused; nir->info.stage is used).
+ * \return true if successful, otherwise false.
+ */
+bool rogue_nir_passes(struct rogue_build_ctx *ctx,
+                      nir_shader *nir,
+                      gl_shader_stage stage)
+{
+   bool progress;
+
+   nir_validate_shader(nir, "after spirv_to_nir");
+
+   /* Splitting. */
+   NIR_PASS_V(nir, nir_split_var_copies);
+   NIR_PASS_V(nir, nir_split_per_member_structs);
+
+   /* Ensure fs outputs are in the [0.0f...1.0f] range. */
+   NIR_PASS_V(nir, nir_lower_clamp_color_outputs);
+
+   /* Replace references to I/O variables with intrinsics. */
+   NIR_PASS_V(nir,
+              nir_lower_io,
+              nir_var_shader_in | nir_var_shader_out,
+              rogue_glsl_type_size,
+              (nir_lower_io_options)0);
+
+   /* Load inputs to scalars (single registers later). */
+   NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_in);
+
+   /* Optimize GL access qualifiers. */
+   const nir_opt_access_options opt_access_options = {
+      .is_vulkan = true,
+      .infer_non_readable = true,
+   };
+   NIR_PASS_V(nir, nir_opt_access, &opt_access_options);
+
+   /* Apply PFO code to the fragment shader output. */
+   if (nir->info.stage == MESA_SHADER_FRAGMENT)
+      NIR_PASS_V(nir, rogue_nir_pfo);
+
+   /* Load outputs to scalars (single registers later). */
+   NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out);
+
+   /* Lower ALU operations to scalars. */
+   NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
+
+   /* Algebraic opts. */
+   do {
+      progress = false;
+
+      NIR_PASS(progress, nir, nir_copy_prop);
+      NIR_PASS(progress, nir, nir_opt_cse);
+      NIR_PASS(progress, nir, nir_opt_algebraic);
+      NIR_PASS(progress, nir, nir_opt_constant_folding);
+      NIR_PASS(progress, nir, nir_opt_dce);
+      /* NOTE(review): gcm progress is not tracked (NIR_PASS_V), so it can
+       * never keep this loop iterating on its own -- confirm intended.
+       */
+      NIR_PASS_V(nir, nir_opt_gcm, false);
+   } while (progress);
+
+   /* Additional I/O lowering. */
+   NIR_PASS_V(nir,
+              nir_lower_explicit_io,
+              nir_var_mem_ubo,
+              spirv_options.ubo_addr_format);
+   NIR_PASS_V(nir, rogue_nir_lower_io, NULL);
+
+   /* Late algebraic opts. */
+   do {
+      progress = false;
+
+      /* NOTE(review): only nir_opt_algebraic_late drives this loop; the
+       * cleanup passes below use NIR_PASS_V and never set progress --
+       * confirm intended.
+       */
+      NIR_PASS(progress, nir, nir_opt_algebraic_late);
+      NIR_PASS_V(nir, nir_opt_constant_folding);
+      NIR_PASS_V(nir, nir_copy_prop);
+      NIR_PASS_V(nir, nir_opt_dce);
+      NIR_PASS_V(nir, nir_opt_cse);
+   } while (progress);
+
+   /* Replace SSA constant references with a register that loads the value. */
+   NIR_PASS_V(nir, rogue_nir_constreg);
+   /* Remove unused constant registers. */
+   NIR_PASS_V(nir, nir_opt_dce);
+
+   /* Move loads to just before they're needed. */
+   NIR_PASS_V(nir, nir_opt_move, nir_move_load_ubo | nir_move_load_input);
+
+   /* Convert vecNs to movs so we can sequentially allocate them later. */
+   NIR_PASS_V(nir, nir_lower_vec_to_movs, NULL, NULL);
+
+   /* Out of SSA pass. */
+   NIR_PASS_V(nir, nir_convert_from_ssa, false);
+
+   /* TODO: Re-enable scheduling after register pressure tweaks. */
+#if 0
+       /* Instruction scheduling. */
+       struct nir_schedule_options schedule_options = {
+               .threshold = ROGUE_MAX_REG_TEMP / 2,
+       };
+       NIR_PASS_V(nir, nir_schedule, &schedule_options);
+#endif
+
+   /* Assign I/O locations. */
+   nir_assign_io_var_locations(nir,
+                               nir_var_shader_in,
+                               &nir->num_inputs,
+                               nir->info.stage);
+   nir_assign_io_var_locations(nir,
+                               nir_var_shader_out,
+                               &nir->num_outputs,
+                               nir->info.stage);
+
+   /* Gather info into nir shader struct. */
+   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
+
+   /* Clean-up after passes. */
+   nir_sweep(nir);
+
+   nir_validate_shader(nir, "after passes");
+
+   return true;
+}
diff --git a/src/imagination/rogue/rogue_nir.h b/src/imagination/rogue/rogue_nir.h
new file mode 100644 (file)
index 0000000..580e5bf
--- /dev/null
@@ -0,0 +1,52 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_NIR_H
+#define ROGUE_NIR_H
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "util/macros.h"
+
+struct rogue_build_ctx;
+struct rogue_compiler;
+struct spirv_to_nir_options;
+
+PUBLIC
+const struct spirv_to_nir_options *
+rogue_get_spirv_options(const struct rogue_compiler *compiler);
+
+PUBLIC
+const nir_shader_compiler_options *
+rogue_get_compiler_options(const struct rogue_compiler *compiler);
+
+bool rogue_nir_passes(struct rogue_build_ctx *ctx,
+                      nir_shader *nir,
+                      gl_shader_stage stage);
+
+/* Custom passes. */
+void rogue_nir_pfo(nir_shader *shader);
+void rogue_nir_constreg(nir_shader *shader);
+bool rogue_nir_lower_io(nir_shader *shader, void *layout);
+
+#endif /* ROGUE_NIR_H */
diff --git a/src/imagination/rogue/rogue_nir_helpers.h b/src/imagination/rogue/rogue_nir_helpers.h
new file mode 100644 (file)
index 0000000..85fc6db
--- /dev/null
@@ -0,0 +1,149 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_NIR_HELPERS_H
+#define ROGUE_NIR_HELPERS_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "nir/nir.h"
+#include "util/bitscan.h"
+
+/**
+ * \file rogue_nir_helpers.h
+ *
+ * \brief Contains various NIR helper functions.
+ */
+
+/* Returns the register index of an ALU instruction's destination
+ * (the destination must not be SSA).
+ */
+static inline unsigned nir_alu_dest_regindex(const nir_alu_instr *alu)
+{
+   assert(!alu->dest.dest.is_ssa);
+
+   return alu->dest.dest.reg.reg->index;
+}
+
+/* Returns the single destination component written by an ALU instruction;
+ * the write mask must have exactly one bit set.
+ */
+static inline unsigned nir_alu_dest_comp(const nir_alu_instr *alu)
+{
+   assert(!alu->dest.dest.is_ssa);
+   assert(util_is_power_of_two_nonzero(alu->dest.write_mask));
+
+   return ffs(alu->dest.write_mask) - 1;
+}
+
+/* Returns the register index of ALU source \p src (must not be SSA). */
+static inline unsigned nir_alu_src_regindex(const nir_alu_instr *alu,
+                                            size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+   assert(!alu->src[src].src.is_ssa);
+
+   return alu->src[src].src.reg.reg->index;
+}
+
+/* Returns ALU source \p src as a 32-bit constant.
+ *
+ * NOTE(review): const_value is dereferenced without a NULL check; callers
+ * must ensure the source really is a constant (see nir_alu_src_is_const()).
+ */
+static inline uint32_t nir_alu_src_const(const nir_alu_instr *alu, size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+   assert(alu->src[src].src.is_ssa);
+
+   nir_const_value *const_value = nir_src_as_const_value(alu->src[src].src);
+
+   return nir_const_value_as_uint(*const_value, 32);
+}
+
+/* Returns true if ALU source \p src is an SSA load_const value. */
+static inline bool nir_alu_src_is_const(const nir_alu_instr *alu, size_t src)
+{
+   assert(src < nir_op_infos[alu->op].num_inputs);
+
+   if (!alu->src[src].src.is_ssa)
+      return false;
+
+   assert(alu->src[src].src.ssa->parent_instr);
+
+   return (alu->src[src].src.ssa->parent_instr->type ==
+           nir_instr_type_load_const);
+}
+
+/* Returns the register index of an intrinsic's destination
+ * (the destination must not be SSA).
+ */
+static inline unsigned nir_intr_dest_regindex(const nir_intrinsic_instr *intr)
+{
+   assert(!intr->dest.is_ssa);
+
+   return intr->dest.reg.reg->index;
+}
+
+/* Returns the register index of intrinsic source \p src (must not be SSA). */
+static inline unsigned nir_intr_src_regindex(const nir_intrinsic_instr *intr,
+                                             size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(!intr->src[src].is_ssa);
+
+   return intr->src[src].reg.reg->index;
+}
+
+/* Returns intrinsic source \p src as a 32-bit constant.
+ *
+ * NOTE(review): const_value is dereferenced without a NULL check; callers
+ * must ensure the source really is a constant (see nir_intr_src_is_const()).
+ */
+static inline uint32_t nir_intr_src_const(const nir_intrinsic_instr *intr,
+                                          size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(intr->src[src].is_ssa);
+
+   nir_const_value *const_value = nir_src_as_const_value(intr->src[src]);
+
+   return nir_const_value_as_uint(*const_value, 32);
+}
+
+/* Returns component \p comp of intrinsic source \p src as a 32-bit constant. */
+static inline uint32_t nir_intr_src_comp_const(const nir_intrinsic_instr *intr,
+                                               size_t src,
+                                               size_t comp)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+   assert(intr->src[src].is_ssa);
+   assert(comp < nir_src_num_components(intr->src[src]));
+
+   return nir_src_comp_as_uint(intr->src[src], comp);
+}
+
+/* Returns true if intrinsic source \p src is an SSA load_const value. */
+static inline bool nir_intr_src_is_const(const nir_intrinsic_instr *intr,
+                                         size_t src)
+{
+   assert(src < nir_intrinsic_infos[intr->intrinsic].num_srcs);
+
+   if (!intr->src[src].is_ssa)
+      return false;
+
+   assert(intr->src[src].ssa->parent_instr);
+
+   return (intr->src[src].ssa->parent_instr->type == nir_instr_type_load_const);
+}
+
+/* Counts the shader variables matching any of the modes in \p mode. */
+static inline size_t nir_count_variables_with_modes(const nir_shader *nir,
+                                                    nir_variable_mode mode)
+{
+   size_t count = 0;
+
+   nir_foreach_variable_with_modes (var, nir, mode)
+      ++count;
+
+   return count;
+}
+
+#endif /* ROGUE_NIR_HELPERS_H */
diff --git a/src/imagination/rogue/rogue_operand.c b/src/imagination/rogue/rogue_operand.c
new file mode 100644 (file)
index 0000000..a0e86f9
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "rogue_operand.h"
+
+/**
+ * \file rogue_operand.c
+ *
+ * \brief Contains functions to manipulate Rogue instruction operands.
+ */
diff --git a/src/imagination/rogue/rogue_operand.h b/src/imagination/rogue/rogue_operand.h
new file mode 100644 (file)
index 0000000..753f090
--- /dev/null
@@ -0,0 +1,158 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_OPERAND_H
+#define ROGUE_OPERAND_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_util.h"
+#include "util/macros.h"
+
+/* Register-related defines. */
+
+/* Total max number of registers per class
+ * (instances > ROGUE_MAX_REG_INDEX addressable via indexing only).
+ */
+#define ROGUE_MAX_REG_TEMP 248
+#define ROGUE_MAX_REG_COEFF 4096
+#define ROGUE_MAX_REG_CONST 240
+#define ROGUE_MAX_REG_SHARED 4096
+#define ROGUE_MAX_REG_PIXEL_OUT 8
+#define ROGUE_MAX_REG_VERTEX_IN 248
+#define ROGUE_MAX_REG_INTERNAL 8
+
+/* Maximum register index via offset encoding. */
+#define ROGUE_MAX_REG_INDEX 256
+
+/* Pixel-out register offset. */
+#define ROGUE_PIXEL_OUT_REG_OFFSET 32
+
+/* Internal register offset. */
+#define ROGUE_INTERNAL_REG_OFFSET 36
+
+/* Coefficient registers are typically used in groups of 4. */
+#define ROGUE_COEFF_ALIGN 4
+
+/* Defines for other operand types. */
+
+/* Available dependent read counters. */
+#define ROGUE_NUM_DRCS 2
+
+/* Maximum number of vertex outputs. */
+#define ROGUE_MAX_VERTEX_OUTPUTS 256
+
+/* All components of an emulated vec4 register group. */
+#define ROGUE_COMPONENT_ALL (~0)
+
+/**
+ * \brief Operand types.
+ */
+enum rogue_operand_type {
+   /* Register operands. */
+   ROGUE_OPERAND_TYPE_REG_TEMP = 0, /**< Temporary register. */
+   ROGUE_OPERAND_TYPE_REG_COEFF, /**< Coefficient register. */
+   ROGUE_OPERAND_TYPE_REG_CONST, /**< Constant register. */
+   ROGUE_OPERAND_TYPE_REG_SHARED, /**< Shared register. */
+   ROGUE_OPERAND_TYPE_REG_PIXEL_OUT, /**< Pixel output register. */
+   ROGUE_OPERAND_TYPE_REG_VERTEX_IN, /**< Vertex input register. */
+   ROGUE_OPERAND_TYPE_REG_INTERNAL, /**< Internal register. */
+
+   ROGUE_OPERAND_TYPE_REG_MAX = ROGUE_OPERAND_TYPE_REG_INTERNAL,
+
+   ROGUE_OPERAND_TYPE_IMMEDIATE, /**< Immediate value. */
+
+   ROGUE_OPERAND_TYPE_DRC, /**< Dependent read counter. */
+
+   ROGUE_OPERAND_TYPE_VREG, /**< Virtual register (pre-regalloc). */
+
+   ROGUE_OPERAND_TYPE_COUNT, /**< Number of operand types. */
+};
+
+/* clang-format off */
+
+#define ROGUE_NUM_REG_TYPES (ROGUE_OPERAND_TYPE_REG_MAX + 1)
+
+/**
+ * \brief A bitmask for any register operand type.
+ *
+ * The expansion is parenthesized so the macro can be combined safely with
+ * other operators (e.g. ROGUE_MASK_ANY_REG & x) without mis-binding.
+ */
+#define ROGUE_MASK_ANY_REG                  \
+   (ROH(ROGUE_OPERAND_TYPE_REG_TEMP) |     \
+    ROH(ROGUE_OPERAND_TYPE_REG_COEFF) |    \
+    ROH(ROGUE_OPERAND_TYPE_REG_CONST) |    \
+    ROH(ROGUE_OPERAND_TYPE_REG_PIXEL_OUT) |\
+    ROH(ROGUE_OPERAND_TYPE_REG_VERTEX_IN) |\
+    ROH(ROGUE_OPERAND_TYPE_REG_SHARED) |   \
+    ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL))
+
+/* clang-format on */
+
+/**
+ * \brief Operand description.
+ */
+struct rogue_operand {
+   enum rogue_operand_type type; /**< Discriminates the union below. */
+
+   union {
+      struct {
+         uint64_t value; /**< Immediate value. */
+      } immediate;
+
+      struct {
+         size_t number; /**< Dependent read counter number. */
+      } drc;
+
+      struct {
+         size_t number; /**< Physical register number. */
+      } reg;
+
+      struct {
+         size_t number; /**< Virtual register number. */
+         bool is_vector; /**< Emulated vec4 register group? */
+         size_t component; /**< Component, or ROGUE_COMPONENT_ALL. */
+      } vreg;
+   };
+};
+
+/**
+ * \brief Register access flags.
+ */
+enum rogue_register_access {
+   ROGUE_REG_ACCESS_READ = BITFIELD_BIT(0U), /**< Read-only. */
+   ROGUE_REG_ACCESS_WRITE = BITFIELD_BIT(1U), /**< Write-only. */
+   ROGUE_REG_ACCESS_RW = ROGUE_REG_ACCESS_READ |
+                         ROGUE_REG_ACCESS_WRITE, /**< Read/write. */
+};
+
+/**
+ * \brief Register modifier flags.
+ */
+enum rogue_register_modifier {
+   ROGUE_REG_MOD_NONE = 0U, /**< No modifiers. */
+   ROGUE_REG_MOD_IDX = BITFIELD_BIT(0U), /**< Index modifier. */
+   ROGUE_REG_MOD_DIM = BITFIELD_BIT(1U), /**< Dimension modifier. */
+   ROGUE_REG_MOD_ALL = ROGUE_REG_MOD_IDX | ROGUE_REG_MOD_DIM,
+};
+
+#endif /* ROGUE_OPERAND_H */
diff --git a/src/imagination/rogue/rogue_regalloc.c b/src/imagination/rogue/rogue_regalloc.c
new file mode 100644 (file)
index 0000000..f18e443
--- /dev/null
@@ -0,0 +1,313 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_operand.h"
+#include "rogue_regalloc.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "util/hash_table.h"
+#include "util/list.h"
+#include "util/ralloc.h"
+#include "util/register_allocate.h"
+#include "util/u_dynarray.h"
+
+/**
+ * \file rogue_regalloc.c
+ *
+ * \brief Contains register allocation helper functions.
+ */
+
+/**
+ * \brief Sets up the register data with the classes to be used for allocation.
+ *
+ * \param[in] data The register data array, indexed by #rogue_reg_class.
+ */
+static void
+rogue_reg_data_init(struct rogue_reg_data data[static ROGUE_REG_CLASS_COUNT])
+{
+   /* Scalar temporaries: allocated one register at a time. */
+   data[ROGUE_REG_CLASS_TEMP].type = ROGUE_OPERAND_TYPE_REG_TEMP;
+   data[ROGUE_REG_CLASS_TEMP].count = ROGUE_MAX_REG_TEMP;
+   data[ROGUE_REG_CLASS_TEMP].stride = 1;
+
+   /* Emulated vec4s: groups of 4 contiguous internal registers. */
+   data[ROGUE_REG_CLASS_VEC4].type = ROGUE_OPERAND_TYPE_REG_INTERNAL;
+   data[ROGUE_REG_CLASS_VEC4].count = ROGUE_MAX_REG_INTERNAL;
+   data[ROGUE_REG_CLASS_VEC4].stride = 4;
+}
+
+/**
+ * \brief Initializes the Rogue register allocation context.
+ *
+ * \param[in] mem_ctx The memory (ralloc) context for the ra context.
+ * \return A rogue_ra * if successful, or NULL if unsuccessful.
+ */
+struct rogue_ra *rogue_ra_init(void *mem_ctx)
+{
+   struct rogue_ra *ra;
+   size_t total_regs = 0;
+
+   ra = rzalloc_size(mem_ctx, sizeof(*ra));
+   if (!ra)
+      return NULL;
+
+   /* Initialize the register class data. */
+   rogue_reg_data_init(ra->reg_data);
+
+   /* Count up the registers classes and set up their offsets.
+    *
+    * The physical register numbers are sequential, even if the
+    * registers are from different banks, so keeping track of
+    * the offset means we can get the true physical register
+    * number back after allocation.
+    */
+   for (size_t u = 0; u < ARRAY_SIZE(ra->reg_data); ++u) {
+      ra->reg_data[u].offset = total_regs;
+      total_regs += ra->reg_data[u].count;
+   }
+
+   /* Create a register set for allocation (ralloc'd against ra, so it is
+    * freed together with the context).
+    */
+   ra->regs = ra_alloc_reg_set(ra, total_regs, true);
+   if (!ra->regs) {
+      ralloc_free(ra);
+      return NULL;
+   }
+
+   /* Create the register class for the temps. */
+   ra->reg_data[ROGUE_REG_CLASS_TEMP].class =
+      ra_alloc_contig_reg_class(ra->regs, 1);
+
+   /* Create the register class for vec4 registers
+    * (using the internal register bank).
+    */
+   ra->reg_data[ROGUE_REG_CLASS_VEC4].class =
+      ra_alloc_contig_reg_class(ra->regs, 4);
+
+   /* Populate the register classes, stepping by each class' stride. */
+   for (size_t u = 0; u < ARRAY_SIZE(ra->reg_data); ++u) {
+      struct rogue_reg_data *reg_data = &ra->reg_data[u];
+      size_t offset = reg_data->offset;
+      size_t end = reg_data->offset + reg_data->count;
+      size_t stride = reg_data->stride;
+
+      for (size_t r = offset; r < end; r += stride)
+         ra_class_add_reg(reg_data->class, r);
+   }
+
+   /* Finalize the set (no early conflicts passed along for now). */
+   ra_set_finalize(ra->regs, NULL);
+
+   return ra;
+}
+
+/**
+ * \brief The range for which a (virtual) register is live, and its references.
+ */
+struct live_range {
+   size_t start; /**< First instruction index ("ip") referencing the vreg. */
+   size_t end; /**< Last instruction index referencing the vreg. */
+   enum rogue_reg_class class; /**< Register class to allocate from. */
+   struct util_dynarray operand_refs; /**< struct rogue_operand * elements. */
+};
+
+/**
+ * \brief Performs register allocation, replacing each virtual register
+ * operand in \p instr_list with a physical register.
+ *
+ * \param[in] instr_list A linked list of instructions with virtual registers
+ * to be allocated.
+ * \param[in] ra The register allocation context.
+ * \param[out] temps_used If non-NULL, receives the number of temps used.
+ * \param[out] internals_used If non-NULL, receives the number of internal
+ * (vec4) registers used.
+ * \return true if successful, otherwise false.
+ */
+bool rogue_ra_alloc(struct list_head *instr_list,
+                    struct rogue_ra *ra,
+                    size_t *temps_used,
+                    size_t *internals_used)
+{
+   /* Used for ra_alloc_interference_graph() as it doesn't
+    * like having gaps (e.g. with v0, v2 count = 3 rather
+    * than 2).
+    */
+   size_t max_vreg = 0;
+
+   struct hash_table *reg_ht =
+      _mesa_hash_table_create(ra, _mesa_hash_uint, _mesa_key_uint_equal);
+   if (!reg_ht)
+      return false;
+
+   /* Calculate live ranges for virtual registers. */
+   size_t ip = 0U; /* "Instruction pointer". */
+   foreach_instr (instr, instr_list) {
+      for (size_t u = 0U; u < instr->num_operands; ++u) {
+         struct hash_entry *entry;
+         struct live_range *range;
+
+         if (instr->operands[u].type != ROGUE_OPERAND_TYPE_VREG)
+            continue;
+
+         entry =
+            _mesa_hash_table_search(reg_ht, &instr->operands[u].vreg.number);
+         if (!entry) {
+            /* First use of this virtual register: initialize live range. */
+            range = rzalloc_size(reg_ht, sizeof(*range));
+            if (!range) {
+               _mesa_hash_table_destroy(reg_ht, NULL);
+               return false;
+            }
+
+            range->start = ip;
+            range->end = ip;
+            range->class = instr->operands[u].vreg.is_vector
+                              ? ROGUE_REG_CLASS_VEC4
+                              : ROGUE_REG_CLASS_TEMP;
+
+            entry = _mesa_hash_table_insert(reg_ht,
+                                            &instr->operands[u].vreg.number,
+                                            range);
+
+            max_vreg = MAX2(max_vreg, instr->operands[u].vreg.number);
+
+            util_dynarray_init(&range->operand_refs, range);
+         } else {
+            /* Subsequent uses: update live range end. */
+            range = entry->data;
+            range->end = MAX2(range->end, ip);
+            assert(range->class == (instr->operands[u].vreg.is_vector
+                                       ? ROGUE_REG_CLASS_VEC4
+                                       : ROGUE_REG_CLASS_TEMP));
+         }
+
+         /* Save a reference to the operand. */
+         util_dynarray_append(&range->operand_refs,
+                              struct rogue_operand *,
+                              &instr->operands[u]);
+      }
+      ++ip;
+   }
+
+   /* Initialize the interference graph. */
+   struct ra_graph *g = ra_alloc_interference_graph(ra->regs, max_vreg + 1);
+
+   /* Set each virtual register to the appropriate class. */
+   hash_table_foreach (reg_ht, entry) {
+      const uint32_t *vreg = entry->key;
+      struct live_range *range = entry->data;
+      struct ra_class *class = ra->reg_data[range->class].class;
+
+      ra_set_node_class(g, *vreg, class);
+      /* TODO: ra_set_node_spill_cost(g, *vreg, cost); */
+   }
+
+   /* Build interference graph from overlapping live ranges. */
+   hash_table_foreach (reg_ht, entry_first) {
+      const uint32_t *vreg_first = entry_first->key;
+      struct live_range *range_first = entry_first->data;
+
+      hash_table_foreach (reg_ht, entry_second) {
+         const uint32_t *vreg_second = entry_second->key;
+         struct live_range *range_second = entry_second->data;
+
+         if (*vreg_first == *vreg_second)
+            continue;
+
+         /* If the live ranges overlap, those register nodes interfere. */
+         if (!(range_first->start >= range_second->end ||
+               range_second->start >= range_first->end)) {
+            ra_add_node_interference(g, *vreg_first, *vreg_second);
+         }
+      }
+   }
+
+   /* Add node interferences such that the same register can't be used for
+    * both an instruction's source and destination.
+    */
+   foreach_instr (instr, instr_list) {
+      for (size_t u = 0U; u < instr->num_operands; ++u) {
+         if (instr->operands[u].type != ROGUE_OPERAND_TYPE_VREG)
+            continue;
+
+         /* Operand 0 (if it exists and is virtual) is always
+          * the destination register.
+          */
+         if (u > 0 && instr->operands[0].type == ROGUE_OPERAND_TYPE_VREG)
+            ra_add_node_interference(g,
+                                     instr->operands[0].vreg.number,
+                                     instr->operands[u].vreg.number);
+      }
+   }
+
+   /* Perform register allocation.
+    *
+    * N.B. The call must not live inside assert(): with NDEBUG the
+    * allocation would never run at all.
+    */
+   /* TODO: Spilling support. */
+   if (!ra_allocate(g)) {
+      ralloc_free(g);
+      _mesa_hash_table_destroy(reg_ht, NULL);
+      return false;
+   }
+
+   /* Replace virtual registers with allocated physical registers.
+    * N.B. This is a destructive process as it overwrites the hash table key!
+    */
+   hash_table_foreach (reg_ht, entry) {
+      uint32_t vreg = *(uint32_t *)entry->key;
+      unsigned phy_reg = ra_get_node_reg(g, vreg);
+      struct live_range *range = entry->data;
+
+      struct rogue_reg_data *reg_data = &ra->reg_data[range->class];
+      enum rogue_operand_type type = reg_data->type;
+      size_t reg_offset = reg_data->offset;
+      size_t *num_used = &reg_data->num_used;
+
+      util_dynarray_foreach (&range->operand_refs,
+                             struct rogue_operand *,
+                             operand_ptr) {
+         size_t num = phy_reg - reg_offset;
+         struct rogue_operand *operand = *operand_ptr;
+
+         assert(operand->type == ROGUE_OPERAND_TYPE_VREG);
+         assert(operand->vreg.number == vreg);
+
+         /* Index the component of emulated vec4 registers. */
+         if (operand->vreg.is_vector &&
+             operand->vreg.component != ROGUE_COMPONENT_ALL)
+            num += operand->vreg.component;
+
+         operand->type = type;
+         operand->reg.number = num;
+
+         /* Registers used = max register number + 1; tracking the +1
+          * directly also counts programs whose only register is number 0.
+          */
+         *num_used = MAX2(*num_used, operand->reg.number + 1);
+      }
+
+      util_dynarray_fini(&range->operand_refs);
+      _mesa_hash_table_remove(reg_ht, entry);
+   }
+
+   /* Pass back the registers used. */
+   if (temps_used)
+      *temps_used = ra->reg_data[ROGUE_REG_CLASS_TEMP].num_used;
+
+   if (internals_used)
+      *internals_used = ra->reg_data[ROGUE_REG_CLASS_VEC4].num_used;
+
+   ralloc_free(g);
+
+   _mesa_hash_table_destroy(reg_ht, NULL);
+
+   return true;
+}
diff --git a/src/imagination/rogue/rogue_regalloc.h b/src/imagination/rogue/rogue_regalloc.h
new file mode 100644 (file)
index 0000000..eb831ea
--- /dev/null
@@ -0,0 +1,70 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_REGALLOC_H
+#define ROGUE_REGALLOC_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "util/list.h"
+
/**
 * \brief Register classes used for allocation.
 */
enum rogue_reg_class {
   ROGUE_REG_CLASS_TEMP, /**< Temporary registers. */
   ROGUE_REG_CLASS_VEC4, /**< Emulated vec4 registers (reported as "internals" by the allocator). */

   ROGUE_REG_CLASS_COUNT,
};
+
/**
 * \brief Register data for each class.
 */
struct rogue_reg_data {
   /* Operand type written back into operands of this class after allocation.
    * NOTE(review): enum rogue_operand_type is declared in rogue_operand.h,
    * which this header's visible includes do not pull in -- verify that all
    * users include rogue_operand.h first.
    */
   enum rogue_operand_type type;
   size_t count;  /* Register count for this class -- TODO confirm against setup code. */
   size_t stride; /* Allocation stride -- TODO confirm against setup code. */

   size_t offset;          /* Base offset of this class within the RA register space. */
   struct ra_class *class; /* Mesa register-allocator class handle. */
   size_t num_used;        /* Registers used: max allocated index + 1 after allocation. */
};
+
/**
 * \brief Register allocation context.
 */
struct rogue_ra {
   struct ra_regs *regs; /* Mesa register-allocator register set. */

   /* Per-class allocation state, indexed by enum rogue_reg_class. */
   struct rogue_reg_data reg_data[ROGUE_REG_CLASS_COUNT];
};
+
+struct rogue_ra *rogue_ra_init(void *mem_ctx);
+bool rogue_ra_alloc(struct list_head *instr_list,
+                    struct rogue_ra *ra,
+                    size_t *temps_used,
+                    size_t *internals_used);
+
+#endif /* ROGUE_REGALLOC_H */
diff --git a/src/imagination/rogue/rogue_shader.c b/src/imagination/rogue/rogue_shader.c
new file mode 100644 (file)
index 0000000..174d962
--- /dev/null
@@ -0,0 +1,133 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>

#include "rogue_shader.h"
#include "rogue_instr.h"
#include "rogue_regalloc.h"
#include "rogue_util.h"
#include "util/ralloc.h"
+
+/**
+ * \file rogue_shader.c
+ *
+ * \brief Contains functions to manipulate Rogue shaders.
+ */
+
+/**
+ * \brief Counts how many times an instruction is used in a shader.
+ *
+ * \param[in] shader The shader containing instructions to count.
+ * \param[in] opcode The opcode of the instruction to be counted.
+ * \return The number of times "opcode" is present, or 0 on error.
+ */
+size_t rogue_shader_instr_count_type(const struct rogue_shader *shader,
+                                     enum rogue_opcode opcode)
+{
+   size_t count = 0U;
+
+   ASSERT_OPCODE_RANGE(opcode);
+
+   foreach_instr (instr, &shader->instr_list)
+      if (instr->opcode == opcode)
+         ++count;
+
+   return count;
+}
+
+/**
+ * \brief Allocates and sets up a Rogue shader.
+ *
+ * \param[in] stage The shader stage.
+ * \return A rogue_shader* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader *rogue_shader_create(struct rogue_build_ctx *ctx,
+                                         gl_shader_stage stage)
+{
+   struct rogue_shader *shader;
+
+   if (!ctx)
+      return NULL;
+
+   shader = rzalloc_size(ctx, sizeof(*shader));
+   if (!shader)
+      return NULL;
+
+   shader->stage = stage;
+
+   list_inithead(&shader->instr_list);
+
+   shader->ctx = ctx;
+   shader->ra = rogue_ra_init(shader);
+   if (!shader->ra) {
+      ralloc_free(shader);
+      return NULL;
+   }
+
+   return shader;
+}
+
+/**
+ * \brief Creates an instruction and appends it to a Rogue shader.
+ *
+ * \param[in] shader The shader.
+ * \param[in] opcode The instruction opcode.
+ * \return A rogue_instr* if successful, or NULL if unsuccessful.
+ */
+struct rogue_instr *rogue_shader_insert(struct rogue_shader *shader,
+                                        enum rogue_opcode opcode)
+{
+   struct rogue_instr *instr = rogue_instr_create(shader, opcode);
+   if (!instr)
+      return NULL;
+
+   list_addtail(&instr->node, &shader->instr_list);
+
+   return instr;
+}
+
+size_t rogue_acquire_drc(struct rogue_shader *shader)
+{
+   size_t drc;
+
+   /* If both DRCs are in use, we have a problem. */
+   if (shader->drc_used[0] && shader->drc_used[1])
+      return SIZE_MAX;
+
+   drc = !shader->drc_used[0] ? 0 : 1;
+   shader->drc_used[drc] = true;
+
+   return drc;
+}
+
/**
 * \brief Releases a previously acquired DRC so it can be reused.
 *
 * \param[in] shader The shader whose DRC usage is tracked.
 * \param[in] drc The DRC index returned by rogue_acquire_drc().
 */
void rogue_release_drc(struct rogue_shader *shader, size_t drc)
{
   assert(drc < ROGUE_NUM_DRCS);
   assert(shader->drc_used[drc]);

   shader->drc_used[drc] = false;
}
diff --git a/src/imagination/rogue/rogue_shader.h b/src/imagination/rogue/rogue_shader.h
new file mode 100644 (file)
index 0000000..2109855
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_SHADER_H
+#define ROGUE_SHADER_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include "compiler/shader_enums.h"
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_util.h"
+#include "util/list.h"
+#include "util/macros.h"
+
+struct rogue_build_ctx;
+struct rogue_ra;
+
/**
 * \brief Shader description.
 */
struct rogue_shader {
   gl_shader_stage stage; /**< Shader stage. */

   struct list_head instr_list; /**< Instructions linked list. */

   struct rogue_build_ctx *ctx; /**< Build context this shader is parented to. */
   struct rogue_ra *ra;         /**< Register allocation context. */

   /** Tracks which DRCs are currently acquired (see rogue_acquire_drc()). */
   bool drc_used[ROGUE_NUM_DRCS];
};
+
/* Shader instruction list iterators and helpers. */

/* Iterate instructions front-to-back, back-to-front, and removal-safe. */
#define foreach_instr(__instr, __list) \
   list_for_each_entry (struct rogue_instr, __instr, __list, node)
#define foreach_instr_rev(__instr, __list) \
   list_for_each_entry_rev (struct rogue_instr, __instr, __list, node)
#define foreach_instr_safe(__instr, __list) \
   list_for_each_entry_safe (struct rogue_instr, __instr, __list, node)

/* First/last instruction in the list. */
#define instr_first_entry(__list) \
   list_first_entry(__list, struct rogue_instr, node)
#define instr_last_entry(__list) \
   list_last_entry(__list, struct rogue_instr, node)
+
+size_t rogue_shader_instr_count_type(const struct rogue_shader *shader,
+                                     enum rogue_opcode opcode);
+
+PUBLIC
+struct rogue_shader *rogue_shader_create(struct rogue_build_ctx *ctx,
+                                         gl_shader_stage stage);
+
+PUBLIC
+struct rogue_instr *rogue_shader_insert(struct rogue_shader *shader,
+                                        enum rogue_opcode opcode);
+
+size_t rogue_acquire_drc(struct rogue_shader *shader);
+void rogue_release_drc(struct rogue_shader *shader, size_t drc);
+
+#endif /* ROGUE_SHADER_H */
diff --git a/src/imagination/rogue/rogue_util.c b/src/imagination/rogue/rogue_util.c
new file mode 100644 (file)
index 0000000..f20c13a
--- /dev/null
@@ -0,0 +1,98 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "rogue_util.h"
+#include "util/macros.h"
+
+/**
+ * \file rogue_util.c
+ *
+ * \brief Contains compiler utility and helper functions.
+ */
+
/**
 * \brief Splits and distributes value "source" across "dest_bytes" according to
 * the ranges specified (from MSB to LSB).
 *
 * Bits of "source" are consumed MSB-first and OR-ed into place, so the caller
 * is expected to provide a zero-initialized destination buffer -- TODO confirm
 * against callers.
 *
 * \param[in] source The source value to be distributed.
 * \param[in] rangelist The rangelist describing how to distribute "source".
 * \param[in] dest_size The size of the destination in bytes.
 * \param[in] dest_bytes The destination byte array.
 * \return Always true. NOTE(review): invalid inputs are caught by assert()
 * in debug builds only; there is no false-returning path despite the bool
 * return type.
 */
bool rogue_distribute_value(uint64_t source,
                            const struct rogue_rangelist *rangelist,
                            size_t dest_size,
                            uint8_t dest_bytes[dest_size])
{
   size_t total_bits_left = 0U;

   /* Check that "value" is actually representable in "total_bits" bits. */
   total_bits_left = rogue_rangelist_bits(rangelist);
   assert(util_last_bit64(source) <= total_bits_left &&
          "Value cannot be represented.");

   /* Iterate over each range. */
   for (size_t u = 0U; u < rangelist->num_ranges; ++u) {
      struct rogue_bitrange *range = &rangelist->ranges[u];

      /* Destination bookkeeping for this range. */
      size_t dest_bit = range->start;
      size_t bits_left = range->num;
      size_t bytes_covered = rogue_bytes_spilled(range) + 1;
      size_t base_byte = rogue_byte_index(range, dest_size);

      /* Iterate over each byte covered by the current range. */
      for (size_t b = 0U; b < bytes_covered; ++b) {
         /* Bits that fit into the current byte before spilling over. */
         size_t max_bits = rogue_max_bits(dest_bit);
         size_t bits_to_place = MIN2(bits_left, max_bits);
         size_t dest_byte_bit = dest_bit % 8;
         /* Next unconsumed source bit (MSB-first). */
         size_t source_bit = total_bits_left - 1;

         /* Mask and shuffle the source value so that it'll fit into the
          * correct place in the destination byte:
          */

         /* Extract bits. */
         uint64_t value_masked =
            (source & BITMASK64_N(source_bit, bits_to_place));
         /* Shift all the way right. */
         value_masked >>= (1 + source_bit - bits_to_place);
         /* Shift left to the correct position. */
         value_masked <<= (1 + dest_byte_bit - bits_to_place);
         /* Place value into byte. */
         dest_bytes[base_byte + b] |= (value_masked & 0xff);

         dest_bit -= max_bits;
         bits_left -= bits_to_place;
         total_bits_left -= bits_to_place;
      }
   }

   return true;
}
diff --git a/src/imagination/rogue/rogue_util.h b/src/imagination/rogue/rogue_util.h
new file mode 100644 (file)
index 0000000..fa789c4
--- /dev/null
@@ -0,0 +1,320 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_UTIL_H
+#define ROGUE_UTIL_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "util/bitscan.h"
+#include "util/log.h"
+#include "util/macros.h"
+
/* Input validation helpers. */

/**
 * \brief Evaluates "expr" and, if it is false, returns false from the
 * enclosing function.
 *
 * \param[in] expr The expression to check.
 */
#define CHECK(expr)    \
   do {                \
      if (!(expr))     \
         return false; \
   } while (0)
+
/**
 * \brief Evaluates "expr" and, if it is false, logs the provided error
 * message and returns false from the enclosing function.
 *
 * \param[in] expr The expression to check.
 * \param[in] fmt The error message to print.
 * \param[in] ... The printf-style variable arguments.
 */
#define CHECKF(expr, fmt, ...)                                  \
   do {                                                         \
      if (!(expr)) {                                            \
         mesa_log(MESA_LOG_ERROR, "ROGUE", fmt, ##__VA_ARGS__); \
         return false;                                          \
      }                                                         \
   } while (0)
+
/**
 * \brief Asserts if "opcode" is invalid.
 *
 * \param[in] opcode The opcode to check.
 */
#define ASSERT_OPCODE_RANGE(opcode) assert((opcode) < ROGUE_OP_COUNT)

/**
 * \brief Asserts if "operand" is invalid.
 *
 * \param[in] operand The operand to check.
 */
#define ASSERT_OPERAND_RANGE(operand) \
   assert((operand) < ROGUE_OPERAND_TYPE_COUNT)

/**
 * \brief Asserts if "operand" is not a register.
 *
 * \param[in] operand The operand to check.
 */
#define ASSERT_OPERAND_REG(operand) \
   assert((operand) <= ROGUE_OPERAND_TYPE_REG_MAX)

/**
 * \brief Asserts if "flag" is invalid.
 *
 * \param[in] flag The flag to check.
 */
#define ASSERT_INSTR_FLAG_RANGE(flag) assert((flag) < ROGUE_INSTR_FLAG_COUNT)

/**
 * \brief Asserts if operand index "index" is out of range.
 *
 * \param[in] instr The target instruction.
 * \param[in] index The operand index to check.
 */
#define ASSERT_INSTR_OPERAND_INDEX(instr, index) \
   assert((index) < (instr)->num_operands)

/**
 * \brief Asserts if "stage" is invalid.
 *
 * \param[in] stage The stage to check.
 */
#define ASSERT_SHADER_STAGE_RANGE(stage) assert((stage) < MESA_SHADER_STAGES)

/**
 * \brief Creates an "n"-bit mask whose most-significant bit is bit "b",
 * i.e. covering bits [b - n + 1, b].
 *
 * NOTE(review): assumes 1 <= n <= 64 and (n - 1) <= b <= 63; n == 0 makes
 * the first shift count 64, which is undefined behaviour -- verify callers.
 *
 * \param[in] b The starting (most-significant) bit.
 * \param[in] n The number of bits in the mask.
 */
#define BITMASK64_N(b, n) (((~0ULL) << (64 - (n))) >> (63 - (b)))

/**
 * \brief Compile-time rogue_onehot.
 *
 * \sa #rogue_onehot()
 */
#define ROH(OFFSET) BITFIELD64_BIT(OFFSET)
+
/* TODO: Consider integrating the following into src/util/{macros,bitscan}.h */

/**
 * \brief Converts a one-hot encoding to an offset encoding.
 *
 * E.g. 0b10000 -> 4
 *
 * \param[in] onehot The one-hot encoding; exactly one bit must be set.
 * \return The offset encoding (index of the set bit).
 */
static inline uint64_t rogue_offset(uint64_t onehot)
{
   assert(util_bitcount64(onehot) == 1);
   /* ffsll() is 1-based; 0 (no bit set) is ruled out by the assert above. */
   return ffsll(onehot) - 1;
}
+
/**
 * \brief Converts an offset encoding to a one-hot encoding.
 *
 * E.g. 0 -> 0b1
 *
 * \param[in] offset The offset encoding; must be < 64.
 * \return The one-hot encoding (a single set bit at position "offset").
 */
static inline uint64_t rogue_onehot(uint64_t offset)
{
   assert(offset < 64ULL);
   return UINT64_C(1) << offset;
}
+
/**
 * \brief Checks whether an input bitfield contains only a valid bitset.
 *
 * E.g. rogue_check_bitset(0b00001100, 0b00001111) -> true
 *      rogue_check_bitset(0b00001100, 0b00000111) -> false
 *
 * \param[in] input The input bitfield.
 * \param[in] valid_bits The valid bitset.
 * \return true if "input" contains only "valid_bits", false otherwise.
 */
static inline bool rogue_check_bitset(uint64_t input, uint64_t valid_bits)
{
   /* Any bit remaining after clearing the valid ones is invalid. */
   return (input & ~valid_bits) == 0;
}
+
/**
 * \brief Describes a downward range of bits within an arbitrarily-sized
 * sequence.
 *
 * E.g. for start = 7 and num = 3:
 *
 * 76543210
 * abcdefgh
 *
 * the bit range would be: abc.
 */
struct rogue_bitrange {
   size_t start; /**< Index of the most-significant bit of the range. */
   size_t num;   /**< Number of bits, counting downward from "start". */
};
+
/**
 * \brief Describes a collection of bit-ranges within an arbitrarily-sized
 * sequence that are meaningful together.
 *
 * E.g. an 8-bit value that is encoded within a larger value:
 *     8-bit value: abcdefgh
 *     Parent value: 010ab0cdef0010gh
 *
 */
struct rogue_rangelist {
   size_t num_ranges;             /**< Number of entries in "ranges". */
   struct rogue_bitrange *ranges; /**< Ranges, ordered MSB to LSB of the encoded value. */
};
+
+/**
+ * \brief Counts the total number of bits described in a rangelist.
+ *
+ * \param[in] rangelist The input rangelist.
+ * \return The total number of bits.
+ */
+static inline size_t
+rogue_rangelist_bits(const struct rogue_rangelist *rangelist)
+{
+   size_t total_bits = 0U;
+
+   for (size_t u = 0U; u < rangelist->num_ranges; ++u)
+      total_bits += rangelist->ranges[u].num;
+
+   return total_bits;
+}
+
/**
 * \brief Returns the byte offset of the bitrange moving left from the LSB.
 *
 * E.g. a range whose MSB is bit 17 lives in byte 2 (counting from the LSB
 * end of the sequence).
 *
 * \param[in] bitrange The input bit-range.
 * \return The byte offset.
 */
static inline size_t rogue_byte_num(const struct rogue_bitrange *bitrange)
{
   /* Make sure there are enough bits. */
   assert(bitrange->num <= (bitrange->start + 1));

   return bitrange->start / 8;
}
+
/**
 * \brief Returns the array-indexable byte offset of a bit-range if the sequence
 * it represents were to be stored in an byte-array containing "num_bytes"
 * bytes.
 *
 * E.g. uint8_t array[2] is a sequence of 16 bits:
 *     bit(0) is located in array[1].
 *     bit(15) is located in array[0].
 *
 * For uint8_t array[4]:
 *     bit(0) is located in array[3].
 *     bit(15) is located in array[2].
 *
 * \param[in] bitrange The input bit-range.
 * \param[in] num_bytes The number of bytes that are used to contain the
 * bit-range.
 * \return The byte offset.
 */
static inline size_t rogue_byte_index(const struct rogue_bitrange *bitrange,
                                      size_t num_bytes)
{
   /* Make sure there are enough bits. */
   assert(bitrange->num <= (bitrange->start + 1));

   return num_bytes - rogue_byte_num(bitrange) - 1;
}
+
/**
 * \brief Returns the bit offset of a bit-range if the sequence it represents is
 * being accessed in a byte-wise manner (i.e. bitrange->start modulo 8).
 *
 * E.g. bit 17 has a bit offset of 1.
 *
 * \param[in] bitrange The input bit-range.
 * \return The bit offset.
 */
static inline size_t rogue_bit_offset(const struct rogue_bitrange *bitrange)
{
   /* Make sure there are enough bits. */
   assert(bitrange->num <= (bitrange->start + 1));

   return bitrange->start % 8;
}
+
+/**
+ * \brief Returns the number of additional bytes that the bit-range spills into
+ * (excluding its "starting" byte).
+ *
+ * \param[in] bitrange The input bit-range.
+ * \return The number of bytes spilled.
+ */
+static inline size_t rogue_bytes_spilled(const struct rogue_bitrange *bitrange)
+{
+   /* Make sure there are enough bits. */
+   assert(bitrange->num <= (bitrange->start + 1));
+
+   return ((bitrange->num - 1) / 8) +
+          ((bitrange->num % 8) > (rogue_bit_offset(bitrange) + 1));
+}
+
/**
 * \brief For a given bit offset, returns the maximum number of bits (including
 * itself) that are accessible before spilling into the following byte.
 *
 * E.g. When trying to insert an 8-bit value at offset 13, a maximum of 6 bits
 * can be placed; the last 2 bits will need to go into the next byte.
 *
 *     8-bit value: abcdefgh
 *
 *     array[0]  array[1]
 *     15      8 7      0
 *      iiiiiiii jjjjjjjj
 *        ^
 *        abcdef gh
 *
 * \param[in] offset The bit offset.
 * \return The maximum number of accessible bits.
 */
static inline size_t rogue_max_bits(size_t offset)
{
   size_t bit_within_byte = offset % 8;
   return bit_within_byte + 1;
}
+
+bool rogue_distribute_value(uint64_t source,
+                            const struct rogue_rangelist *rangelist,
+                            size_t dest_size,
+                            uint8_t dest_bytes[dest_size]);
+
+#endif /* ROGUE_UTIL_H */
diff --git a/src/imagination/rogue/rogue_validate.c b/src/imagination/rogue/rogue_validate.c
new file mode 100644 (file)
index 0000000..8d9d10e
--- /dev/null
@@ -0,0 +1,288 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * \file rogue_validate.c
+ *
+ * \brief Contains rules and functions for validating Rogue data structures.
+ */
+
+#include <stdbool.h>
+
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "rogue_util.h"
+#include "rogue_validate.h"
+#include "util/list.h"
+#include "util/macros.h"
+
/**
 * \brief Register operand rules.
 *
 * Each entry defines the access mode, the exclusive upper bound on register
 * numbers (see rogue_validate_operand()), and the permitted modifiers.
 */
#define REG_RULE(OPERAND, ACCESS, MAX, MODIFIERS) \
   [ROGUE_OPERAND_TYPE_REG_##OPERAND] = {         \
      .access = ROGUE_REG_ACCESS_##ACCESS,        \
      .max = MAX,                                 \
      .modifiers = ROGUE_REG_MOD_##MODIFIERS,     \
   }

/* TODO: Support register indexing > ROGUE_MAX_REG_TEMP. */
/* NOTE(review): every class here is marked RW, including CONST -- confirm
 * that writes to constant registers are intended to validate.
 */
static const struct rogue_register_rule reg_rules[ROGUE_NUM_REG_TYPES] = {
   REG_RULE(TEMP, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_TEMP), ALL),
   REG_RULE(COEFF, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_COEFF), ALL),
   REG_RULE(CONST, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_CONST), NONE),
   REG_RULE(SHARED, RW, MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_SHARED), ALL),
   REG_RULE(PIXEL_OUT,
            RW,
            MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_PIXEL_OUT),
            NONE),
   REG_RULE(VERTEX_IN,
            RW,
            MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_VERTEX_IN),
            ALL),
   REG_RULE(INTERNAL,
            RW,
            MIN2(ROGUE_MAX_REG_INDEX, ROGUE_MAX_REG_INTERNAL),
            NONE),
};
#undef REG_RULE
+
/**
 * \brief Instruction rules.
 *
 * .flags is a one-hot mask of permitted instruction flags. Per-operand,
 * .mask is a one-hot mask of permitted operand types, .min/.max bound
 * immediate values and .align constrains register alignment; -1 means
 * "unconstrained" (see rogue_validate_instr()).
 */
/* TODO: Common up register classes to prevent long lines. */
static const struct rogue_instr_rule instr_rules[ROGUE_OP_COUNT] = {
	[ROGUE_OP_NOP] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, },
	[ROGUE_OP_END_FRAG] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, },
	[ROGUE_OP_END_VERT] = { .flags = 0, .num_operands = 0, .operand_rules = NULL, },
	[ROGUE_OP_WDF] = { .flags = 0,
		.num_operands = 1, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_DRC), .min = -1, .max = -1, .align = -1, },
		},
	},
	[ROGUE_OP_PIX_ITER_W] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT),
		.num_operands = 5, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_DRC), .min = -1, .max = -1, .align = -1, },
			[2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_COEFF), .min = -1, .max = -1, .align = ROGUE_COEFF_ALIGN, },
			[3] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_COEFF), .min = -1, .max = -1, .align = ROGUE_COEFF_ALIGN, },
			[4] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 1, .max = 16, .align = -1, },
		},
	},
	[ROGUE_OP_MAX] = { .flags = 0,
		.num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
		},
	},
	[ROGUE_OP_MIN] = { .flags = 0,
		.num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL), .min = -1, .max = -1, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
		},
	},
	/* TODO: Add representation for 4 sequential registers. */
	[ROGUE_OP_PACK_U8888] = { .flags = 0,
		.num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL), .min = -1, .max = -1, .align = -1, },
		},
	},
	[ROGUE_OP_MOV] = { .flags = ROH(ROGUE_INSTR_FLAG_OLCHK),
		.num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_INTERNAL) | ROH(ROGUE_OPERAND_TYPE_REG_PIXEL_OUT), .min = -1, .max = -1, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_CONST) | ROH(ROGUE_OPERAND_TYPE_REG_TEMP) | ROH(ROGUE_OPERAND_TYPE_REG_SHARED) | ROH(ROGUE_OPERAND_TYPE_REG_VERTEX_IN), .min = -1, .max = -1, .align = -1, },
		},
	},
	[ROGUE_OP_MOV_IMM] = { .flags = 0,
		.num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 0, .max = UINT32_MAX, .align = -1, },
		},
	},
	[ROGUE_OP_FMA] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT) | ROH(ROGUE_INSTR_FLAG_LP),
		.num_operands = 4, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[3] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
		},
	},
	[ROGUE_OP_MUL] = { .flags = ROH(ROGUE_INSTR_FLAG_SAT) | ROH(ROGUE_INSTR_FLAG_LP),
		.num_operands = 3, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
			[2] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
		},
	},
	[ROGUE_OP_VTXOUT] = { .flags = 0,
		.num_operands = 2, .operand_rules = (struct rogue_instr_operand_rule[]){
			[0] = { .mask = ROH(ROGUE_OPERAND_TYPE_IMMEDIATE), .min = 0, .max = ROGUE_MAX_VERTEX_OUTPUTS, .align = -1, },
			[1] = { .mask = ROH(ROGUE_OPERAND_TYPE_REG_TEMP), .min = -1, .max = -1, .align = -1, },
		},
	},
};
+
/**
 * \brief Validates an operand.
 *
 * \param[in] operand The operand.
 * \return true if valid, otherwise false.
 */
bool rogue_validate_operand(const struct rogue_operand *operand)
{
   ASSERT_OPERAND_RANGE(operand->type);

   switch (operand->type) {
   /* Immediate ranges are validated per-instruction in
    * rogue_validate_instr(), so there is nothing to check here.
    */
   case ROGUE_OPERAND_TYPE_IMMEDIATE:
      return true;

   case ROGUE_OPERAND_TYPE_DRC:
      CHECKF(operand->drc.number < ROGUE_NUM_DRCS,
             "Invalid DRC number '%zu'.",
             operand->drc.number);
      return true;

   case ROGUE_OPERAND_TYPE_REG_TEMP:
   case ROGUE_OPERAND_TYPE_REG_COEFF:
   case ROGUE_OPERAND_TYPE_REG_CONST:
   case ROGUE_OPERAND_TYPE_REG_SHARED:
   case ROGUE_OPERAND_TYPE_REG_PIXEL_OUT:
   case ROGUE_OPERAND_TYPE_REG_VERTEX_IN:
   case ROGUE_OPERAND_TYPE_REG_INTERNAL:
      /* .max is an exclusive upper bound on register numbers. */
      CHECKF(operand->reg.number < reg_rules[operand->type].max,
             "Register number '%zu' out of range.",
             operand->reg.number);
      return true;

   default:
      break;
   }

   /* Unhandled operand types are invalid.
    * NOTE(review): unlike the cases above, this path fails silently without
    * a logged message -- consider a CHECKF here.
    */
   return false;
}
+
+/**
+ * \brief Validates an instruction.
+ *
+ * \param[in] instr The instruction.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_validate_instr(const struct rogue_instr *instr)
+{
+   const struct rogue_instr_rule *rule;
+
+   ASSERT_OPCODE_RANGE(instr->opcode);
+
+   /* Look up the static validation rule for this opcode. */
+   rule = &instr_rules[instr->opcode];
+
+   /* Validate flags: every set instruction flag must be permitted by the
+    * rule's flag mask. The CHECK*() macros return false from this function
+    * on failure.
+    */
+   CHECKF(rogue_check_bitset(instr->flags, rule->flags),
+          "Invalid instruction flags specified.");
+
+   /* Validate number of operands. */
+   CHECKF(instr->num_operands == rule->num_operands,
+          "Invalid number of operands specified.");
+
+   /* If operands are expected, the operand array must be present. */
+   CHECK(!rule->num_operands || instr->operands);
+   for (size_t u = 0U; u < instr->num_operands; ++u) {
+      /* Validate operand types against the rule's one-hot type mask. */
+      CHECKF(rogue_check_bitset(rogue_onehot(instr->operands[u].type),
+                                rule->operand_rules[u].mask),
+             "Invalid type for operand %zu.",
+             u);
+
+      /* Validate immediate ranges; min/max of -1 means "no bound". */
+      if (rogue_check_bitset(rogue_onehot(instr->operands[u].type),
+                             ROH(ROGUE_OPERAND_TYPE_IMMEDIATE)) &&
+          rule->operand_rules[u].min != -1 &&
+          rule->operand_rules[u].max != -1) {
+         CHECKF(
+            instr->operands[u].immediate.value >= rule->operand_rules[u].min &&
+               instr->operands[u].immediate.value <= rule->operand_rules[u].max,
+            "Immediate value out of range for operand %zu.",
+            u);
+      }
+
+      /* Validate register alignment; align of -1 means "no requirement". */
+      if (rogue_check_bitset(rogue_onehot(instr->operands[u].type),
+                             ROGUE_MASK_ANY_REG) &&
+          rule->operand_rules[u].align != -1) {
+         CHECKF(!(instr->operands[u].reg.number % rule->operand_rules[u].align),
+                "Invalid register alignment in operand %zu.",
+                u);
+      }
+
+      /* Validate each operand on its own (DRC/register number ranges). */
+      CHECKF(rogue_validate_operand(&instr->operands[u]),
+             "Failed to validate operand.");
+   }
+
+   return true;
+}
+
+/**
+ * \brief Validates a shader.
+ *
+ * \param[in] shader The shader.
+ * \return true if valid, otherwise false.
+ */
+bool rogue_validate_shader(const struct rogue_shader *shader)
+{
+   /* An empty shader is invalid. */
+   CHECK(!list_is_empty(&shader->instr_list));
+   ASSERT_SHADER_STAGE_RANGE(shader->stage);
+
+   /* Shader stage-specific validation. */
+   switch (shader->stage) {
+   case MESA_SHADER_VERTEX:
+      /* Make sure there is (only) one end vertex shader instruction. */
+      CHECKF(rogue_shader_instr_count_type(shader, ROGUE_OP_END_VERT) == 1,
+             "Shader must contain a single end.vert instruction.");
+
+      /* Make sure the end vertex shader instruction is the last one. */
+      CHECKF(instr_last_entry(&shader->instr_list)->opcode == ROGUE_OP_END_VERT,
+             "end.vert not last instruction.");
+      break;
+
+   case MESA_SHADER_FRAGMENT:
+      /* Make sure there is (only) one end fragment shader instruction. */
+      CHECKF(rogue_shader_instr_count_type(shader, ROGUE_OP_END_FRAG) == 1,
+             "Shader must contain a single end.frag instruction.");
+
+      /* Make sure the end fragment shader instruction is the last one. */
+      CHECKF(instr_last_entry(&shader->instr_list)->opcode == ROGUE_OP_END_FRAG,
+             "end.frag not last instruction.");
+      break;
+
+   default:
+      /* Only vertex and fragment shaders are supported. */
+      return false;
+   }
+
+   /* Validate each instruction. */
+   foreach_instr (instr, &shader->instr_list)
+      CHECKF(rogue_validate_instr(instr), "Failed to validate instruction.");
+
+   return true;
+}
diff --git a/src/imagination/rogue/rogue_validate.h b/src/imagination/rogue/rogue_validate.h
new file mode 100644 (file)
index 0000000..36268f3
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ROGUE_VALIDATE_H
+#define ROGUE_VALIDATE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/types.h>
+
+#include "rogue_instr.h"
+#include "rogue_operand.h"
+#include "rogue_shader.h"
+#include "util/macros.h"
+
+/**
+ * \brief Register rule description.
+ */
+struct rogue_register_rule {
+   enum rogue_register_access access; /**< Permitted access for this class. */
+   size_t max; /**< Register number bound (checked exclusively in
+                *   rogue_validate_operand()).
+                */
+   enum rogue_register_modifier modifiers; /**< Permitted modifiers. */
+};
+
+/**
+ * \brief Instruction operand rule description.
+ */
+struct rogue_instr_operand_rule {
+   uint64_t mask; /**< One-hot mask of permitted #rogue_operand_type values. */
+   ssize_t min; /**< Minimum immediate value; -1 = no lower bound. */
+   ssize_t max; /**< Maximum immediate value; -1 = no upper bound. */
+   ssize_t align; /**< Required register alignment; -1 = no requirement. */
+};
+
+/**
+ * \brief Instruction rule description.
+ */
+struct rogue_instr_rule {
+   uint64_t flags; /**< A mask of #rogue_instr_flag values. */
+   size_t num_operands; /**< Expected operand count. */
+   struct rogue_instr_operand_rule *operand_rules; /**< Per-operand rules
+                                                    *   (num_operands entries).
+                                                    */
+};
+
+PUBLIC
+bool rogue_validate_operand(const struct rogue_operand *operand);
+
+PUBLIC
+bool rogue_validate_instr(const struct rogue_instr *instr);
+
+PUBLIC
+bool rogue_validate_shader(const struct rogue_shader *shader);
+
+#endif /* ROGUE_VALIDATE_H */
diff --git a/src/imagination/rogue/tools/offline_compiler.c b/src/imagination/rogue/tools/offline_compiler.c
new file mode 100644 (file)
index 0000000..bbf597f
--- /dev/null
@@ -0,0 +1,314 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "rogue.h"
+#include "rogue_build_data.h"
+#include "rogue_compiler.h"
+#include "rogue_dump.h"
+#include "util/os_file.h"
+#include "util/ralloc.h"
+
+#include <getopt.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Number of hex columns to dump before starting a new line. */
+#define ARRAY_DUMP_COLS 16
+
+/**
+ * \file offline_compiler.c
+ *
+ * \brief Rogue offline compiler.
+ */
+
+static const struct option cmdline_opts[] = {
+   /* Arguments. */
+   { "stage", required_argument, NULL, 's' },
+   { "file", required_argument, NULL, 'f' },
+   { "entry", required_argument, NULL, 'e' },
+
+   /* Options. */
+   { "help", no_argument, NULL, 'h' },
+   { "out", required_argument, NULL, 'o' },
+
+   { "dump-c-array", no_argument, NULL, 'c' },
+   { "dump-rogue", no_argument, NULL, 'r' },
+   { "dump-nir", no_argument, NULL, 'n' },
+
+   { NULL, 0, NULL, 0 },
+};
+
+struct compiler_opts {
+   gl_shader_stage stage;
+   char *file;
+   char *entry;
+   char *out_file;
+   bool dump_c_array;
+   bool dump_rogue;
+   bool dump_nir;
+};
+
+/**
+ * \brief Prints the command-line usage/help text.
+ *
+ * \param[in] argv0 Program name as invoked (argv[0]).
+ */
+static void usage(const char *argv0)
+{
+   /* clang-format off */
+   printf("Rogue offline compiler.\n");
+   printf("Usage: %s -s <stage> -f <file> [-e <entry>] [-o <file>] [-c] [-r] [-n] [-h]\n", argv0);
+   printf("\n");
+
+   printf("Required arguments:\n");
+   printf("\t-s, --stage <stage> Shader stage (supported options: frag, vert).\n");
+   printf("\t-f, --file <file>   Shader SPIR-V filename.\n");
+   printf("\n");
+
+   printf("Options:\n");
+   printf("\t-h, --help          Prints this help message.\n");
+   printf("\t-e, --entry <entry> Overrides the shader entry-point name (default: 'main').\n");
+   printf("\t-o, --out <file>    Overrides the output filename (default: 'out.bin').\n");
+   printf("\n");
+
+   printf("\t-c, --dump-c-array  Print the shader binary as a C byte array.\n");
+   printf("\t-r, --dump-rogue    Prints the shader Rogue assembly.\n");
+   printf("\t-n, --dump-nir      Prints the shader NIR.\n");
+   printf("\n");
+   /* clang-format on */
+}
+
+/**
+ * \brief Parses the command-line options into \p opts.
+ *
+ * Repeated occurrences of -e/-f/-o/-s keep the first value seen; later
+ * duplicates are silently ignored. Missing optional values are defaulted
+ * ("out.bin" output file, "main" entry point).
+ *
+ * \param[in] argc Argument count.
+ * \param[in] argv Argument vector.
+ * \param[out] opts Parsed options (entry/file/out_file point into argv).
+ * \return true on success, false on invalid/missing arguments (usage printed).
+ */
+static bool parse_cmdline(int argc, char *argv[], struct compiler_opts *opts)
+{
+   int opt;
+   int longindex;
+
+   while (
+      (opt =
+          getopt_long(argc, argv, "crnhs:f:e:o:", cmdline_opts, &longindex)) !=
+      -1) {
+      switch (opt) {
+      case 'c':
+         opts->dump_c_array = true;
+         break;
+
+      case 'e':
+         /* First -e wins. */
+         if (opts->entry)
+            continue;
+
+         opts->entry = optarg;
+         break;
+
+      case 'f':
+         /* First -f wins. */
+         if (opts->file)
+            continue;
+
+         opts->file = optarg;
+         break;
+
+      case 'n':
+         opts->dump_nir = true;
+         break;
+
+      case 'o':
+         /* First -o wins. */
+         if (opts->out_file)
+            continue;
+
+         opts->out_file = optarg;
+         break;
+
+      case 'r':
+         opts->dump_rogue = true;
+         break;
+
+      case 's':
+         /* First -s wins. */
+         if (opts->stage != MESA_SHADER_NONE)
+            continue;
+
+         if (!strcmp(optarg, "frag"))
+            opts->stage = MESA_SHADER_FRAGMENT;
+         else if (!strcmp(optarg, "vert"))
+            opts->stage = MESA_SHADER_VERTEX;
+         else {
+            fprintf(stderr, "Invalid stage \"%s\".\n", optarg);
+            usage(argv[0]);
+            return false;
+         }
+
+         break;
+
+      case 'h':
+      default:
+         usage(argv[0]);
+         return false;
+      }
+   }
+
+   /* Both the stage and the input file are mandatory. */
+   if (opts->stage == MESA_SHADER_NONE || !opts->file) {
+      fprintf(stderr,
+              "%s: --stage and --file are required arguments.\n",
+              argv[0]);
+      usage(argv[0]);
+      return false;
+   }
+
+   /* Apply defaults for the optional arguments. */
+   if (!opts->out_file)
+      opts->out_file = "out.bin";
+
+   if (!opts->entry)
+      opts->entry = "main";
+
+   return true;
+}
+
+/**
+ * \brief Offline compiler entry point: SPIR-V -> NIR -> Rogue -> binary.
+ *
+ * Resources are released via a goto-based cleanup chain in reverse
+ * acquisition order.
+ *
+ * \return 0 on success, 1 on any failure.
+ */
+int main(int argc, char *argv[])
+{
+   /* Command-line options. */
+   /* N.B. MESA_SHADER_NONE != 0, so it must be set explicitly; the trailing
+    * 0 zero-initializes the remaining members.
+    */
+   struct compiler_opts opts = { .stage = MESA_SHADER_NONE, 0 };
+
+   /* Input file data (heap-allocated by os_read_file(); freed below). */
+   char *input_data;
+   size_t input_size;
+
+   /* Compiler context. */
+   struct rogue_compiler *compiler;
+
+   /* Multi-stage build context (ralloc'd; owns the per-stage artifacts). */
+   struct rogue_build_ctx *ctx;
+
+   /* Output file. */
+   FILE *fp;
+   size_t bytes_written;
+
+   /* Parse command-line options. */
+   if (!parse_cmdline(argc, argv, &opts))
+      return 1;
+
+   /* Load SPIR-V input file. */
+   input_data = os_read_file(opts.file, &input_size);
+   if (!input_data) {
+      fprintf(stderr, "Failed to read file \"%s\".\n", opts.file);
+      return 1;
+   }
+
+   /* Create compiler context. */
+   compiler = rogue_compiler_create(NULL);
+   if (!compiler) {
+      fprintf(stderr, "Failed to set up compiler context.\n");
+      goto err_free_input;
+   }
+
+   ctx = rogue_create_build_context(compiler);
+   if (!ctx) {
+      fprintf(stderr, "Failed to set up build context.\n");
+      goto err_destroy_compiler;
+   }
+
+   /* SPIR-V -> NIR. */
+   ctx->nir[opts.stage] = rogue_spirv_to_nir(ctx,
+                                             opts.stage,
+                                             opts.entry,
+                                             input_size / sizeof(uint32_t),
+                                             (uint32_t *)input_data,
+                                             0,
+                                             NULL);
+   if (!ctx->nir[opts.stage]) {
+      fprintf(stderr, "Failed to translate SPIR-V input to NIR.\n");
+      goto err_free_build_context;
+   }
+
+   /* Dump NIR shader. */
+   if (opts.dump_nir)
+      nir_print_shader(ctx->nir[opts.stage], stdout);
+
+   /* NIR -> Rogue. */
+   ctx->rogue[opts.stage] = rogue_nir_to_rogue(ctx, ctx->nir[opts.stage]);
+   if (!ctx->rogue[opts.stage]) {
+      fprintf(stderr, "Failed to translate NIR input to Rogue.\n");
+      goto err_free_build_context;
+   }
+
+   /* Dump Rogue shader. */
+   if (opts.dump_rogue)
+      rogue_dump_shader(ctx->rogue[opts.stage], stdout);
+
+   /* Rogue -> Binary. */
+   ctx->binary[opts.stage] = rogue_to_binary(ctx, ctx->rogue[opts.stage]);
+   if (!ctx->binary[opts.stage]) {
+      fprintf(stderr, "Failed to translate Rogue to binary.\n");
+      goto err_free_build_context;
+   }
+
+   /* Dump binary as a C array. */
+   if (opts.dump_c_array) {
+      printf("uint8_t shader_bytes[%zu] = {", ctx->binary[opts.stage]->size);
+      for (size_t u = 0U; u < ctx->binary[opts.stage]->size; ++u) {
+         /* Start a new row every ARRAY_DUMP_COLS bytes. */
+         if (!(u % ARRAY_DUMP_COLS))
+            printf("\n\t");
+
+         printf("0x%02x, ", ctx->binary[opts.stage]->data[u]);
+      }
+      printf("\n};\n");
+   }
+
+   /* Write shader binary to disk. */
+   fp = fopen(opts.out_file, "wb");
+   if (!fp) {
+      fprintf(stderr, "Failed to open output file \"%s\".\n", opts.out_file);
+      goto err_free_build_context;
+   }
+
+   bytes_written = fwrite(ctx->binary[opts.stage]->data,
+                          1,
+                          ctx->binary[opts.stage]->size,
+                          fp);
+   if (bytes_written != ctx->binary[opts.stage]->size) {
+      fprintf(
+         stderr,
+         "Failed to write to output file \"%s\" (%zu bytes of %zu written).\n",
+         opts.out_file,
+         bytes_written,
+         ctx->binary[opts.stage]->size);
+      goto err_close_outfile;
+   }
+
+   /* Clean up. */
+   fclose(fp);
+   ralloc_free(ctx);
+   rogue_compiler_destroy(compiler);
+   free(input_data);
+
+   return 0;
+
+   /* Error cleanup: release in reverse order of acquisition. */
+err_close_outfile:
+   fclose(fp);
+err_free_build_context:
+   ralloc_free(ctx);
+err_destroy_compiler:
+   rogue_compiler_destroy(compiler);
+err_free_input:
+   free(input_data);
+
+   return 1;
+}
diff --git a/src/imagination/vulkan/meson.build b/src/imagination/vulkan/meson.build
new file mode 100644 (file)
index 0000000..a85f732
--- /dev/null
@@ -0,0 +1,171 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+subdir('pds')
+
+# Generate the Vulkan entrypoint tables/prototypes from the API XML.
+pvr_entrypoints = custom_target(
+  'pvr_entrypoints',
+  input : [vk_entrypoints_gen, vk_api_xml],
+  output : ['pvr_entrypoints.h', 'pvr_entrypoints.c'],
+  command : [
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--proto', '--weak',
+    '--out-h', '@OUTPUT0@', '--out-c', '@OUTPUT1@', '--prefix', 'pvr',
+  ],
+  depend_files : vk_entrypoints_gen_depend_files,
+)
+
+pvr_files = files(
+  'winsys/powervr/pvr_drm.c',
+  'winsys/pvr_winsys.c',
+  'winsys/pvr_winsys_helper.c',
+  'pvr_blit.c',
+  'pvr_bo.c',
+  'pvr_cmd_buffer.c',
+  'pvr_csb.c',
+  'pvr_descriptor_set.c',
+  'pvr_device.c',
+  'pvr_formats.c',
+  'pvr_hw_pass.c',
+  'pvr_image.c',
+  'pvr_job_common.c',
+  'pvr_job_compute.c',
+  'pvr_job_context.c',
+  'pvr_job_render.c',
+  'pvr_pass.c',
+  'pvr_pipeline.c',
+  'pvr_pipeline_cache.c',
+  'pvr_query.c',
+  'pvr_queue.c',
+  'pvr_shader.c',
+  'pvr_tex_state.c',
+  'pvr_wsi.c',
+)
+
+pvr_includes = [
+  include_directories('usc/programs'),
+  include_directories('winsys'),
+  libpowervr_pds_includes,
+]
+
+pvr_deps = [
+  dep_csbgen,
+  dep_libdrm,
+  dep_valgrind,
+  idep_vulkan_runtime,
+  idep_vulkan_util,
+  idep_vulkan_wsi,
+]
+
+pvr_flags = [
+  no_override_init_args,
+]
+
+# Optional backend for the closed-source Services (pvrsrvkm) kernel driver.
+if with_imagination_srv
+  pvr_files += files(
+    'winsys/pvrsrvkm/pvr_srv.c',
+    'winsys/pvrsrvkm/pvr_srv_bo.c',
+    'winsys/pvrsrvkm/pvr_srv_bridge.c',
+    'winsys/pvrsrvkm/pvr_srv_job_compute.c',
+    'winsys/pvrsrvkm/pvr_srv_job_render.c',
+    'winsys/pvrsrvkm/pvr_srv_syncobj.c',
+  )
+  pvr_flags += '-DPVR_SUPPORT_SERVICES_DRIVER'
+endif
+
+# The installable Vulkan ICD shared library.
+libvulkan_powervr_mesa = shared_library(
+  'vulkan_powervr_mesa',
+  [pvr_files, pvr_entrypoints],
+  include_directories : [
+    pvr_includes,
+    inc_gallium_aux,
+    inc_imagination,
+    inc_include,
+    inc_src,
+    inc_mesa,
+    inc_gallium,
+    inc_compiler,
+  ],
+  link_with : [
+    libpowervr_common,
+    libpowervr_pds,
+    libpowervr_rogue,
+    libvulkan_wsi,
+  ],
+  dependencies : [
+    pvr_deps,
+    idep_nir,
+  ],
+  c_args : pvr_flags,
+  link_args : [
+    ld_args_build_id,
+    ld_args_bsymbolic,
+    ld_args_gc_sections
+  ],
+  gnu_symbol_visibility : 'hidden',
+  install : true,
+)
+
+# Verify the library exports only the expected ICD symbols.
+if with_symbols_check
+  test(
+    'pvr symbols check',
+    symbols_check,
+    args : [
+      '--lib', libvulkan_powervr_mesa,
+      '--symbols-file', vulkan_icd_symbols,
+      symbols_check_args,
+    ],
+    suite : ['imagination'],
+  )
+endif
+
+# ICD manifest pointing at the installed library path.
+powervr_mesa_icd = custom_target(
+  'powervr_mesa_icd',
+  input : [vk_icd_gen, vk_api_xml],
+  output : 'powervr_mesa_icd.@0@.json'.format(host_machine.cpu()),
+  command : [
+    prog_python, '@INPUT0@',
+    '--api-version', '1.0', '--xml', '@INPUT1@',
+    '--lib-path', join_paths(get_option('prefix'), get_option('libdir'),
+                             'libvulkan_powervr_mesa.so'),
+    '--out', '@OUTPUT@',
+  ],
+  build_by_default : true,
+  install_dir : with_vulkan_icd_dir,
+  install : true,
+)
+
+# Devenv ICD manifest pointing at the build directory, for running the
+# driver without installing (requires meson >= 0.58 for devenv).
+if meson.version().version_compare('>= 0.58')
+  _dev_icdname = 'powervr_mesa_devenv_icd.@0@.json'.format(host_machine.cpu())
+  custom_target(
+    'powervr_mesa_devenv_icd',
+    input : [vk_icd_gen, vk_api_xml],
+    output : _dev_icdname,
+    command : [
+      prog_python, '@INPUT0@',
+      '--api-version', '1.0', '--xml', '@INPUT1@',
+      '--lib-path', meson.current_build_dir() / 'libvulkan_powervr_mesa.so',
+      '--out', '@OUTPUT@',
+    ],
+    build_by_default : true,
+  )
+
+  devenv.append('VK_ICD_FILENAMES', meson.current_build_dir() / _dev_icdname)
+endif
diff --git a/src/imagination/vulkan/pds/meson.build b/src/imagination/vulkan/pds/meson.build
new file mode 100644 (file)
index 0000000..20f4790
--- /dev/null
@@ -0,0 +1,50 @@
+# Copyright © 2022 Imagination Technologies Ltd.
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+
+# Static helper library for building PDS (Programmable Data Sequencer)
+# programs; linked into the Vulkan driver.
+libpowervr_pds_files = files(
+  'pvr_pds.c',
+  'pvr_pds_disasm.c',
+  'pvr_pds_printer.c',
+  'pvr_xgl_pds.c',
+)
+
+libpowervr_pds_includes = include_directories(
+  '..',
+  '.',
+  'pvr_pds_programs',
+)
+
+libpowervr_pds = static_library(
+  'pvr_pds',
+  [libpowervr_pds_files],
+  include_directories : [
+    libpowervr_pds_includes,
+    inc_include,
+    inc_src,
+    inc_imagination,
+  ],
+  c_args : [
+    no_override_init_args,
+  ],
+  gnu_symbol_visibility : 'hidden',
+  pic : true,
+)
diff --git a/src/imagination/vulkan/pds/pvr_pds.c b/src/imagination/vulkan/pds/pvr_pds.c
new file mode 100644 (file)
index 0000000..c7e83e4
--- /dev/null
@@ -0,0 +1,5179 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "pvr_device_info.h"
+#include "pvr_pds.h"
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "pvr_rogue_pds_encode.h"
+#include "util/log.h"
+#include "util/macros.h"
+
+/* Extract the high/low 32 bits of a 64-bit value. */
+#define H32(X) (uint32_t)((((X) >> 32U) & 0xFFFFFFFFUL))
+#define L32(X) (uint32_t)(((X)&0xFFFFFFFFUL))
+
+/*****************************************************************************
+ Macro definitions
+*****************************************************************************/
+
+/* Shift to convert between byte and dword units. */
+#define PVR_PDS_DWORD_SHIFT 2
+
+/* Base/size (in dwords) of the PDS constants and temps blocks. */
+#define PVR_PDS_CONSTANTS_BLOCK_BASE 0
+#define PVR_PDS_CONSTANTS_BLOCK_SIZE 128
+#define PVR_PDS_TEMPS_BLOCK_BASE 128
+#define PVR_PDS_TEMPS_BLOCK_SIZE 32
+
+#define PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE PVR_ROGUE_PDSINST_ST_COUNT4_MASK
+#define PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE PVR_ROGUE_PDSINST_LD_COUNT8_MASK
+
+/* Map PDS temp registers to the CDM values they contain. Work-group IDs are
+ * only available in the coefficient sync task.
+ */
+#define PVR_PDS_CDM_WORK_GROUP_ID_X 0
+#define PVR_PDS_CDM_WORK_GROUP_ID_Y 1
+#define PVR_PDS_CDM_WORK_GROUP_ID_Z 2
+/* Local IDs are available in every task. */
+#define PVR_PDS_CDM_LOCAL_ID_X 0
+#define PVR_PDS_CDM_LOCAL_ID_YZ 1
+
+/* DOUTW dword-mask selectors; indices into dword_mask_const[] below. */
+#define PVR_PDS_DOUTW_LOWER32 0x0
+#define PVR_PDS_DOUTW_UPPER32 0x1
+#define PVR_PDS_DOUTW_LOWER64 0x2
+#define PVR_PDS_DOUTW_LOWER128 0x3
+#define PVR_PDS_DOUTW_MAXMASK 0x4
+
+#define ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE 8U
+#define PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE (16U)
+
+/*****************************************************************************
+ Static variables
+*****************************************************************************/
+
+/* DOUTW SRC1 BSIZE encodings indexed by PVR_PDS_DOUTW_*; note LOWER128
+ * shares the ALL64 encoding.
+ */
+static const uint32_t dword_mask_const[PVR_PDS_DOUTW_MAXMASK] = {
+   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER,
+   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER,
+   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64,
+   PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64
+};
+
+/* If the slc_mcu_cache_controls feature is present use
+ * cache_control_const[0], else use cache_control_const[1] (no-op).
+ */
+static const uint32_t cache_control_const[2][2] = {
+   { PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS,
+     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED },
+   { 0, 0 }
+};
+
+/*****************************************************************************
+ Function definitions
+*****************************************************************************/
+
+/**
+ * \brief Encodes the SRC0 field of a PDS LD (DMA load) instruction.
+ *
+ * \param[in] dest Destination register (64-bit temp register index).
+ * \param[in] count8 Number of units to load (COUNT8 field).
+ * \param[in] src_add Source address.
+ * \param[in] cached Whether the access goes through the MCU cache.
+ * \param[in] dev_info Device info, used to query SLC cache-control support.
+ * \return The encoded SRC0 field.
+ */
+uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
+                                uint64_t count8,
+                                uint64_t src_add,
+                                bool cached,
+                                const struct pvr_device_info *dev_info)
+{
+   uint64_t encoded = 0;
+
+   /* SLC cache mode is only encodable on devices with the
+    * slc_mcu_cache_controls feature.
+    */
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+      encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED
+                         : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS);
+   }
+
+   encoded |= ((src_add & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
+               << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
+   encoded |= ((count8 & PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
+               << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
+   encoded |= (cached ? PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED
+                      : PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS);
+   encoded |= ((dest & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
+
+   return encoded;
+}
+
+/**
+ * \brief Encodes the SRC0 field of a PDS ST (DMA store) instruction.
+ *
+ * \param[in] src Source register (32-bit temp register index).
+ * \param[in] count4 Number of units to store (COUNT4 field).
+ * \param[in] dst_add Destination address.
+ * \param[in] write_through Whether to use write-through (vs write-back) cache
+ *                          mode.
+ * \param[in] device_info Device info, used to query SLC cache-control support.
+ * \return The encoded SRC0 field.
+ */
+uint64_t pvr_pds_encode_st_src0(uint64_t src,
+                                uint64_t count4,
+                                uint64_t dst_add,
+                                bool write_through,
+                                const struct pvr_device_info *device_info)
+{
+   uint64_t encoded = 0;
+
+   /* Use the PVR_HAS_FEATURE() query for consistency with
+    * pvr_pds_encode_ld_src0() rather than poking the features struct
+    * directly.
+    */
+   if (PVR_HAS_FEATURE(device_info, slc_mcu_cache_controls)) {
+      encoded |= (write_through
+                     ? PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH
+                     : PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK);
+   }
+
+   encoded |= ((dst_add & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
+               << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
+   encoded |= ((count4 & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
+               << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
+   encoded |= (write_through ? PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH
+                             : PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK);
+   encoded |= ((src & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+               << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
+
+   return encoded;
+}
+
+/**
+ * \brief Encodes the SRC1 field of a DOUTW (write data out) instruction.
+ *
+ * \param[in] dest Destination offset; must be 2-dword aligned for LOWER64
+ *                 and 4-dword aligned for larger masks.
+ * \param[in] dword_mask One of the PVR_PDS_DOUTW_* selectors.
+ * \param[in] flags Additional DOUTW flag bits, OR'd in verbatim.
+ * \param[in] cached Whether to use the cached mode (ignored when the device
+ *                   lacks slc_mcu_cache_controls; see cache_control_const).
+ * \param[in] dev_info Device info, used to query SLC cache-control support.
+ * \return The encoded SRC1 field.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_encode_doutw_src1(uint32_t dest,
+                          uint32_t dword_mask,
+                          uint32_t flags,
+                          bool cached,
+                          const struct pvr_device_info *dev_info)
+{
+   /* Destination alignment requirement grows with the write size. */
+   assert(((dword_mask > PVR_PDS_DOUTW_LOWER64) && ((dest & 3) == 0)) ||
+          ((dword_mask == PVR_PDS_DOUTW_LOWER64) && ((dest & 1) == 0)) ||
+          (dword_mask < PVR_PDS_DOUTW_LOWER64));
+
+   uint32_t encoded =
+      (dest << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT);
+
+   encoded |= dword_mask_const[dword_mask];
+
+   encoded |= flags;
+
+   /* Row 1 of cache_control_const is all zeros, so this is a no-op on
+    * devices without slc_mcu_cache_controls.
+    */
+   encoded |=
+      cache_control_const[PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) ? 0
+                                                                            : 1]
+                         [cached ? 1 : 0];
+   return encoded;
+}
+
+/* Convenience wrappers around pvr_pds_inst_encode_dout() for each DOUT
+ * destination. DOUTC/DOUTU/DOUTI take no src1 (passed as 0).
+ */
+
+/** \brief Encodes a DOUTW (data write) instruction. */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutw64(uint32_t cc,
+                                                     uint32_t end,
+                                                     uint32_t src1,
+                                                     uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc,
+                                   end,
+                                   src1,
+                                   src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
+}
+
+/** \brief Encodes a DOUTU (USC task) instruction. */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc,
+                                   end,
+                                   0,
+                                   src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
+}
+
+/** \brief Encodes a DOUTC instruction (no sources). */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_doutc(uint32_t cc,
+                                                        uint32_t end)
+{
+   return pvr_pds_inst_encode_dout(cc,
+                                   end,
+                                   0,
+                                   0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTC);
+}
+
+/** \brief Encodes a DOUTD (DMA) instruction. */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutd(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src1,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc,
+                                   end,
+                                   src1,
+                                   src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
+}
+
+/** \brief Encodes a DOUTI instruction. */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_douti(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc,
+                                   end,
+                                   0,
+                                   src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTI);
+}
+
+/** \brief Encodes a DOUTV instruction. */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutv(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src1,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc,
+                                   end,
+                                   src1,
+                                   src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTV);
+}
+
+/**
+ * \brief Encodes a branch (BRA) instruction with a signed relative address.
+ *
+ * Wrapper so callers can pass a signed offset even though the underlying
+ * encoder's API takes an unsigned value.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_bra(uint32_t srcc,
+                                                 uint32_t neg,
+                                                 uint32_t setc,
+                                                 int32_t relative_address)
+{
+   /* Address should be signed but API only allows unsigned value. */
+   return pvr_pds_inst_encode_bra(srcc, neg, setc, (uint32_t)relative_address);
+}
+
+/**
+ * Gets the next constant address and moves the next constant pointer along.
+ *
+ * \param next_constant Pointer to the next constant address.
+ * \param num_constants The number of constants required.
+ * \param count The number of constants allocated.
+ * \return The address of the next constant.
+ */
+static uint32_t pvr_pds_get_constants(uint32_t *next_constant,
+                                      uint32_t num_constants,
+                                      uint32_t *count)
+{
+   uint32_t constant;
+
+   /* Work out starting constant number. For even number of constants, start on
+    * a 64-bit boundary.
+    */
+   if (num_constants & 1)
+      constant = *next_constant;
+   else
+      constant = (*next_constant + 1) & ~1;
+
+   /* Update the count with the number of constants actually allocated. */
+   *count += constant + num_constants - *next_constant;
+
+   /* Move the next constant pointer. */
+   *next_constant = constant + num_constants;
+
+   assert((constant + num_constants) <= PVR_PDS_CONSTANTS_BLOCK_SIZE);
+
+   return constant;
+}
+
+/**
+ * Gets the next temp address and moves the next temp pointer along.
+ *
+ * \param next_temp Pointer to the next temp address.
+ * \param num_temps The number of temps required.
+ * \param count The number of temps allocated.
+ * \return The address of the next temp.
+ */
+static uint32_t
+pvr_pds_get_temps(uint32_t *next_temp, uint32_t num_temps, uint32_t *count)
+{
+   uint32_t temp;
+
+   /* Work out starting temp number. For even number of temps, start on a
+    * 64-bit boundary.
+    */
+   if (num_temps & 1)
+      temp = *next_temp;
+   else
+      temp = (*next_temp + 1) & ~1;
+
+   /* Update the count with the number of temps actually allocated. */
+   *count += temp + num_temps - *next_temp;
+
+   /* Move the next temp pointer. */
+   *next_temp = temp + num_temps;
+
+   assert((temp + num_temps) <=
+          (PVR_PDS_TEMPS_BLOCK_SIZE + PVR_PDS_TEMPS_BLOCK_BASE));
+
+   return temp;
+}
+
/**
 * Write a 32-bit constant indexed by the long range.
 *
 * \param data_block Pointer to data block to write to.
 * \param index Index within the data to write to.
 * \param dword0 The 32-bit constant to write.
 */
static void
pvr_pds_write_constant32(uint32_t *data_block, uint32_t index, uint32_t dword0)
{
   /* Check range. */
   assert(index <= (PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER -
                    PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER));

   data_block[index + 0] = dword0;

   PVR_PDS_PRINT_DATA("WriteConstant32", (uint64_t)dword0, index);
}
+
+/**
+ * Write a 64-bit constant indexed by the long range.
+ *
+ * \param data_block Pointer to data block to write to.
+ * \param index Index within the data to write to.
+ * \param dword0 Lower half of the 64 bit constant.
+ * \param dword1 Upper half of the 64 bit constant.
+ */
+static void pvr_pds_write_constant64(uint32_t *data_block,
+                                     uint32_t index,
+                                     uint32_t dword0,
+                                     uint32_t dword1)
+{
+   /* Has to be on 64 bit boundary. */
+   assert((index & 1) == 0);
+
+   /* Check range. */
+   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
+                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
+
+   data_block[index + 0] = dword0;
+   data_block[index + 1] = dword1;
+
+   PVR_PDS_PRINT_DATA("WriteConstant64",
+                      ((uint64_t)dword0 << 32) | (uint64_t)dword1,
+                      index);
+}
+
+/**
+ * Write a 64-bit constant from a single wide word indexed by the long-range
+ * number.
+ *
+ * \param data_block Pointer to data block to write to.
+ * \param index Index within the data to write to.
+ * \param word The 64-bit constant to write.
+ */
+
+static void
+pvr_pds_write_wide_constant(uint32_t *data_block, uint32_t index, uint64_t word)
+{
+   /* Has to be on 64 bit boundary. */
+   assert((index & 1) == 0);
+
+   /* Check range. */
+   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
+                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
+
+   data_block[index + 0] = L32(word);
+   data_block[index + 1] = H32(word);
+
+   PVR_PDS_PRINT_DATA("WriteWideConstant", word, index);
+}
+
+static void pvr_pds_write_dma_address(uint32_t *data_block,
+                                      uint32_t index,
+                                      uint64_t address,
+                                      bool coherent,
+                                      const struct pvr_device_info *dev_info)
+{
+   /* Has to be on 64 bit boundary. */
+   assert((index & 1) == 0);
+
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
+      address |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
+
+   /* Check range. */
+   assert((index >> 1) <= (PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER -
+                           PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER));
+
+   data_block[index + 0] = L32(address);
+   data_block[index + 1] = H32(address);
+
+   PVR_PDS_PRINT_DATA("WriteDMAAddress", address, index);
+}
+
/**
 * External API to append a 64-bit constant to an existing data segment
 * allocation.
 *
 * \param constants Pointer to start of data segment.
 * \param constant_value Value to write to constant.
 * \param data_size The number of constants allocated (updated).
 * \returns The address of the appended constant.
 */
uint32_t pvr_pds_append_constant64(uint32_t *constants,
                                   uint64_t constant_value,
                                   uint32_t *data_size)
{
   /* The current data size is where the next constant starts. */
   uint32_t next_constant = *data_size;
   const uint32_t constant =
      pvr_pds_get_constants(&next_constant, 2, data_size);

   pvr_pds_write_wide_constant(constants, constant, constant_value);

   return constant;
}
+
+void pvr_pds_pixel_shader_sa_initialize(
+   struct pvr_pds_pixel_shader_sa_program *program)
+{
+   memset(program, 0, sizeof(*program));
+}
+
+/**
+ * Encode a DMA burst.
+ *
+ * \param dma_control DMA control words.
+ * \param dma_address DMA address.
+ * \param dest_offset Destination offset in the attribute.
+ * \param dma_size The size of the DMA in words.
+ * \param src_address Source address for the burst.
+ * \param dev_info PVR device info structure.
+ * \returns The number of DMA transfers required.
+ */
+
+uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
+                                  uint64_t *dma_address,
+                                  uint32_t dest_offset,
+                                  uint32_t dma_size,
+                                  uint64_t src_address,
+                                  const struct pvr_device_info *dev_info)
+{
+   /* Simplified for MS2. */
+
+   /* Force to 1 DMA. */
+   const uint32_t num_kicks = 1;
+
+   dma_control[0] = dma_size
+                    << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
+   dma_control[0] |= dest_offset
+                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
+
+   dma_control[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
+                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE;
+
+   dma_address[0] = src_address;
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+      dma_address[0] |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED;
+   }
+
+   return num_kicks;
+}
+
+/* FIXME: use the csbgen interface and pvr_csb_pack.
+ * FIXME: use bool for phase_rate_change.
+ */
+/**
+ * Sets up the USC control words for a DOUTU.
+ *
+ * \param usc_task_control USC task control structure to be setup.
+ * \param execution_address USC execution virtual address.
+ * \param usc_temps Number of USC temps.
+ * \param sample_rate Sample rate for the DOUTU.
+ * \param phase_rate_change Phase rate change for the DOUTU.
+ */
+void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
+                         uint64_t execution_address,
+                         uint32_t usc_temps,
+                         uint32_t sample_rate,
+                         uint32_t phase_rate_change)
+{
+   usc_task_control->src0 = UINT64_C(0);
+
+   /* Set the execution address. */
+   pvr_set_usc_execution_address64(&(usc_task_control->src0),
+                                   execution_address);
+
+   if (usc_temps > 0) {
+      /* Temps are allocated in blocks of 4 dwords. */
+      usc_temps =
+         DIV_ROUND_UP(usc_temps,
+                      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE);
+
+      /* Check for losing temps due to too many requested. */
+      assert((usc_temps & PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK) ==
+             usc_temps);
+
+      usc_task_control->src0 |=
+         ((uint64_t)(usc_temps &
+                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK))
+         << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT;
+   }
+
+   if (sample_rate > 0) {
+      usc_task_control->src0 |=
+         ((uint64_t)sample_rate)
+         << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT;
+   }
+
+   if (phase_rate_change) {
+      usc_task_control->src0 |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN;
+   }
+}
+
/**
 * Generates the PDS pixel event program.
 *
 * Data segment layout (in allocation order): one 64-bit USC task control
 * word, then `num_emit_word_pairs` 64-bit emit word pairs, then
 * `num_emit_word_pairs` 32-bit DOUTW control words.
 *
 * \param program Pointer to the PDS pixel event program.
 * \param buffer Pointer to the buffer for the program.
 * \param gen_mode Generate either a data segment or code segment.
 * \param dev_info PVR device info structure.
 * \returns Pointer to just beyond the buffer for the program.
 */
uint32_t *
pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
                             uint32_t *restrict buffer,
                             enum pvr_pds_generate_mode gen_mode,
                             const struct pvr_device_info *dev_info)
{
   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
   /* `constants` aliases `buffer`; both names are used below for the data
    * segment writes.
    */
   uint32_t *constants = buffer;

   uint32_t data_size = 0;

   /* Copy the DMA control words and USC task control words to constants, then
    * arrange them so that the 64-bit words are together followed by the 32-bit
    * words.
    */
   uint32_t control_constant =
      pvr_pds_get_constants(&next_constant, 2, &data_size);
   uint32_t emit_constant =
      pvr_pds_get_constants(&next_constant,
                            (2 * program->num_emit_word_pairs),
                            &data_size);

   uint32_t control_word_constant =
      pvr_pds_get_constants(&next_constant,
                            program->num_emit_word_pairs,
                            &data_size);

   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
      /* Src0 for DOUTU. */
      pvr_pds_write_wide_constant(buffer,
                                  control_constant,
                                  program->task_control.src0); /* DOUTU */
      /* 64-bit Src0. */

      /* Emit words for end of tile program. */
      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
         pvr_pds_write_constant64(constants,
                                  emit_constant + (2 * i),
                                  program->emit_words[(2 * i) + 0],
                                  program->emit_words[(2 * i) + 1]);
      }

      /* Control words. */
      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
         uint32_t doutw = pvr_pds_encode_doutw_src1(
            (2 * i),
            PVR_PDS_DOUTW_LOWER64,
            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
            false,
            dev_info);

         /* Mark the final DOUTW so the hardware knows the sequence ends. */
         if (i == (program->num_emit_word_pairs - 1))
            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;

         pvr_pds_write_constant32(constants, control_word_constant + i, doutw);
      }
   }

   else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
      /* DOUTW the state into the shared register. */
      for (uint32_t i = 0; i < program->num_emit_word_pairs; i++) {
         *buffer++ = pvr_pds_encode_doutw64(
            /* cc */ 0,
            /* END */ 0,
            /* SRC1 */ (control_word_constant + i), /* DOUTW 32-bit Src1 */
            /* SRC0 */ (emit_constant + (2 * i)) >> 1); /* DOUTW 64-bit Src0
                                                         */
      }

      /* Kick the USC. */
      *buffer++ = pvr_pds_encode_doutu(
         /* cc */ 0,
         /* END */ 1,
         /* SRC0 */ control_constant >> 1);
   }

   /* One DOUTW per emit word pair plus the final DOUTU. */
   uint32_t code_size = 1 + program->num_emit_word_pairs;

   /* Save the data segment Pointer and size. */
   program->data_segment = constants;
   program->data_size = data_size;
   program->code_size = code_size;

   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
      return (constants + next_constant);

   if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
      return buffer;

   /* Any other gen_mode (e.g. sizes-only) has updated the program struct
    * without writing the buffer.
    */
   return NULL;
}
+
+/**
+ * Checks if any of the vertex streams contains instance data.
+ *
+ * \param streams Streams contained in the vertex shader.
+ * \param num_streams Number of vertex streams.
+ * \returns true if one or more of the given vertex streams contains
+ *          instance data, otherwise false.
+ */
+static bool pvr_pds_vertex_streams_contains_instance_data(
+   const struct pvr_pds_vertex_stream *streams,
+   uint32_t num_streams)
+{
+   for (uint32_t i = 0; i < num_streams; i++) {
+      const struct pvr_pds_vertex_stream *vertex_stream = &streams[i];
+      if (vertex_stream->instance_data)
+         return true;
+   }
+
+   return false;
+}
+
/* Allocate 1 or 2 constants for a PDS vertex shader whose constant space is
 * divided into 8-dword banks, packing small allocations into the gaps the
 * per-stream bank layout leaves free.
 *
 * \param num_backs Number of banks in use. NOTE(review): reads as a typo for
 *                  "num_banks" — confirm against callers.
 * \param next_constant Pointer to the next constant address (updated).
 * \param num_constants Number of constants required (must be 1 or 2).
 * \param count Running total of constants allocated (updated — but see the
 *              hedged note below; most paths here do not update it).
 * \return The address of the allocated constant.
 */
static uint32_t pvr_pds_get_bank_based_constants(uint32_t num_backs,
                                                 uint32_t *next_constant,
                                                 uint32_t num_constants,
                                                 uint32_t *count)
{
   /* Allocate constant for PDS vertex shader where constant is divided into
    * banks.
    */
   uint32_t constant;

   assert(num_constants == 1 || num_constants == 2);

   /* Past the banked region (num_backs * 8 dwords): fall back to the plain
    * linear allocator.
    */
   if (*next_constant >= (num_backs << 3))
      return pvr_pds_get_constants(next_constant, num_constants, count);

   if ((*next_constant % 8) == 0) {
      /* At the start of a bank: take the first slot. A 2-dword request
       * consumes the whole bank (advance by 8); a 1-dword request leaves the
       * rest of the bank available.
       */
      constant = *next_constant;

      if (num_constants == 1)
         *next_constant += 1;
      else
         *next_constant += 8;
   } else if (num_constants == 1) {
      /* Mid-bank single dword: use the current slot and jump to the next
       * bank (1 + 7 = 8 from the bank start).
       */
      constant = *next_constant;
      *next_constant += 7;
   } else {
      /* Mid-bank 2-dword request: skip to the next bank boundary first
       * (note: constant is read AFTER the += 7, so it points at the next
       * bank), then advance past the pair.
       */
      *next_constant += 7;
      constant = *next_constant;

      if (*next_constant >= (num_backs << 3)) {
         /* Landed beyond the banked region: this allocation comes out of the
          * linear region, so it is charged to *count.
          */
         *next_constant += 2;
         *count += 2;
      } else {
         *next_constant += 8;
      }
   }
   /* NOTE(review): apart from the overflow path above and the fallback to
    * pvr_pds_get_constants(), *count is not updated for in-bank allocations
    * — presumably the banked space was already counted by the caller;
    * confirm against the stream-constant sizing code.
    */
   return constant;
}
+
+/**
+ * Generates a PDS program to load USC vertex inputs based from one or more
+ * vertex buffers, each containing potentially multiple elements, and then a
+ * DOUTU to execute the USC.
+ *
+ * \param program Pointer to the description of the program which should be
+ *                generated.
+ * \param buffer Pointer to buffer that receives the output of this function.
+ *               Will either be the data segment or code segment depending on
+ *               gen_mode.
+ * \param gen_mode Which part to generate, either data segment or
+ *                 code segment. If PDS_GENERATE_SIZES is specified, nothing is
+ *                 written, but size information in program is updated.
+ * \param dev_info PVR device info structure.
+ * \returns Pointer to just beyond the buffer for the data - i.e the value
+ *          of the buffer after writing its contents.
+ */
+uint32_t *
+pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
+                      uint32_t *restrict buffer,
+                      enum pvr_pds_generate_mode gen_mode,
+                      const struct pvr_device_info *dev_info)
+{
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   uint32_t next_stream_constant;
+   uint32_t next_temp;
+   uint32_t usc_control_constant64;
+   uint32_t stride_constant32 = 0;
+   uint32_t dma_address_constant64 = 0;
+   uint32_t dma_control_constant64;
+   uint32_t multiplier_constant32 = 0;
+   uint32_t base_instance_const32 = 0;
+
+   uint32_t temp = 0;
+   uint32_t index_temp64 = 0;
+   uint32_t num_vertices_temp64 = 0;
+   uint32_t pre_index_temp = (uint32_t)(-1);
+   bool first_ddmadt = true;
+   uint32_t input_register0;
+   uint32_t input_register1;
+   uint32_t input_register2;
+
+   struct pvr_pds_vertex_stream *vertex_stream;
+   struct pvr_pds_vertex_element *vertex_element;
+   uint32_t shift_2s_comp;
+
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+   uint32_t temps_used = 0;
+
+   bool direct_writes_needed = false;
+
+   uint32_t consts_size = 0;
+   uint32_t vertex_id_control_word_const32 = 0;
+   uint32_t instance_id_control_word_const32 = 0;
+   uint32_t instance_id_modifier_word_const32 = 0;
+   uint32_t geometry_id_control_word_const64 = 0;
+   uint32_t empty_dma_control_constant64 = 0;
+
+   bool any_instanced_stream =
+      pvr_pds_vertex_streams_contains_instance_data(program->streams,
+                                                    program->num_streams);
+
+   uint32_t base_instance_register = 0;
+   uint32_t ddmadt_enables = 0;
+
+   bool issue_empty_ddmad = false;
+   uint32_t last_stream_index = program->num_streams - 1;
+   bool current_p0 = false;
+   uint32_t skip_stream_flag = 0;
+
+   /* Generate the PDS vertex shader data. */
+
+#if defined(DEBUG)
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      for (uint32_t i = 0; i < program->data_size; i++)
+         buffer[i] = 0xDEADBEEF;
+   }
+#endif
+
+   /* Generate the PDS vertex shader program */
+   next_temp = PVR_PDS_TEMPS_BLOCK_BASE;
+   /* IR0 is in first 32-bit temp, temp[0].32, vertex_Index. */
+   input_register0 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
+   /* IR1 is in second 32-bit temp, temp[1].32, instance_ID. */
+   input_register1 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
+
+   if (program->iterate_remap_id)
+      input_register2 = pvr_pds_get_temps(&next_temp, 1, &temps_used);
+   else
+      input_register2 = 0; /* Not used, but need to silence the compiler. */
+
+   /* Generate the PDS vertex shader code. The constants in the data block are
+    * arranged as follows:
+    *
+    * 64 bit bank 0        64 bit bank 1          64 bit bank 2    64 bit bank
+    * 3 Not used (tmps)    Stride | Multiplier    Address          Control
+    */
+
+   /* Find out how many constants are needed by streams. */
+   for (uint32_t stream = 0; stream < program->num_streams; stream++) {
+      pvr_pds_get_constants(&next_constant,
+                            8 * program->streams[stream].num_elements,
+                            &consts_size);
+   }
+
+   /* If there are no vertex streams allocate the first bank for USC Code
+    * Address.
+    */
+   if (consts_size == 0)
+      pvr_pds_get_constants(&next_constant, 2, &consts_size);
+   else
+      next_constant = 8;
+
+   direct_writes_needed = program->iterate_instance_id ||
+                          program->iterate_vtx_id || program->iterate_remap_id;
+
+   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+      /* Evaluate what config of DDMAD should be used for each stream. */
+      for (uint32_t stream = 0; stream < program->num_streams; stream++) {
+         vertex_stream = &program->streams[stream];
+
+         if (vertex_stream->use_ddmadt) {
+            ddmadt_enables |= (1 << stream);
+
+            /* The condition for index value is:
+             * index * stride + size <= bufferSize (all in unit of byte)
+             */
+            if (vertex_stream->stride == 0) {
+               if (vertex_stream->elements[0].size <=
+                   vertex_stream->buffer_size_in_bytes) {
+                  /* index can be any value -> no need to use DDMADT. */
+                  ddmadt_enables &= (~(1 << stream));
+               } else {
+                  /* No index works -> no need to issue DDMAD instruction.
+                   */
+                  skip_stream_flag |= (1 << stream);
+               }
+            } else {
+               /* index * stride + size <= bufferSize
+                *
+                * can be converted to:
+                * index <= (bufferSize - size) / stride
+                *
+                * where maximum index is:
+                * integer((bufferSize - size) / stride).
+                */
+               if (vertex_stream->buffer_size_in_bytes <
+                   vertex_stream->elements[0].size) {
+                  /* No index works -> no need to issue DDMAD instruction.
+                   */
+                  skip_stream_flag |= (1 << stream);
+               } else {
+                  uint32_t max_index = (vertex_stream->buffer_size_in_bytes -
+                                        vertex_stream->elements[0].size) /
+                                       vertex_stream->stride;
+                  if (max_index == 0xFFFFFFFFu) {
+                     /* No need to use DDMADT as all possible indices can
+                      * pass the test.
+                      */
+                     ddmadt_enables &= (~(1 << stream));
+                  } else {
+                     /* In this case, test condition can be changed to
+                      * index < max_index + 1.
+                      */
+                     program->streams[stream].num_vertices =
+                        pvr_pds_get_bank_based_constants(program->num_streams,
+                                                         &next_constant,
+                                                         1,
+                                                         &consts_size);
+
+                     if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+                        pvr_pds_write_constant32(
+                           buffer,
+                           program->streams[stream].num_vertices,
+                           max_index + 1);
+                     }
+                  }
+               }
+            }
+         }
+
+         if ((skip_stream_flag & (1 << stream)) == 0) {
+            issue_empty_ddmad = (ddmadt_enables & (1 << stream)) != 0;
+            last_stream_index = stream;
+         }
+      }
+   } else {
+      if (program->num_streams > 0 &&
+          program->streams[program->num_streams - 1].use_ddmadt) {
+         issue_empty_ddmad = true;
+      }
+   }
+
+   if (direct_writes_needed)
+      issue_empty_ddmad = false;
+
+   if (issue_empty_ddmad) {
+      /* An empty DMA control const (DMA size = 0) is required in case the
+       * last DDMADD is predicated out and last flag does not have any usage.
+       */
+      empty_dma_control_constant64 =
+         pvr_pds_get_bank_based_constants(program->num_streams,
+                                          &next_constant,
+                                          2,
+                                          &consts_size);
+   }
+
+   /* Assign constants for non stream or base instance if there is any
+    * instanced stream.
+    */
+   if (direct_writes_needed || any_instanced_stream ||
+       program->instance_ID_modifier) {
+      if (program->iterate_vtx_id) {
+         vertex_id_control_word_const32 =
+            pvr_pds_get_bank_based_constants(program->num_streams,
+                                             &next_constant,
+                                             1,
+                                             &consts_size);
+      }
+
+      if (program->iterate_instance_id || program->instance_ID_modifier) {
+         if (program->instance_ID_modifier == 0) {
+            instance_id_control_word_const32 =
+               pvr_pds_get_bank_based_constants(program->num_streams,
+                                                &next_constant,
+                                                1,
+                                                &consts_size);
+         } else {
+            instance_id_modifier_word_const32 =
+               pvr_pds_get_bank_based_constants(program->num_streams,
+                                                &next_constant,
+                                                1,
+                                                &consts_size);
+            if ((instance_id_modifier_word_const32 % 2) == 0) {
+               instance_id_control_word_const32 =
+                  pvr_pds_get_bank_based_constants(program->num_streams,
+                                                   &next_constant,
+                                                   1,
+                                                   &consts_size);
+            } else {
+               instance_id_control_word_const32 =
+                  instance_id_modifier_word_const32;
+               instance_id_modifier_word_const32 =
+                  pvr_pds_get_bank_based_constants(program->num_streams,
+                                                   &next_constant,
+                                                   1,
+                                                   &consts_size);
+            }
+         }
+      }
+
+      if (program->base_instance != 0) {
+         base_instance_const32 =
+            pvr_pds_get_bank_based_constants(program->num_streams,
+                                             &next_constant,
+                                             1,
+                                             &consts_size);
+      }
+
+      if (program->iterate_remap_id) {
+         geometry_id_control_word_const64 =
+            pvr_pds_get_bank_based_constants(program->num_streams,
+                                             &next_constant,
+                                             2,
+                                             &consts_size);
+      }
+   }
+
+   if (program->instance_ID_modifier != 0) {
+      /* This instanceID modifier is used when a draw array instanced call
+       * sourcing from client data cannot fit into vertex buffer and needs to
+       * be broken down into several draw calls.
+       */
+
+      code_size += 1;
+
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+         pvr_pds_write_constant32(buffer,
+                                  instance_id_modifier_word_const32,
+                                  program->instance_ID_modifier);
+      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         *buffer++ = pvr_pds_inst_encode_add32(
+            /* cc */ 0x0,
+            /* ALUM */ 0, /* Unsigned */
+            /* SNA */ 0, /* Add */
+            /* SRC0 32b */ instance_id_modifier_word_const32,
+            /* SRC1 32b */ input_register1,
+            /* DST 32b */ input_register1);
+      }
+   }
+
+   /* Adjust instanceID if necessary. */
+   if (any_instanced_stream || program->iterate_instance_id) {
+      if (program->base_instance != 0) {
+         assert(!program->draw_indirect);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            pvr_pds_write_constant32(buffer,
+                                     base_instance_const32,
+                                     program->base_instance);
+         }
+
+         base_instance_register = base_instance_const32;
+      }
+
+      if (program->draw_indirect) {
+         assert((program->instance_ID_modifier == 0) &&
+                (program->base_instance == 0));
+
+         base_instance_register = PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER + 1;
+      }
+   }
+
+   next_constant = next_stream_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   usc_control_constant64 =
+      pvr_pds_get_constants(&next_stream_constant, 2, &data_size);
+
+   for (uint32_t stream = 0; stream < program->num_streams; stream++) {
+      bool instance_data_with_base_instance;
+
+      if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
+          ((skip_stream_flag & (1 << stream)) != 0)) {
+         continue;
+      }
+
+      vertex_stream = &program->streams[stream];
+
+      instance_data_with_base_instance =
+         ((vertex_stream->instance_data) &&
+          ((program->base_instance > 0) || (program->draw_indirect)));
+
+      /* Get all 8 32-bit constants at once, only 6 for first stream due to
+       * USC constants.
+       */
+      if (stream == 0) {
+         stride_constant32 =
+            pvr_pds_get_constants(&next_stream_constant, 6, &data_size);
+      } else {
+         next_constant =
+            pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
+
+         /* Skip bank 0. */
+         stride_constant32 = next_constant + 2;
+      }
+
+      multiplier_constant32 = stride_constant32 + 1;
+
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+         pvr_pds_write_constant32(buffer,
+                                  stride_constant32,
+                                  vertex_stream->stride);
+
+         /* Vertex stream frequency multiplier. */
+         if (vertex_stream->multiplier)
+            pvr_pds_write_constant32(buffer,
+                                     multiplier_constant32,
+                                     vertex_stream->multiplier);
+      }
+
+      /* Update the code size count and temps count for the above code
+       * segment.
+       */
+      if (vertex_stream->current_state) {
+         code_size += 1;
+         temp = pvr_pds_get_temps(&next_temp, 1, &temps_used); /* 32-bit */
+      } else {
+         unsigned int num_temps_required = 0;
+
+         if (vertex_stream->multiplier) {
+            num_temps_required += 2;
+            code_size += 3;
+
+            if (vertex_stream->shift) {
+               code_size += 1;
+
+               if ((int32_t)vertex_stream->shift > 0)
+                  code_size += 1;
+            }
+         } else if (vertex_stream->shift) {
+            code_size += 1;
+            num_temps_required += 1;
+         } else if (instance_data_with_base_instance) {
+            num_temps_required += 1;
+         }
+
+         if (num_temps_required != 0) {
+            temp = pvr_pds_get_temps(&next_temp,
+                                     num_temps_required,
+                                     &temps_used); /* 64-bit */
+         } else {
+            temp = vertex_stream->instance_data ? input_register1
+                                                : input_register0;
+         }
+
+         if (instance_data_with_base_instance)
+            code_size += 1;
+      }
+
+      /* The real code segment. */
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         /* If it's current state stream, then index = 0 always. */
+         if (vertex_stream->current_state) {
+            /* Put zero in temp. */
+            *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
+         } else if (vertex_stream->multiplier) {
+            /* old: Iout = (Iin * (Multiplier+2^24)) >> (Shift+24)
+             * new: Iout = (Iin * Multiplier) >> (shift+31)
+             */
+
+            /* Put zero in temp. Need zero for add part of the following
+             * MAD. MAD source is 64 bit, so need two LIMMs.
+             */
+            *buffer++ = pvr_pds_inst_encode_limm(0, temp, 0, 0);
+            /* Put zero in temp. Need zero for add part of the following
+             * MAD.
+             */
+            *buffer++ = pvr_pds_inst_encode_limm(0, temp + 1, 0, 0);
+
+            /* old: (Iin * (Multiplier+2^24))
+             * new: (Iin * Multiplier)
+             */
+            *buffer++ = pvr_rogue_inst_encode_mad(
+               0, /* Sign of add is positive. */
+               0, /* Unsigned ALU mode */
+               0, /* Unconditional */
+               multiplier_constant32,
+               vertex_stream->instance_data ? input_register1 : input_register0,
+               temp / 2,
+               temp / 2);
+
+            if (vertex_stream->shift) {
+               int32_t shift = (int32_t)vertex_stream->shift;
+
+               /* new: >> (shift + 31) */
+               shift += 31;
+               shift *= -1;
+
+               if (shift < -31) {
+                  /* >> (31) */
+                  shift_2s_comp = 0xFFFE1;
+                  *buffer++ = pvr_pds_inst_encode_stflp64(
+                     /* cc */ 0,
+                     /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+                     /* IM */ 1, /*  enable immediate */
+                     /* SRC0 */ temp / 2,
+                     /* SRC1 */ input_register0, /* This won't be used in
+                                                  * a shift operation.
+                                                  */
+                     /* SRC2 (Shift) */ shift_2s_comp,
+                     /* DST */ temp / 2);
+                  shift += 31;
+               }
+
+               /* old: >> (Shift+24)
+                * new: >> (shift + 31)
+                */
+               shift_2s_comp = *((uint32_t *)&shift);
+               *buffer++ = pvr_pds_inst_encode_stflp64(
+                  /* cc */ 0,
+                  /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+                  /* IM */ 1, /*enable immediate */
+                  /* SRC0 */ temp / 2,
+                  /* SRC1 */ input_register0, /* This won't be used in
+                                               * a shift operation.
+                                               */
+                  /* SRC2 (Shift) */ shift_2s_comp,
+                  /* DST */ temp / 2);
+            }
+
+            if (instance_data_with_base_instance) {
+               *buffer++ =
+                  pvr_pds_inst_encode_add32(0, /* cc */
+                                            0, /* ALNUM */
+                                            0, /* SNA */
+                                            base_instance_register, /* src0
+                                                                     */
+                                            temp, /* src1 */
+                                            temp /* dst */
+                  );
+            }
+         } else { /* NOT vertex_stream->multiplier */
+            if (vertex_stream->shift) {
+               /* Shift Index/InstanceNum Right by shift bits. Put result
+                * in a Temp.
+                */
+
+               /* 2's complement of shift as this will be a right shift. */
+               shift_2s_comp = ~(vertex_stream->shift) + 1;
+
+               *buffer++ = pvr_pds_inst_encode_stflp32(
+                  /* IM */ 1, /*  enable immediate. */
+                  /* cc */ 0,
+                  /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+                  /* SRC0 */ vertex_stream->instance_data ? input_register1
+                                                          : input_register0,
+                  /* SRC1 */ input_register0, /* This won't be used in
+                                               * a shift operation.
+                                               */
+                  /* SRC2 (Shift) */ shift_2s_comp,
+                  /* DST */ temp);
+
+               if (instance_data_with_base_instance) {
+                  *buffer++ =
+                     pvr_pds_inst_encode_add32(0, /* cc */
+                                               0, /* ALNUM */
+                                               0, /* SNA */
+                                               base_instance_register, /* src0
+                                                                        */
+                                               temp, /* src1 */
+                                               temp /* dst */
+                     );
+               }
+            } else {
+               if (instance_data_with_base_instance) {
+                  *buffer++ =
+                     pvr_pds_inst_encode_add32(0, /* cc */
+                                               0, /* ALNUM */
+                                               0, /* SNA */
+                                               base_instance_register, /* src0
+                                                                        */
+                                               input_register1, /* src1 */
+                                               temp /* dst */
+                     );
+               } else {
+                  /* If the shift instruction doesn't happen, use the IR
+                   * directly into the following MAD.
+                   */
+                  temp = vertex_stream->instance_data ? input_register1
+                                                      : input_register0;
+               }
+            }
+         }
+      }
+
+      if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+         if (vertex_stream->use_ddmadt)
+            ddmadt_enables |= (1 << stream);
+      } else {
+         if ((ddmadt_enables & (1 << stream)) != 0) {
+            /* Emulate what DDMADT does for range checking. */
+            if (first_ddmadt) {
+               /* Get an 64 bits temp such that cmp current index with
+                * allowed vertex number can work.
+                */
+               index_temp64 =
+                  pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
+                                                                  */
+               num_vertices_temp64 =
+                  pvr_pds_get_temps(&next_temp, 2, &temps_used); /* 64-bit
+                                                                  */
+
+               index_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
+               num_vertices_temp64 -= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
+
+               code_size += 3;
+               current_p0 = true;
+            }
+
+            code_size += (temp == pre_index_temp ? 1 : 2);
+
+            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+               if (first_ddmadt) {
+                  /* Set predicate to be P0. */
+                  *buffer++ = pvr_pds_encode_bra(
+                     PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
+                                                        */
+                     0, /* Neg */
+                     PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETCC
+                                                      */
+                     1); /* Addr */
+
+                  *buffer++ =
+                     pvr_pds_inst_encode_limm(0, index_temp64 + 1, 0, 0);
+                  *buffer++ =
+                     pvr_pds_inst_encode_limm(0, num_vertices_temp64 + 1, 0, 0);
+               }
+
+               if (temp != pre_index_temp) {
+                  *buffer++ = pvr_pds_inst_encode_stflp32(
+                     /* IM */ 1, /*  enable immediate. */
+                     /* cc */ 0,
+                     /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+                     /* SRC0 */ temp - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER,
+                     /* SRC1 */ 0,
+                     /* SRC2 (Shift) */ 0,
+                     /* DST */ index_temp64);
+               }
+
+               *buffer++ = pvr_pds_inst_encode_stflp32(
+                  /* IM */ 1, /*  enable immediate. */
+                  /* cc */ 0,
+                  /* LOP */ PVR_ROGUE_PDSINST_LOP_OR,
+                  /* SRC0 */ num_vertices_temp64 + 1,
+                  /* SRC1 */ vertex_stream->num_vertices,
+                  /* SRC2 (Shift) */ 0,
+                  /* DST */ num_vertices_temp64);
+            }
+
+            first_ddmadt = false;
+
+            pre_index_temp = temp;
+         }
+      }
+
+      /* Process the elements in the stream. */
+      for (uint32_t element = 0; element < vertex_stream->num_elements;
+           element++) {
+         bool terminate = false;
+
+         vertex_element = &vertex_stream->elements[element];
+         /* Check if last DDMAD needs terminate or not. */
+         if ((element == (vertex_stream->num_elements - 1)) &&
+             (stream == last_stream_index)) {
+            terminate = !issue_empty_ddmad && !direct_writes_needed;
+         }
+
+         /* Get a new set of constants for this element. */
+         if (element) {
+            /* Get all 8 32 bit constants at once. */
+            next_constant =
+               pvr_pds_get_constants(&next_stream_constant, 8, &data_size);
+         }
+
+         dma_address_constant64 = next_constant + 4;
+         dma_control_constant64 = dma_address_constant64 + 2;
+
+         if (vertex_element->component_size == 0) {
+            /* Standard DMA.
+             *
+             * Write the DMA transfer control words into the PDS data
+             * section.
+             *
+             * DMA Address is 40-bit.
+             */
+
+            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+               uint32_t dma_control_word;
+               uint64_t dma_control_word64 = 0;
+               uint32_t dma_size;
+
+               /* Write the address to the constant. */
+               pvr_pds_write_dma_address(buffer,
+                                         dma_address_constant64,
+                                         vertex_stream->address +
+                                            (uint64_t)vertex_element->offset,
+                                         false,
+                                         dev_info);
+               {
+                  if (program->stream_patch_offsets) {
+                     program
+                        ->stream_patch_offsets[program->num_stream_patches++] =
+                        (stream << 16) | (dma_address_constant64 >> 1);
+                  }
+               }
+
+               /* Size is in bytes - round up to nearest 32 bit word. */
+               dma_size =
+                  (vertex_element->size + (1 << PVR_PDS_DWORD_SHIFT) - 1) >>
+                  PVR_PDS_DWORD_SHIFT;
+
+               assert(dma_size <= PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER);
+
+               /* Set up the dma transfer control word. */
+               dma_control_word =
+                  dma_size << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
+
+               dma_control_word |=
+                  vertex_element->reg
+                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
+
+               dma_control_word |=
+                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
+                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
+
+               if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+                  if ((ddmadt_enables & (1 << stream)) != 0) {
+                     assert(
+                        ((((uint64_t)vertex_stream->buffer_size_in_bytes
+                           << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
+                          ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK) >>
+                         PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) ==
+                        (uint64_t)vertex_stream->buffer_size_in_bytes);
+                     dma_control_word64 =
+                        (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN |
+                         (((uint64_t)vertex_stream->buffer_size_in_bytes
+                           << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT) &
+                          ~PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK));
+                  }
+               }
+               /* If this is the last dma then also set the last flag. */
+               if (terminate) {
+                  dma_control_word |=
+                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
+               }
+
+               /* Write the 32-Bit SRC3 word to a 64-bit constant as per
+                * spec.
+                */
+               pvr_pds_write_wide_constant(buffer,
+                                           dma_control_constant64,
+                                           dma_control_word64 |
+                                              (uint64_t)dma_control_word);
+            }
+
+            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+               if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+                  if ((ddmadt_enables & (1 << stream)) != 0) {
+                     *buffer++ = pvr_pds_inst_encode_cmp(
+                        0, /* cc enable */
+                        PVR_ROGUE_PDSINST_COP_LT, /* Operation */
+                        index_temp64 >> 1, /* SRC0 (REGS64TP) */
+                        (num_vertices_temp64 >> 1) +
+                           PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER); /* SRC1
+                                                                      (REGS64)
+                                                                    */
+                  }
+               }
+               /* Multiply by the vertex stream stride and add the base
+                * followed by a DOUTD.
+                *
+                * dmad32 (C0 * T0) + C1, C2
+                * src0 = stride  src1 = index  src2 = baseaddr src3 =
+                * doutd part
+                */
+
+               uint32_t cc;
+               if (PVR_HAS_FEATURE(dev_info, pds_ddmadt))
+                  cc = 0;
+               else
+                  cc = (ddmadt_enables & (1 << stream)) != 0 ? 1 : 0;
+
+               *buffer++ = pvr_pds_inst_encode_ddmad(
+                  /* cc */ cc,
+                  /* END */ 0,
+                  /* SRC0 */ stride_constant32, /* Stride 32-bit*/
+                  /* SRC1 */ temp, /* Index 32-bit*/
+                  /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
+                                                                  * Address
+                                                                  * +
+                                                                  * Offset
+                                                                  */
+                  /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
+                                                                 * Transfer
+                                                                 * Control
+                                                                 * Word.
+                                                                 */
+               );
+            }
+
+            if ((!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) &&
+                ((ddmadt_enables & (1 << stream)) != 0)) {
+               code_size += 1;
+            }
+            code_size += 1;
+         } else {
+            /* Repeat DMA.
+             *
+             * Write the DMA transfer control words into the PDS data
+             * section.
+             *
+             * DMA address is 40-bit.
+             */
+
+            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+               uint32_t dma_control_word;
+
+               /* Write the address to the constant. */
+               pvr_pds_write_dma_address(buffer,
+                                         dma_address_constant64,
+                                         vertex_stream->address +
+                                            (uint64_t)vertex_element->offset,
+                                         false,
+                                         dev_info);
+
+               /* Set up the DMA transfer control word. */
+               dma_control_word =
+                  vertex_element->size
+                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
+
+               dma_control_word |=
+                  vertex_element->reg
+                  << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
+
+               switch (vertex_element->component_size) {
+               case 4: {
+                  dma_control_word |=
+                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR;
+                  break;
+               }
+               case 3: {
+                  dma_control_word |=
+                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE;
+                  break;
+               }
+               case 2: {
+                  dma_control_word |=
+                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO;
+                  break;
+               }
+               default: {
+                  dma_control_word |=
+                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE;
+                  break;
+               }
+               }
+
+               dma_control_word |=
+                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT;
+
+               dma_control_word |=
+                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
+                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED;
+
+               /* If this is the last dma then also set the last flag. */
+               if (terminate) {
+                  dma_control_word |=
+                     PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
+               }
+
+               /* Write the 32-Bit SRC3 word to a 64-bit constant as per
+                * spec.
+                */
+               pvr_pds_write_wide_constant(buffer,
+                                           dma_control_constant64,
+                                           (uint64_t)dma_control_word);
+            }
+
+            if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+               /* Multiply by the vertex stream stride and add the base
+                * followed by a DOUTD.
+                *
+                * dmad32 (C0 * T0) + C1, C2
+                * src0 = stride  src1 = index  src2 = baseaddr src3 =
+                * doutd part
+                */
+               *buffer++ = pvr_pds_inst_encode_ddmad(
+                  /* cc */ 0,
+                  /* END */ 0,
+                  /* SRC0 */ stride_constant32, /* Stride 32-bit*/
+                  /* SRC1 */ temp, /* Index 32-bit*/
+                  /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
+                                                                  * Address
+                                                                  * +
+                                                                  * Offset.
+                                                                  */
+                  /* SRC3 64-bit */ dma_control_constant64 >> 1 /* DMA
+                                                                 * Transfer
+                                                                 * Control
+                                                                 * Word.
+                                                                 */
+               );
+            }
+
+            code_size += 1;
+         } /* End of repeat DMA. */
+      } /* Element loop */
+   } /* Stream loop */
+
+   if (issue_empty_ddmad) {
+      /* Issue an empty last DDMAD, always executed. */
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+         pvr_pds_write_wide_constant(
+            buffer,
+            empty_dma_control_constant64,
+            PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN);
+      }
+
+      code_size += 1;
+
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         *buffer++ = pvr_pds_inst_encode_ddmad(
+            /* cc */ 0,
+            /* END */ 0,
+            /* SRC0 */ stride_constant32, /* Stride 32-bit*/
+            /* SRC1 */ temp, /* Index 32-bit*/
+            /* SRC2 64-bit */ dma_address_constant64 >> 1, /* Stream
+                                                            *Address +
+                                                            *Offset.
+                                                            */
+            /* SRC3 64-bit */ empty_dma_control_constant64 >> 1 /* DMA
+                                                                 * Transfer
+                                                                 * Control
+                                                                 * Word.
+                                                                 */
+         );
+      }
+   }
+
+   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+      if (current_p0) {
+         code_size += 1;
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+            /* Revert predicate back to IF0 which is required by DOUTU. */
+            *buffer++ =
+               pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC
+                                                                     */
+                                  0, /* Neg */
+                                  PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC
+                                                                    */
+                                  1); /* Addr */
+         }
+      }
+   }
+   /* Send VertexID if requested. */
+   if (program->iterate_vtx_id) {
+      if (program->draw_indirect) {
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+            *buffer++ = pvr_pds_inst_encode_add32(
+               /* cc */ 0x0,
+               /* ALUM */ 0, /* Unsigned */
+               /* SNA */ 1, /* Minus */
+               /* SRC0 32b */ input_register0, /* vertexID */
+               /* SRC1 32b */ PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER, /* base
+                                                                       * vertexID.
+                                                                       */
+               /* DST 32b */ input_register0);
+         }
+
+         code_size += 1;
+      }
+
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+         uint32_t doutw = pvr_pds_encode_doutw_src1(
+            program->vtx_id_register,
+            PVR_PDS_DOUTW_LOWER32,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+            false,
+            dev_info);
+
+         if (!program->iterate_instance_id && !program->iterate_remap_id)
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+         pvr_pds_write_constant32(buffer,
+                                  vertex_id_control_word_const32,
+                                  doutw);
+      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         *buffer++ = pvr_pds_encode_doutw64(
+            /* cc */ 0,
+            /* END */ 0,
+            /* SRC1 */ vertex_id_control_word_const32, /* DOUTW 32-bit Src1
+                                                        */
+            /* SRC0 */ input_register0 >> 1); /* DOUTW 64-bit Src0 */
+      }
+
+      code_size += 1;
+   }
+
+   /* Send InstanceID if requested. */
+   if (program->iterate_instance_id) {
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+         uint32_t doutw = pvr_pds_encode_doutw_src1(
+            program->instance_id_register,
+            PVR_PDS_DOUTW_UPPER32,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+            true,
+            dev_info);
+
+         if (!program->iterate_remap_id)
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+         pvr_pds_write_constant32(buffer,
+                                  instance_id_control_word_const32,
+                                  doutw);
+      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         *buffer++ = pvr_pds_encode_doutw64(
+            /* cc */ 0,
+            /* END */ 0,
+            /* SRC1 */ instance_id_control_word_const32, /* DOUTW 32-bit Src1 */
+            /* SRC0 */ input_register1 >> 1); /* DOUTW 64-bit Src0 */
+      }
+
+      code_size += 1;
+   }
+
+   /* Send remapped index number to vi0. */
+   if (program->iterate_remap_id) {
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+         uint32_t doutw = pvr_pds_encode_doutw_src1(
+            0 /* vi0 */,
+            PVR_PDS_DOUTW_LOWER32,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
+            false,
+            dev_info);
+
+         pvr_pds_write_constant64(buffer,
+                                  geometry_id_control_word_const64,
+                                  doutw,
+                                  0);
+      } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         *buffer++ = pvr_pds_encode_doutw64(
+            /* cc */ 0,
+            /* END */ 0,
+            /* SRC1 */ geometry_id_control_word_const64, /* DOUTW 32-bit
+                                                          * Src1
+                                                          */
+            /* SRC0 */ input_register2 >> 1); /* DOUTW 64-bit Src0 */
+      }
+
+      code_size += 1;
+   }
+
+   /* Copy the USC task control words to constants. */
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      pvr_pds_write_wide_constant(buffer,
+                                  usc_control_constant64,
+                                  program->usc_task_control.src0); /* 64-bit
+                                                                    * Src0
+                                                                    */
+      if (program->stream_patch_offsets) {
+         /* USC TaskControl is always the first patch. */
+         program->stream_patch_offsets[0] = usc_control_constant64 >> 1;
+      }
+   }
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Conditionally (if last in task) issue the task to the USC
+       * (if0) DOUTU src1=USC Code Base address, src2=DOUTU word 2.
+       */
+
+      *buffer++ = pvr_pds_encode_doutu(
+         /* cc */ 1,
+         /* END */ 1,
+         /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0 */
+
+      /* End the program if the Dout did not already end it. */
+      *buffer++ = pvr_pds_inst_encode_halt(0);
+   }
+
+   code_size += 2;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Set the data segment pointer and ensure we return 1 past the buffer
+       * ptr.
+       */
+      program->data_segment = buffer;
+
+      buffer += consts_size;
+   }
+
+   program->temps_used = temps_used;
+   program->data_size = consts_size;
+   program->code_size = code_size;
+   program->ddmadt_enables = ddmadt_enables;
+   if (!PVR_HAS_FEATURE(dev_info, pds_ddmadt))
+      program->skip_stream_flag = skip_stream_flag;
+
+   return buffer;
+}
+
+/**
+ * Generates a PDS program to load USC compute shader global/local/workgroup
+ * sizes/ids and then a DOUTU to execute the USC.
+ *
+ * \param program Pointer to description of the program that should be
+ *                generated.
+ * \param buffer Pointer to the buffer that receives the output of this
+ *               function. This will be either the data segment or the code
+ *               segment, depending on gen_mode.
+ * \param gen_mode Which part to generate, either data segment or code segment.
+ *                 If PDS_GENERATE_SIZES is specified, nothing is written, but
+ *                 size information in program is updated.
+ * \param dev_info PVR device info struct.
+ * \returns Pointer to just beyond the buffer for the data - i.e. the value of
+ *          the buffer after writing its contents.
+ */
+uint32_t *
+pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
+                       uint32_t *restrict buffer,
+                       enum pvr_pds_generate_mode gen_mode,
+                       const struct pvr_device_info *dev_info)
+{
+   uint32_t usc_control_constant64;
+   uint32_t usc_control_constant64_coeff_update = 0;
+   uint32_t zero_constant64 = 0;
+
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+   uint32_t temps_used = 0;
+   uint32_t doutw = 0;
+
+   uint32_t barrier_ctrl_word = 0;
+   uint32_t barrier_ctrl_word2 = 0;
+
+   /* Even though there are 3 IDs for local and global we only need max one
+    * DOUTW for local, and two for global.
+    */
+   uint32_t work_group_id_ctrl_words[2] = { 0 };
+   uint32_t local_id_ctrl_word = 0;
+   uint32_t local_input_register;
+
+   /* For the constant value to load into ptemp (SW fence). */
+   uint64_t predicate_ld_src0_constant = 0;
+   uint32_t cond_render_negate_constant = 0;
+
+   uint32_t cond_render_pred_temp;
+   uint32_t cond_render_negate_temp;
+
+   /* 2x 64 bit registers that will mask out the Predicate load. */
+   uint32_t cond_render_pred_mask_constant = 0;
+
+#if defined(DEBUG)
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      for (uint32_t j = 0; j < program->data_size; j++)
+         buffer[j] = 0xDEADBEEF;
+   }
+#endif
+
+   /* All the compute input registers are in temps. */
+   temps_used += PVR_PDS_NUM_COMPUTE_INPUT_REGS;
+
+   uint32_t next_temp = PVR_PDS_TEMPS_BLOCK_BASE + temps_used;
+
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+   if (program->kick_usc) {
+      /* Copy the USC task control words to constants. */
+      usc_control_constant64 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (program->has_coefficient_update_task) {
+      usc_control_constant64_coeff_update =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (program->conditional_render) {
+      predicate_ld_src0_constant =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      cond_render_negate_constant =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      cond_render_pred_mask_constant =
+         pvr_pds_get_constants(&next_constant, 4, &data_size);
+
+      /* LD will load a 64 bit value. */
+      cond_render_pred_temp = pvr_pds_get_temps(&next_temp, 4, &temps_used);
+      cond_render_negate_temp = pvr_pds_get_temps(&next_temp, 2, &temps_used);
+
+      program->cond_render_const_offset_in_dwords = predicate_ld_src0_constant;
+      program->cond_render_pred_temp = cond_render_pred_temp;
+   }
+
+   if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+       (program->clear_pds_barrier) ||
+       (program->kick_usc && program->conditional_render)) {
+      zero_constant64 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+      barrier_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
+      if (PVR_HAS_QUIRK(dev_info, 51210)) {
+         barrier_ctrl_word2 =
+            pvr_pds_get_constants(&next_constant, 1, &data_size);
+      }
+   }
+
+   if (program->work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
+       program->work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+      work_group_id_ctrl_words[0] =
+         pvr_pds_get_constants(&next_constant, 1, &data_size);
+   }
+
+   if (program->work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+      work_group_id_ctrl_words[1] =
+         pvr_pds_get_constants(&next_constant, 1, &data_size);
+   }
+
+   if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+       (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+       (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+      local_id_ctrl_word = pvr_pds_get_constants(&next_constant, 1, &data_size);
+   }
+
+   if (program->add_base_workgroup) {
+      for (uint32_t workgroup_component = 0; workgroup_component < 3;
+           workgroup_component++) {
+         if (program->work_group_input_regs[workgroup_component] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            program
+               ->base_workgroup_constant_offset_in_dwords[workgroup_component] =
+               pvr_pds_get_constants(&next_constant, 1, &data_size);
+         }
+      }
+   }
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      if (program->kick_usc) {
+         /* Src0 for DOUTU */
+         pvr_pds_write_wide_constant(buffer,
+                                     usc_control_constant64,
+                                     program->usc_task_control.src0); /* 64-bit
+                                                                       * Src0.
+                                                                       */
+      }
+
+      if (program->has_coefficient_update_task) {
+         /* Src0 for DOUTU. */
+         pvr_pds_write_wide_constant(
+            buffer,
+            usc_control_constant64_coeff_update,
+            program->usc_task_control_coeff_update.src0); /* 64-bit Src0 */
+      }
+
+      if ((program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->clear_pds_barrier) ||
+          (program->kick_usc && program->conditional_render)) {
+         pvr_pds_write_wide_constant(buffer, zero_constant64, 0); /* 64-bit
+                                                                   * Src0
+                                                                   */
+      }
+
+      if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         if (PVR_HAS_QUIRK(dev_info, 51210)) {
+            /* Write the constant for the coefficient register write. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->barrier_coefficient + 4,
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               true,
+               dev_info);
+            pvr_pds_write_constant32(buffer, barrier_ctrl_word2, doutw);
+         }
+         /* Write the constant for the coefficient register write. */
+         doutw = pvr_pds_encode_doutw_src1(
+            program->barrier_coefficient,
+            PVR_PDS_DOUTW_LOWER64,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+            true,
+            dev_info);
+
+         /* Check whether the barrier is going to be the last DOUTW done by
+          * the coefficient sync task.
+          */
+         if ((program->work_group_input_regs[0] ==
+              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
+             (program->work_group_input_regs[1] ==
+              PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
+             (program->work_group_input_regs[2] ==
+              PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+         }
+
+         pvr_pds_write_constant32(buffer, barrier_ctrl_word, doutw);
+      }
+
+      /* If we want work-group id X, see if we also want work-group id Y. */
+      if (program->work_group_input_regs[0] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED &&
+          program->work_group_input_regs[1] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         /* Make sure we are going to DOUTW them into adjacent registers
+          * otherwise we can't do it in one.
+          */
+         assert(program->work_group_input_regs[1] ==
+                (program->work_group_input_regs[0] + 1));
+
+         doutw = pvr_pds_encode_doutw_src1(
+            program->work_group_input_regs[0],
+            PVR_PDS_DOUTW_LOWER64,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+            true,
+            dev_info);
+
+         /* If we don't want the Z work-group id then this is the last one.
+          */
+         if (program->work_group_input_regs[2] ==
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+         }
+
+         pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[0], doutw);
+      }
+      /* If we only want one of X or Y then handle them separately. */
+      else {
+         if (program->work_group_input_regs[0] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            doutw = pvr_pds_encode_doutw_src1(
+               program->work_group_input_regs[0],
+               PVR_PDS_DOUTW_LOWER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               true,
+               dev_info);
+
+            /* If we don't want the Z work-group id then this is the last
+             * one.
+             */
+            if (program->work_group_input_regs[2] ==
+                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+               doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+            }
+
+            pvr_pds_write_constant32(buffer,
+                                     work_group_id_ctrl_words[0],
+                                     doutw);
+         } else if (program->work_group_input_regs[1] !=
+                    PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            doutw = pvr_pds_encode_doutw_src1(
+               program->work_group_input_regs[1],
+               PVR_PDS_DOUTW_UPPER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               true,
+               dev_info);
+
+            /* If we don't want the Z work-group id then this is the last
+             * one.
+             */
+            if (program->work_group_input_regs[2] ==
+                PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+               doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+            }
+
+            pvr_pds_write_constant32(buffer,
+                                     work_group_id_ctrl_words[0],
+                                     doutw);
+         }
+      }
+
+      /* Handle work-group id Z. */
+      if (program->work_group_input_regs[2] !=
+          PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         doutw = pvr_pds_encode_doutw_src1(
+            program->work_group_input_regs[2],
+            PVR_PDS_DOUTW_UPPER32,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
+            true,
+            dev_info);
+
+         pvr_pds_write_constant32(buffer, work_group_id_ctrl_words[1], doutw);
+      }
+
+      /* Handle the local IDs. */
+      if ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+         uint32_t dest_reg;
+
+         /* If we want local id Y and Z make sure the compiler wants them in
+          * the same register.
+          */
+         if (!program->flattened_work_groups) {
+            if ((program->local_input_regs[1] !=
+                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
+                (program->local_input_regs[2] !=
+                 PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+               assert(program->local_input_regs[1] ==
+                      program->local_input_regs[2]);
+            }
+         }
+
+         if (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
+            dest_reg = program->local_input_regs[1];
+         else
+            dest_reg = program->local_input_regs[2];
+
+         /* If we want local id X and (Y or Z) then we can do that in a
+          * single 64-bit DOUTW.
+          */
+         if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            assert(dest_reg == (program->local_input_regs[0] + 1));
+
+            doutw = pvr_pds_encode_doutw_src1(
+               program->local_input_regs[0],
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               true,
+               dev_info);
+
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+            pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
+         }
+         /* Otherwise just DMA in Y and Z together in a single 32-bit DOUTW.
+          */
+         else {
+            doutw = pvr_pds_encode_doutw_src1(
+               dest_reg,
+               PVR_PDS_DOUTW_UPPER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               true,
+               dev_info);
+
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+
+            pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
+         }
+      }
+      /* If we don't want Y or Z then just DMA in X in a single 32-bit DOUTW.
+       */
+      else if (program->local_input_regs[0] !=
+               PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         doutw = pvr_pds_encode_doutw_src1(
+            program->local_input_regs[0],
+            PVR_PDS_DOUTW_LOWER32,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN,
+            true,
+            dev_info);
+
+         pvr_pds_write_constant32(buffer, local_id_ctrl_word, doutw);
+      }
+   }
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
+       gen_mode == PDS_GENERATE_SIZES) {
+      const bool encode = (gen_mode == PDS_GENERATE_CODE_SEGMENT);
+#define APPEND(X)                    \
+   if (encode) {                     \
+      *buffer = X;                   \
+      buffer++;                      \
+   } else {                          \
+      code_size += sizeof(uint32_t); \
+   }
+
+      /* Assert that coeff_update_task_branch_size is > 0 because if it is 0
+       * then we will be doing an infinite loop.
+       */
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
+         assert(program->coeff_update_task_branch_size > 0);
+
+      /* Test whether this is the coefficient update task or not. */
+      APPEND(
+         pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SRCC */
+                            PVR_ROGUE_PDSINST_NEG_ENABLE, /* NEG */
+                            PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC */
+                            program->coeff_update_task_branch_size /* ADDR */));
+
+      /* Do we need to initialize the barrier coefficient? */
+      if (program->barrier_coefficient != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         if (PVR_HAS_QUIRK(dev_info, 51210)) {
+            /* Initialize the second barrier coefficient registers to zero.
+             */
+            APPEND(pvr_pds_encode_doutw64(0, /* cc */
+                                          0, /* END */
+                                          barrier_ctrl_word2, /* SRC1 */
+                                          zero_constant64 >> 1)); /* SRC0 */
+         }
+         /* Initialize the coefficient register to zero. */
+         APPEND(pvr_pds_encode_doutw64(0, /* cc */
+                                       0, /* END */
+                                       barrier_ctrl_word, /* SRC1 */
+                                       zero_constant64 >> 1)); /* SRC0 */
+      }
+
+      if (program->add_base_workgroup) {
+         const uint32_t temp_values[3] = { 0, 1, 3 };
+         for (uint32_t workgroup_component = 0; workgroup_component < 3;
+              workgroup_component++) {
+            if (program->work_group_input_regs[workgroup_component] ==
+                PVR_PDS_COMPUTE_INPUT_REG_UNUSED)
+               continue;
+
+            APPEND(pvr_pds_inst_encode_add32(
+               /* cc */ 0x0,
+               /* ALUM */ 0,
+               /* SNA */ 0,
+               /* SRC0 (R32)*/ PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER +
+                  program->base_workgroup_constant_offset_in_dwords
+                     [workgroup_component],
+               /* SRC1 (R32)*/ PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER +
+                  PVR_PDS_CDM_WORK_GROUP_ID_X +
+                  temp_values[workgroup_component],
+               /* DST  (R32TP)*/ PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER +
+                  PVR_PDS_CDM_WORK_GROUP_ID_X +
+                  temp_values[workgroup_component]));
+         }
+      }
+
+      /* If we are going to put the work-group IDs in coefficients then we
+       * just need to do the DOUTWs.
+       */
+      if ((program->work_group_input_regs[0] !=
+           PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->work_group_input_regs[1] !=
+           PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+         uint32_t dest_reg;
+
+         if (program->work_group_input_regs[0] !=
+             PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_X;
+         } else {
+            dest_reg = PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Y;
+         }
+
+         APPEND(pvr_pds_encode_doutw64(0, /* cc */
+                                       0, /* END */
+                                       work_group_id_ctrl_words[0], /* SRC1
+                                                                     */
+                                       dest_reg >> 1)); /* SRC0 */
+      }
+
+      if (program->work_group_input_regs[2] !=
+          PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+         APPEND(pvr_pds_encode_doutw64(
+            0, /* cc */
+            0, /* END */
+            work_group_id_ctrl_words[1], /* SRC1 */
+            (PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_WORK_GROUP_ID_Z) >>
+               1)); /* SRC0 */
+      }
+
+      /* Issue the task to the USC. */
+      if (program->kick_usc && program->has_coefficient_update_task) {
+         APPEND(pvr_pds_encode_doutu(0, /* cc */
+                                     1, /* END */
+                                     usc_control_constant64_coeff_update >>
+                                        1)); /* SRC0; DOUTU 64-bit Src0 */
+      }
+
+      /* Encode a HALT */
+      APPEND(pvr_pds_inst_encode_halt(0));
+
+      /* Set the branch size used to skip the coefficient sync task. */
+      program->coeff_update_task_branch_size = code_size / sizeof(uint32_t);
+
+      /* DOUTW in the local IDs. */
+
+      /* If we want X and Y or Z, we only need one DOUTW. */
+      if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) &&
+          ((program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+           (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED))) {
+         local_input_register =
+            PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
+      } else {
+         /* If we just want X. */
+         if (program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            local_input_register =
+               PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_X;
+         }
+         /* If we just want Y or Z. */
+         else if (program->local_input_regs[1] !=
+                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
+                  program->local_input_regs[2] !=
+                     PVR_PDS_COMPUTE_INPUT_REG_UNUSED) {
+            local_input_register =
+               PVR_PDS_TEMPS_BLOCK_BASE + PVR_PDS_CDM_LOCAL_ID_YZ;
+         }
+      }
+
+      if ((program->local_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->local_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED) ||
+          (program->local_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED)) {
+         APPEND(pvr_pds_encode_doutw64(0, /* cc */
+                                       0, /* END */
+                                       local_id_ctrl_word, /* SRC1 */
+                                       local_input_register >> 1)); /* SRC0
+                                                                     */
+      }
+
+      if (program->clear_pds_barrier) {
+         /* Zero the persistent temp (SW fence for context switch). */
+         APPEND(pvr_pds_inst_encode_add64(
+            0, /* cc */
+            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (zero_constant64 >> 1), /* src0 = 0 */
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (zero_constant64 >> 1), /* src1 = 0 */
+            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0)); /* dest =
+                                                             * ptemp64[0]
+                                                             */
+      }
+
+      /* If this is a fence, issue the DOUTC. */
+      if (program->fence) {
+         APPEND(pvr_pds_inst_encode_doutc(0, /* cc */
+                                          0 /* END */));
+      }
+
+      if (program->kick_usc) {
+         if (program->conditional_render) {
+            /* Skip if coefficient update task. */
+            APPEND(pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF1,
+                                           0,
+                                           PVR_ROGUE_PDSINST_PREDICATE_KEEP,
+                                           16));
+
+            /* Load the predicate. */
+            APPEND(pvr_pds_inst_encode_ld(0, predicate_ld_src0_constant >> 1));
+
+            /* Load negate constant into temp for CMP. */
+            APPEND(pvr_pds_inst_encode_add64(
+               0, /* cc */
+               PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+               PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+               PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+                  (cond_render_negate_constant >> 1), /* src0 = 0 */
+               PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+                  (zero_constant64 >> 1), /* src1 = 0 */
+               PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER +
+                  (cond_render_negate_temp >> 1))); /* dest = ptemp64[0]
+                                                     */
+
+            APPEND(pvr_pds_inst_encode_wdf(0));
+
+            for (uint32_t i = 0; i < 4; i++) {
+               APPEND(pvr_pds_inst_encode_stflp32(
+                  1, /* enable immediate */
+                  0, /* cc */
+                  PVR_ROGUE_PDSINST_LOP_AND, /* LOP */
+                  cond_render_pred_temp + i, /* SRC0 */
+                  cond_render_pred_mask_constant + i, /* SRC1 */
+                  0, /* SRC2 (Shift) */
+                  cond_render_pred_temp + i)); /* DST */
+
+               APPEND(
+                  pvr_pds_inst_encode_stflp32(1, /* enable immediate */
+                                              0, /* cc */
+                                              PVR_ROGUE_PDSINST_LOP_OR, /* LOP
+                                                                         */
+                                              cond_render_pred_temp + i, /* SRC0
+                                                                          */
+                                              cond_render_pred_temp, /* SRC1 */
+                                              0, /* SRC2 (Shift) */
+                                              cond_render_pred_temp)); /* DST */
+            }
+
+            APPEND(pvr_pds_inst_encode_limm(0, /* cc */
+                                            cond_render_pred_temp + 1, /* SRC1
+                                                                        */
+                                            0, /* SRC0 */
+                                            0)); /* GLOBALREG */
+
+            APPEND(pvr_pds_inst_encode_stflp32(1, /* enable immediate */
+                                               0, /* cc */
+                                               PVR_ROGUE_PDSINST_LOP_XOR, /* LOP
+                                                                           */
+                                               cond_render_pred_temp, /* SRC0 */
+                                               cond_render_negate_temp, /* SRC1
+                                                                         */
+                                               0, /* SRC2 (Shift) */
+                                               cond_render_pred_temp)); /* DST
+                                                                         */
+
+            /* Check that the predicate is 0. */
+            APPEND(pvr_pds_inst_encode_cmpi(
+               0, /* cc */
+               PVR_ROGUE_PDSINST_COP_EQ, /* LOP */
+               (cond_render_pred_temp >> 1) +
+                  PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER, /* SRC0 */
+               0)); /* SRC1 */
+
+            /* If predicate is 0, skip DOUTU. */
+            APPEND(pvr_pds_inst_encode_bra(
+               PVR_ROGUE_PDSINST_PREDICATE_P0, /* SRCC:
+                                                  P0 */
+               0, /* NEG */
+               PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SETC:
+                                                    keep
+                                                  */
+               2));
+         }
+
+         /* Issue the task to the USC.
+          * DoutU src1=USC Code Base address, src2=doutu word 2.
+          */
+         APPEND(pvr_pds_encode_doutu(1, /* cc */
+                                     1, /* END */
+                                     usc_control_constant64 >> 1)); /* SRC0;
+                                                                     * DOUTU
+                                                                     * 64-bit
+                                                                     * Src0.
+                                                                     */
+      }
+
+      /* End the program if the Dout did not already end it. */
+      APPEND(pvr_pds_inst_encode_halt(0));
+#undef APPEND
+   }
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Set the data segment pointer and ensure we return 1 past the buffer
+       * ptr.
+       */
+      program->data_segment = buffer;
+
+      buffer += next_constant;
+   }
+
+   /* Require at least one DWORD of PDS data so the program runs. */
+   data_size = MAX2(1, data_size);
+
+   program->temps_used = temps_used;
+   program->highest_temp = temps_used;
+   program->data_size = data_size;
+   if (gen_mode == PDS_GENERATE_SIZES)
+      program->code_size = code_size;
+
+   return buffer;
+}
+
+/**
+ * Generates the PDS vertex shader data or code block. This program will do a
+ * DMA into USC Constants followed by a DOUTU.
+ *
+ * The same constant-offset assignment logic runs for both the data and the
+ * code pass, so the two segments always agree on where each constant lives.
+ *
+ * \param program Pointer to the PDS vertex shader program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Generate code or data.
+ * \param dev_info PVR device information struct.
+ * \returns Pointer to just beyond the code/data.
+ */
+uint32_t *pvr_pds_vertex_shader_sa(
+   struct pvr_pds_vertex_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t next_constant;
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+
+   /* Constant-block offsets (in dwords) assigned below; zero-initialized so
+    * unused features simply leave their offsets untouched.
+    */
+   uint32_t usc_control_constant64 = 0;
+   uint32_t dma_address_constant64 = 0;
+   uint32_t dma_control_constant32 = 0;
+   uint32_t doutw_value_constant64 = 0;
+   uint32_t doutw_control_constant32 = 0;
+   uint32_t fence_constant_word = 0;
+   uint32_t *buffer_base;
+   uint32_t kick_index;
+
+   /* One DOUTW instruction per entry; a 64-bit ("q word") DOUTW carries two
+    * dwords of payload while a 32-bit one carries one, hence the differing
+    * weights in total_size_dma.
+    */
+   uint32_t total_num_doutw =
+      program->num_dword_doutw + program->num_q_word_doutw;
+   uint32_t total_size_dma =
+      program->num_dword_doutw + 2 * program->num_q_word_doutw;
+
+   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+   /* Copy the DMA control words and USC task control words to constants.
+    *
+    * Arrange them so that the 64-bit words are together followed by the 32-bit
+    * words.
+    */
+   if (program->kick_usc) {
+      usc_control_constant64 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (program->clear_pds_barrier) {
+      fence_constant_word =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+   /* Two dwords (one 64-bit address) per DMA kick. */
+   dma_address_constant64 = pvr_pds_get_constants(&next_constant,
+                                                  2 * program->num_dma_kicks,
+                                                  &data_size);
+
+   /* Assign all unaligned constants together to avoid alignment issues caused
+    * by pvr_pds_get_constants with even allocation sizes.
+    */
+   doutw_value_constant64 = pvr_pds_get_constants(
+      &next_constant,
+      total_size_dma + total_num_doutw + program->num_dma_kicks,
+      &data_size);
+   /* Carve the single allocation into: DOUTW payloads, then DOUTW control
+    * words, then DMA control words.
+    */
+   doutw_control_constant32 = doutw_value_constant64 + total_size_dma;
+   dma_control_constant32 = doutw_control_constant32 + total_num_doutw;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Constant writes below index buffer_base by constant offset; buffer
+       * itself is advanced separately so the function can return one past
+       * the written data.
+       */
+      buffer_base = buffer;
+
+      if (program->kick_usc) {
+         /* Src0 for DOUTU. */
+         pvr_pds_write_wide_constant(buffer_base,
+                                     usc_control_constant64,
+                                     program->usc_task_control.src0); /* DOUTU
+                                                                       * 64-bit
+                                                                       * Src0.
+                                                                       */
+         buffer += 2;
+      }
+
+      if (program->clear_pds_barrier) {
+         /* Encode the fence constant src0. Fence barrier is initialized to
+          * zero.
+          */
+         pvr_pds_write_wide_constant(buffer_base, fence_constant_word, 0);
+         buffer += 2;
+      }
+
+      if (total_num_doutw > 0) {
+         /* 64-bit DOUTWs first, matching the constant layout above. */
+         for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
+            /* Write the constant for the coefficient register write. */
+            pvr_pds_write_constant64(buffer_base,
+                                     doutw_value_constant64,
+                                     program->q_word_doutw_value[2 * i],
+                                     program->q_word_doutw_value[2 * i + 1]);
+            /* Tag the final DOUTW with LAST_EN only when no DMA kicks
+             * follow it.
+             */
+            pvr_pds_write_constant32(
+               buffer_base,
+               doutw_control_constant32,
+               program->q_word_doutw_control[i] |
+                  ((!program->num_dma_kicks && i == total_num_doutw - 1)
+                      ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
+                      : 0));
+
+            doutw_value_constant64 += 2;
+            doutw_control_constant32 += 1;
+         }
+
+         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+            /* Write the constant for the coefficient register write. */
+            pvr_pds_write_constant32(buffer_base,
+                                     doutw_value_constant64,
+                                     program->dword_doutw_value[i]);
+            pvr_pds_write_constant32(
+               buffer_base,
+               doutw_control_constant32,
+               program->dword_doutw_control[i] |
+                  ((!program->num_dma_kicks && i == program->num_dword_doutw - 1)
+                      ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
+                      : 0));
+
+            doutw_value_constant64 += 1;
+            doutw_control_constant32 += 1;
+         }
+
+         buffer += total_size_dma + total_num_doutw;
+      }
+
+      if (program->num_dma_kicks == 1) /* Most-common case. */
+      {
+         /* Src0 for DOUTD - Address. */
+         pvr_pds_write_dma_address(buffer_base,
+                                   dma_address_constant64,
+                                   program->dma_address[0],
+                                   false,
+                                   dev_info);
+
+         /* Src1 for DOUTD - Control Word. */
+         pvr_pds_write_constant32(
+            buffer_base,
+            dma_control_constant32,
+            program->dma_control[0] |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+
+         /* Move the buffer ptr along as we will return 1 past the buffer. */
+         buffer += 3;
+      } else if (program->num_dma_kicks > 1) {
+         /* All kicks except the last are written without LAST_EN; the final
+          * kick (written after the loop) carries it.
+          */
+         for (kick_index = 0; kick_index < program->num_dma_kicks - 1;
+              kick_index++) {
+            /* Src0 for DOUTD - Address. */
+            pvr_pds_write_dma_address(buffer_base,
+                                      dma_address_constant64,
+                                      program->dma_address[kick_index],
+                                      false,
+                                      dev_info);
+
+            /* Src1 for DOUTD - Control Word. */
+            pvr_pds_write_constant32(buffer_base,
+                                     dma_control_constant32,
+                                     program->dma_control[kick_index]);
+            dma_address_constant64 += 2;
+            dma_control_constant32 += 1;
+         }
+
+         /* Src0 for DOUTD - Address. */
+         pvr_pds_write_dma_address(buffer_base,
+                                   dma_address_constant64,
+                                   program->dma_address[kick_index],
+                                   false,
+                                   dev_info);
+
+         /* Src1 for DOUTD - Control Word. */
+         pvr_pds_write_constant32(
+            buffer_base,
+            dma_control_constant32,
+            program->dma_control[kick_index] |
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+
+         /* Each kick consumed 3 dwords of data (2 address + 1 control). */
+         buffer += 3 * program->num_dma_kicks;
+      }
+   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      if (program->clear_pds_barrier) {
+         /* Zero the persistent temp (SW fence for context switch). */
+         *buffer++ = pvr_pds_inst_encode_add64(
+            0, /* cc */
+            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (fence_constant_word >> 1), /* src0 = 0 */
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (fence_constant_word >> 1), /* src1 = 0 */
+            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
+                                                            * ptemp[0]
+                                                            */
+      }
+
+      if (total_num_doutw > 0) {
+         /* Emission order (q-word then dword) mirrors the data segment so
+          * the incremented constant offsets line up.
+          */
+         for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
+            /* Set the coefficient register to data value. */
+            *buffer++ = pvr_pds_encode_doutw64(
+               /* cc */ 0,
+               /* END */ !program->num_dma_kicks && !program->kick_usc &&
+                  (i == total_num_doutw - 1),
+               /* SRC1 */ doutw_control_constant32,
+               /* SRC0 */ doutw_value_constant64 >> 1);
+
+            doutw_value_constant64 += 2;
+            doutw_control_constant32 += 1;
+         }
+
+         for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+            /* Set the coefficient register to data value. */
+            *buffer++ = pvr_pds_encode_doutw64(
+               /* cc */ 0,
+               /* END */ !program->num_dma_kicks && !program->kick_usc &&
+                  (i == program->num_dword_doutw - 1),
+               /* SRC1 */ doutw_control_constant32,
+               /* SRC0 */ doutw_value_constant64 >> 1);
+
+            doutw_value_constant64 += 1;
+            doutw_control_constant32 += 1;
+         }
+      }
+
+      if (program->num_dma_kicks != 0) {
+         /* DMA the state into the secondary attributes. */
+
+         if (program->num_dma_kicks == 1) /* Most-common case. */
+         {
+            *buffer++ = pvr_pds_encode_doutd(
+               /* cc */ 0,
+               /* END */ !program->kick_usc,
+               /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit Src1 */
+               /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD 64-bit
+                                                         * Src0.
+                                                         */
+         } else {
+            for (kick_index = 0; kick_index < program->num_dma_kicks;
+                 kick_index++) {
+               /* END is only set on the last DOUTD, and only when no DOUTU
+                * follows it.
+                */
+               *buffer++ = pvr_pds_encode_doutd(
+                  /* cc */ 0,
+                  /* END */ (!program->kick_usc) &&
+                     (kick_index + 1 == program->num_dma_kicks),
+                  /* SRC1 */ dma_control_constant32, /* DOUTD 32-bit
+                                                      * Src1.
+                                                      */
+                  /* SRC0 */ dma_address_constant64 >> 1); /* DOUTD
+                                                            * 64-bit
+                                                            * Src0.
+                                                            */
+               dma_address_constant64 += 2;
+               dma_control_constant32 += 1;
+            }
+         }
+      }
+
+      if (program->kick_usc) {
+         /* Kick the USC. */
+         *buffer++ = pvr_pds_encode_doutu(
+            /* cc */ 0,
+            /* END */ 1,
+            /* SRC0 */ usc_control_constant64 >> 1); /* DOUTU 64-bit Src0.
+                                                      */
+      }
+
+      /* Nothing was emitted that could end the program, so emit an explicit
+       * HALT.
+       */
+      if (!program->kick_usc && program->num_dma_kicks == 0 &&
+          total_num_doutw == 0) {
+         *buffer++ = pvr_pds_inst_encode_halt(0);
+      }
+   }
+
+   /* Instruction count: one per DMA kick and per DOUTW... */
+   code_size = program->num_dma_kicks + total_num_doutw;
+   if (program->clear_pds_barrier)
+      code_size++; /* ADD64 instruction. */
+
+   /* ...plus one for the DOUTU when the USC is kicked. */
+   if (program->kick_usc)
+      code_size++;
+
+   /* If there are no DMAs and no USC kick then code is HALT only. */
+   if (code_size == 0)
+      code_size = 1;
+
+   program->data_size = data_size;
+   program->code_size = code_size;
+
+   return buffer;
+}
+
+/**
+ * Writes the code segment for the PDS pixel shader secondary attributes
+ * program (or updates sizes only).
+ *
+ * \param program Pointer to the PDS pixel shader secondary attributes program.
+ * \param buffer Pointer to the buffer for the code.
+ * \param gen_mode Either code is generated or sizes only are updated; data
+ *                 segment generation is not supported by this function.
+ * \returns Pointer to just beyond the buffer for the code, or NULL when only
+ *          sizes were updated.
+ */
+uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
+   struct pvr_pds_pixel_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t *instruction;
+   uint32_t code_size = 0;
+   uint32_t data_size = 0;
+   uint32_t temps_used = 0;
+   uint32_t next_constant;
+
+   /* data_size is only a local tally driven by pvr_pds_get_constants() so
+    * that constant offsets here match the data-segment layout; it is never
+    * stored back into the program -- presumably the matching *_data()
+    * function reports the data size.
+    */
+   /* temps_used is never incremented in this function, so the MAX2() at the
+    * end always reports the minimum count of 1.
+    */
+
+   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
+          0);
+
+   /* This function only emits the code segment or updates sizes. */
+   assert(gen_mode != PDS_GENERATE_DATA_SEGMENT);
+
+   /* clang-format off */
+   /* Shape of code segment (note: clear is different)
+    *
+    *      Code
+    *    +------------+
+    *    | BRA if0    |
+    *    | DOUTD      |
+    *    |  ...       |
+    *    | DOUTD.halt |
+    *    | uniform    |
+    *    | DOUTD      |
+    *    |  ...       |
+    *    |  ...       |
+    *    | DOUTW      |
+    *    |  ...       |
+    *    |  ...       |
+    *    | DOUTU.halt |
+    *    | HALT       |
+    *    +------------+
+    */
+   /* clang-format on */
+   instruction = buffer;
+
+   next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+   /* The clear color can arrive packed in the right form in the first (or
+    * first 2) dwords of the shared registers and the program will issue a
+    * single doutw for this.
+    */
+   if (program->clear && program->packed_clear) {
+      uint32_t color_constant1 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      uint32_t control_word_constant1 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      /* NOTE(review): unlike the other clear paths, code_size is only
+       * counted inside the code-segment check below, so PDS_GENERATE_SIZES
+       * reports 0 for a packed clear. Also, when kick_usc is set the END
+       * flag is cleared on the DOUTW but this branch never emits the DOUTU
+       * itself -- confirm both are intended.
+       */
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+         /* DOUTW the clear color to the USC constants. Predicate with
+          * uniform loading flag (IF0).
+          */
+         *instruction++ = pvr_pds_encode_doutw64(
+            /* cc */ 1, /* Only for uniform loading program. */
+            /* END */ program->kick_usc ? 0 : 1, /* Last
+                                                  * instruction
+                                                  * for a clear.
+                                                  */
+            /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
+            /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
+
+         code_size += 1;
+      }
+   } else if (program->clear) {
+      uint32_t color_constant1, color_constant2;
+
+      /* An odd destination register needs a 32-bit write first to realign,
+       * then a 64-bit write, then a final 32-bit write (3 DOUTWs total).
+       */
+      if (program->clear_color_dest_reg & 0x1) {
+         uint32_t color_constant3, control_word_constant1,
+            control_word_constant2, color_constant4;
+
+         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
+         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
+
+         control_word_constant1 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_constant2 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* DOUTW the clear color to the USSE constants. Predicate with
+             * uniform loading flag (IF0).
+             */
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for Uniform Loading program */
+               /* END */ 0,
+               /* SRC1 */ control_word_constant1, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
+
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for Uniform Loading program */
+               /* END */ 0,
+               /* SRC1 */ control_word_constant2, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
+
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for uniform loading program */
+               /* END */ program->kick_usc ? 0 : 1, /* Last
+                                                     * instruction
+                                                     * for a clear.
+                                                     */
+               /* SRC1 */ color_constant4, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant3 >> 1); /* DOUTW 64-bit Src0 */
+         }
+
+         code_size += 3;
+      } else {
+         uint32_t control_word_constant, control_word_last_constant;
+
+         /* Put the clear color and control words into the first 8
+          * constants.
+          */
+         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_constant =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_last_constant =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* DOUTW the clear color to the USSE constants. Predicate with
+             * uniform loading flag (IF0).
+             */
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for Uniform Loading program */
+               /* END */ 0,
+               /* SRC1 */ control_word_constant, /* DOUTW 32-bit Src1 */
+               /* SRC0 */ color_constant1 >> 1); /* DOUTW 64-bit Src0 */
+
+            *instruction++ = pvr_pds_encode_doutw64(
+               /* cc */ 1, /* Only for uniform loading program */
+               /* END */ program->kick_usc ? 0 : 1, /* Last
+                                                     * instruction
+                                                     * for a clear.
+                                                     */
+               /* SRC1 */ control_word_last_constant, /* DOUTW 32-bit Src1
+                                                       */
+               /* SRC0 */ color_constant2 >> 1); /* DOUTW 64-bit Src0 */
+         }
+
+         code_size += 2;
+      }
+
+      if (program->kick_usc) {
+         uint32_t doutu_constant64;
+
+         doutu_constant64 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* Issue the task to the USC.
+             *
+             * dout ds1[constant_use], ds0[constant_use],
+             * ds1[constant_use], emit
+             */
+            *instruction++ = pvr_pds_encode_doutu(
+               /* cc */ 0,
+               /* END */ 1,
+               /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0
+                                                   */
+         }
+
+         code_size += 1;
+      }
+
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+         /* End the program. */
+         *instruction++ = pvr_pds_inst_encode_halt(0);
+      }
+      code_size += 1;
+   } else {
+      /* Non-clear path: texture DMA kicks, then (reached via the IF0 branch
+       * below) DOUTWs, uniform DMA kicks and an optional USC kick.
+       */
+      uint32_t total_num_doutw =
+         program->num_dword_doutw + program->num_q_word_doutw;
+      bool both_textures_and_uniforms =
+         ((program->num_texture_dma_kicks > 0) &&
+          ((program->num_uniform_dma_kicks > 0 || total_num_doutw > 0) ||
+           program->kick_usc));
+      uint32_t doutu_constant64 = 0;
+
+      if (both_textures_and_uniforms) {
+         /* If the size of a PDS data section is 0, the hardware won't run
+          * it. We therefore don't need to branch when there is only a
+          * texture OR a uniform update program.
+          */
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            uint32_t branch_address =
+               MAX2(1 + program->num_texture_dma_kicks, 2);
+
+            /* Use If0 to BRAnch to uniform code. */
+            *instruction++ = pvr_pds_encode_bra(
+               /* SRCC */ PVR_ROGUE_PDSINST_PREDICATE_IF0,
+               /* NEG */ PVR_ROGUE_PDSINST_NEG_DISABLE,
+               /* SETC */ PVR_ROGUE_PDSINST_PREDICATE_KEEP,
+               /* ADDR */ branch_address);
+         }
+
+         code_size += 1;
+      }
+
+      if (program->num_texture_dma_kicks > 0) {
+         uint32_t dma_address_constant64;
+         uint32_t dma_control_constant32;
+         /* Allocate 3 constant spaces for each kick. The 64-bit constants
+          * come first followed by the 32-bit constants.
+          */
+         dma_address_constant64 = PVR_PDS_CONSTANTS_BLOCK_BASE;
+         dma_control_constant32 =
+            dma_address_constant64 + (program->num_texture_dma_kicks * 2);
+
+         for (uint32_t dma = 0; dma < program->num_texture_dma_kicks; dma++) {
+            code_size += 1;
+            if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
+               continue;
+
+            /* DMA the state into the secondary attributes. */
+            *instruction++ = pvr_pds_encode_doutd(
+               /* cc */ 0,
+               /* END */ dma == (program->num_texture_dma_kicks - 1),
+               /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1 */
+               /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
+                                                         * 64-bit
+                                                         * Src0
+                                                         */
+            dma_address_constant64 += 2;
+            dma_control_constant32 += 1;
+         }
+      } else if (both_textures_and_uniforms) {
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* End the program. */
+            *instruction++ = pvr_pds_inst_encode_halt(0);
+         }
+
+         code_size += 1;
+      }
+
+      /* Reserve space at the beginning of the data segment for the DOUTU Task
+       * Control if one is needed.
+       */
+      if (program->kick_usc) {
+         doutu_constant64 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+      }
+
+      /* Allocate 3 constant spaces for each DMA and 2 for a USC kick. The
+       * 64-bit constants come first followed by the 32-bit constants.
+       */
+      uint32_t total_size_dma =
+         program->num_dword_doutw + 2 * program->num_q_word_doutw;
+
+      uint32_t dma_address_constant64 = pvr_pds_get_constants(
+         &next_constant,
+         program->num_uniform_dma_kicks * 3 + total_size_dma + total_num_doutw,
+         &data_size);
+      uint32_t doutw_value_constant64 =
+         dma_address_constant64 + program->num_uniform_dma_kicks * 2;
+      uint32_t dma_control_constant32 = doutw_value_constant64 + total_size_dma;
+      uint32_t doutw_control_constant32 =
+         dma_control_constant32 + program->num_uniform_dma_kicks;
+
+      if (total_num_doutw > 0) {
+         pvr_pds_get_constants(&next_constant, 0, &data_size);
+
+         /* NOTE(review): other emission sites in this function also check
+          * 'instruction' for NULL; this one dereferences it unconditionally
+          * in code-segment mode -- assumes callers always pass a buffer.
+          */
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+            for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
+               /* Set the coefficient register to data value. */
+               *instruction++ = pvr_pds_encode_doutw64(
+                  /* cc */ 0,
+                  /* END */ !program->num_uniform_dma_kicks &&
+                     !program->kick_usc && (i == total_num_doutw - 1),
+                  /* SRC1 */ doutw_control_constant32,
+                  /* SRC0 */ doutw_value_constant64 >> 1);
+
+               doutw_value_constant64 += 2;
+               doutw_control_constant32 += 1;
+            }
+
+            for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+               /* Set the coefficient register to data value. */
+               *instruction++ = pvr_pds_encode_doutw64(
+                  /* cc */ 0,
+                  /* END */ !program->num_uniform_dma_kicks &&
+                     !program->kick_usc && (i == program->num_dword_doutw - 1),
+                  /* SRC1 */ doutw_control_constant32,
+                  /* SRC0 */ doutw_value_constant64 >> 1);
+
+               doutw_value_constant64 += 1;
+               doutw_control_constant32 += 1;
+            }
+         }
+         code_size += total_num_doutw;
+      }
+
+      if (program->num_uniform_dma_kicks > 0) {
+         for (uint32_t dma = 0; dma < program->num_uniform_dma_kicks; dma++) {
+            code_size += 1;
+
+            if (gen_mode != PDS_GENERATE_CODE_SEGMENT || !instruction)
+               continue;
+
+            bool last_instruction = false;
+            if (!program->kick_usc &&
+                (dma == program->num_uniform_dma_kicks - 1)) {
+               last_instruction = true;
+            }
+            /* DMA the state into the secondary attributes. */
+            *instruction++ = pvr_pds_encode_doutd(
+               /* cc */ 0,
+               /* END */ last_instruction,
+               /* SRC1 */ dma_control_constant32, /* DOUT 32-bit Src1
+                                                   */
+               /* SRC0 */ dma_address_constant64 >> 1); /* DOUT
+                                                         * 64-bit
+                                                         * Src0
+                                                         */
+            dma_address_constant64 += 2;
+            dma_control_constant32 += 1;
+         }
+      }
+
+      if (program->kick_usc) {
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* Issue the task to the USC.
+             *
+             * dout ds1[constant_use], ds0[constant_use],
+             * ds1[constant_use], emit
+             */
+
+            *instruction++ = pvr_pds_encode_doutu(
+               /* cc */ 0,
+               /* END */ 1,
+               /* SRC0 */ doutu_constant64 >> 1); /* DOUTU 64-bit Src0 */
+         }
+
+         code_size += 1;
+      } else if (program->num_uniform_dma_kicks == 0 && total_num_doutw == 0) {
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+            /* End the program. */
+            *instruction++ = pvr_pds_inst_encode_halt(0);
+         }
+
+         code_size += 1;
+      }
+   }
+
+   /* Minimum temp count is 1. */
+   program->temps_used = MAX2(temps_used, 1);
+   program->code_size = code_size;
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT)
+      return instruction;
+   else
+      return NULL;
+}
+
+/**
+ * Writes the Uniform Data block for the PDS pixel shader secondary attributes
+ * program.
+ *
+ * \param program Pointer to the PDS pixel shader secondary attributes program.
+ * \param buffer Pointer to the buffer for the data.
+ * \param gen_mode Either data is generated or sizes only are updated; code
+ *                 segment generation is not supported by this function.
+ * \param uniform When true the uniform data block is written, otherwise the
+ *                texture DMA data block is written.
+ * \param dev_info PVR device information struct.
+ * \returns Pointer to just beyond the buffer for the data, or NULL when only
+ *          sizes were updated.
+ */
+uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
+   struct pvr_pds_pixel_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   bool uniform,
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t *constants = buffer;
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   uint32_t temps_used = 0;
+   uint32_t data_size = 0;
+
+   /* temps_used is never incremented in this function, so the MAX2() at the
+    * end always reports the minimum count of 1.
+    */
+
+   assert((((uintptr_t)buffer) & (PDS_ROGUE_TA_STATE_PDS_ADDR_ALIGNSIZE - 1)) ==
+          0);
+
+   /* This function only writes the data segment or updates sizes. */
+   assert(gen_mode != PDS_GENERATE_CODE_SEGMENT);
+
+   /* Shape of data segment (note: clear is different).
+    *
+    *        Uniform            Texture
+    *    +--------------+   +-------------+
+    *    | USC Task   L |   | USC Task  L |
+    *    |            H |   |           H |
+    *    | DMA1 Src0  L |   | DMA1 Src0 L |
+    *    |            H |   |           H |
+    *    | DMA2 Src0  L |   |             |
+    *    |            H |   |             |
+    *    | DMA1 Src1    |   | DMA1 Src1   |
+    *    | DMA2 Src1    |   |             |
+    *    | DOUTW0 Src1  |   |             |
+    *    | DOUTW1 Src1  |   |             |
+    *    |   ...        |   |             |
+    *    | DOUTWn Srcn  |   |             |
+    *    | other data   |   |             |
+    *    +--------------+   +-------------+
+    */
+
+   /* Generate the PDS pixel shader secondary attributes data.
+    *
+    * Packed Clear
+    * The clear color can arrive packed in the right form in the first (or
+    * first 2) dwords of the shared registers and the program will issue a
+    * single DOUTW for this.
+    */
+   if (program->clear && uniform && program->packed_clear) {
+      uint32_t color_constant1 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      uint32_t control_word_constant1 =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+         uint32_t doutw;
+
+         pvr_pds_write_constant64(constants,
+                                  color_constant1,
+                                  program->clear_color[0],
+                                  program->clear_color[1]);
+
+         /* Load into first constant in common store. */
+         doutw = pvr_pds_encode_doutw_src1(
+            program->clear_color_dest_reg,
+            PVR_PDS_DOUTW_LOWER64,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+            false,
+            dev_info);
+
+         /* Set the last flag. */
+         doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+         pvr_pds_write_constant64(constants, control_word_constant1, doutw, 0);
+      }
+   } else if (program->clear && uniform) {
+      uint32_t color_constant1, color_constant2;
+
+      /* An odd destination register needs a 32-bit write first to realign,
+       * then a 64-bit write, then a final 32-bit write; this mirrors the 3
+       * DOUTWs emitted by the matching *_code() function.
+       */
+      if (program->clear_color_dest_reg & 0x1) {
+         uint32_t color_constant3, control_word_constant1,
+            control_word_constant2, color_constant4;
+
+         color_constant1 = pvr_pds_get_constants(&next_constant, 1, &data_size);
+         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant3 = pvr_pds_get_constants(&next_constant, 1, &data_size);
+
+         control_word_constant1 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_constant2 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant4 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint32_t doutw;
+
+            pvr_pds_write_constant32(constants,
+                                     color_constant1,
+                                     program->clear_color[0]);
+
+            pvr_pds_write_constant64(constants,
+                                     color_constant2,
+                                     program->clear_color[1],
+                                     program->clear_color[2]);
+
+            pvr_pds_write_constant32(constants,
+                                     color_constant3,
+                                     program->clear_color[3]);
+
+            /* Load into first constant in common store. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->clear_color_dest_reg,
+               PVR_PDS_DOUTW_LOWER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+
+            pvr_pds_write_constant64(constants,
+                                     control_word_constant1,
+                                     doutw,
+                                     0);
+
+            /* Move the destination register along. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->clear_color_dest_reg + 1,
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+
+            pvr_pds_write_constant64(constants,
+                                     control_word_constant2,
+                                     doutw,
+                                     0);
+
+            /* Move the destination register along. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->clear_color_dest_reg + 3,
+               PVR_PDS_DOUTW_LOWER32,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+
+            /* Set the last flag. */
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+            pvr_pds_write_constant64(constants, color_constant4, doutw, 0);
+         }
+      } else {
+         uint32_t control_word_constant, control_word_last_constant;
+
+         /* Put the clear color and control words into the first 8
+          * constants.
+          */
+         color_constant1 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         color_constant2 = pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_constant =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+         control_word_last_constant =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint32_t doutw;
+            pvr_pds_write_constant64(constants,
+                                     color_constant1,
+                                     program->clear_color[0],
+                                     program->clear_color[1]);
+
+            pvr_pds_write_constant64(constants,
+                                     color_constant2,
+                                     program->clear_color[2],
+                                     program->clear_color[3]);
+
+            /* Load into first constant in common store. */
+            doutw = pvr_pds_encode_doutw_src1(
+               program->clear_color_dest_reg,
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+
+            pvr_pds_write_constant64(constants, control_word_constant, doutw, 0);
+
+            /* Move the destination register along. */
+            doutw &= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK;
+            doutw |= (program->clear_color_dest_reg + 2)
+                     << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
+
+            /* Set the last flag. */
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+            pvr_pds_write_constant64(constants,
+                                     control_word_last_constant,
+                                     doutw,
+                                     0);
+         }
+      }
+
+      /* Constants for the DOUTU Task Control, if needed. */
+      if (program->kick_usc) {
+         uint32_t doutu_constant64 =
+            pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            pvr_pds_write_wide_constant(
+               constants,
+               doutu_constant64,
+               program->usc_task_control.src0); /* 64-bit
+                                                 */
+            /* Src0 */
+         }
+      }
+   } else {
+      /* Non-clear path: 'uniform' selects between writing the uniform data
+       * block (DOUTU task control, DOUTW values and uniform DMA kicks) and
+       * the texture DMA data block.
+       */
+      if (uniform) {
+         /* Reserve space at the beginning of the data segment for the DOUTU
+          * Task Control if one is needed.
+          */
+         if (program->kick_usc) {
+            uint32_t doutu_constant64 =
+               pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+               pvr_pds_write_wide_constant(
+                  constants,
+                  doutu_constant64,
+                  program->usc_task_control.src0); /* 64-bit Src0 */
+            }
+         }
+
+         uint32_t total_num_doutw =
+            program->num_dword_doutw + program->num_q_word_doutw;
+         uint32_t total_size_dma =
+            program->num_dword_doutw + 2 * program->num_q_word_doutw;
+
+         /* Allocate 3 constant spaces for each kick. The 64-bit constants
+          * come first followed by the 32-bit constants.
+          */
+         uint32_t dma_address_constant64 =
+            pvr_pds_get_constants(&next_constant,
+                                  program->num_uniform_dma_kicks * 3 +
+                                     total_size_dma + total_num_doutw,
+                                  &data_size);
+         uint32_t doutw_value_constant64 =
+            dma_address_constant64 + program->num_uniform_dma_kicks * 2;
+         uint32_t dma_control_constant32 =
+            doutw_value_constant64 + total_size_dma;
+         uint32_t doutw_control_constant32 =
+            dma_control_constant32 + program->num_uniform_dma_kicks;
+
+         if (total_num_doutw > 0) {
+            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+               for (uint32_t i = 0; i < program->num_q_word_doutw; i++) {
+                  pvr_pds_write_constant64(
+                     constants,
+                     doutw_value_constant64,
+                     program->q_word_doutw_value[2 * i],
+                     program->q_word_doutw_value[2 * i + 1]);
+                  pvr_pds_write_constant32(
+                     constants,
+                     doutw_control_constant32,
+                     program->q_word_doutw_control[i] |
+                        ((!program->num_uniform_dma_kicks &&
+                          i == total_num_doutw - 1)
+                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
+                            : 0));
+
+                  doutw_value_constant64 += 2;
+                  doutw_control_constant32 += 1;
+               }
+
+               for (uint32_t i = 0; i < program->num_dword_doutw; i++) {
+                  pvr_pds_write_constant32(constants,
+                                           doutw_value_constant64,
+                                           program->dword_doutw_value[i]);
+                  pvr_pds_write_constant32(
+                     constants,
+                     doutw_control_constant32,
+                     program->dword_doutw_control[i] |
+                        ((!program->num_uniform_dma_kicks &&
+                          i == program->num_dword_doutw - 1)
+                            ? PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN
+                            : 0));
+
+                  doutw_value_constant64 += 1;
+                  doutw_control_constant32 += 1;
+               }
+            }
+         }
+
+         if (program->num_uniform_dma_kicks > 0) {
+            uint32_t kick;
+
+            if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+               /* All but the last kick; the last one is written below so the
+                * LAST flag can be set on its control word.
+                */
+               for (kick = 0; kick < program->num_uniform_dma_kicks - 1;
+                    kick++) {
+                  /* Copy the dma control words to constants. */
+                  pvr_pds_write_dma_address(constants,
+                                            dma_address_constant64,
+                                            program->uniform_dma_address[kick],
+                                            false,
+                                            dev_info);
+                  pvr_pds_write_constant32(constants,
+                                           dma_control_constant32,
+                                           program->uniform_dma_control[kick]);
+
+                  dma_address_constant64 += 2;
+                  dma_control_constant32 += 1;
+               }
+
+               pvr_pds_write_dma_address(constants,
+                                         dma_address_constant64,
+                                         program->uniform_dma_address[kick],
+                                         false,
+                                         dev_info);
+               pvr_pds_write_constant32(
+                  constants,
+                  dma_control_constant32,
+                  program->uniform_dma_control[kick] |
+                     PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+            }
+         }
+
+      } else if (program->num_texture_dma_kicks > 0) {
+         /* Allocate 3 constant spaces for each kick. The 64-bit constants
+          * come first followed by the 32-bit constants.
+          */
+         uint32_t dma_address_constant64 =
+            pvr_pds_get_constants(&next_constant,
+                                  program->num_texture_dma_kicks * 3,
+                                  &data_size);
+         uint32_t dma_control_constant32 =
+            dma_address_constant64 + (program->num_texture_dma_kicks * 2);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint32_t kick;
+            /* All but the last kick; the last one is written below so the
+             * LAST flag can be set on its control word.
+             */
+            for (kick = 0; kick < program->num_texture_dma_kicks - 1; kick++) {
+               /* Copy the DMA control words to constants. */
+               pvr_pds_write_dma_address(constants,
+                                         dma_address_constant64,
+                                         program->texture_dma_address[kick],
+                                         false,
+                                         dev_info);
+
+               pvr_pds_write_constant32(constants,
+                                        dma_control_constant32,
+                                        program->texture_dma_control[kick]);
+
+               dma_address_constant64 += 2;
+               dma_control_constant32 += 1;
+            }
+
+            pvr_pds_write_dma_address(constants,
+                                      dma_address_constant64,
+                                      program->texture_dma_address[kick],
+                                      false,
+                                      dev_info);
+
+            pvr_pds_write_constant32(
+               constants,
+               dma_control_constant32,
+               program->texture_dma_control[kick] |
+                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN);
+         }
+      }
+   }
+
+   /* Save the data segment pointer and size. */
+   program->data_segment = constants;
+
+   /* Minimum temp count is 1. */
+   program->temps_used = MAX2(temps_used, 1);
+   program->data_size = data_size;
+
+   /* In data-segment mode return just past the last constant written;
+    * otherwise nothing was emitted.
+    */
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+      return (constants + next_constant);
+   else
+      return NULL;
+}
+
+/**
+ * Generates generic DOUTC PDS program.
+ *
+ * The data segment holds a zeroed 64-bit Src0 and a zeroed 64-bit constant
+ * slot for Src1: the DOUTC sources are not interpreted but must still be
+ * valid. The code segment is a single DOUTC followed by a HALT.
+ *
+ * \param program Pointer to the PDS fence program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated, or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the code or program segment.
+ */
+uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
+                                 uint32_t *restrict buffer,
+                                 enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t constant = 0;
+
+   /* Automatically get a data size of 1x 128bit chunks. */
+   uint32_t data_size = 0, code_size = 0;
+
+   /* Setup the data part. */
+   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
+   uint32_t *instruction = buffer;
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
+                                                           * dwords.
+                                                           */
+
+   /* Update the program sizes. Sizes are zero until a data segment is
+    * actually generated below.
+    */
+   program->data_size = data_size;
+   program->code_size = code_size;
+   program->data_segment = constants;
+
+   if (gen_mode == PDS_GENERATE_SIZES)
+      return NULL;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* DOUTC sources are unused, but they must be valid; write zeroed
+       * Src0/Src1 constants.
+       */
+
+      constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
+      pvr_pds_write_wide_constant(constants, constant + 0, 0); /* 64-bit
+                                                                * Src0
+                                                                */
+
+      uint32_t control_word_constant =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      /* Src1 is a 32-bit control word stored in a 64-bit constant slot. */
+      pvr_pds_write_constant64(constants, control_word_constant, 0, 0); /* 32-bit
+                                                                         * Src1
+                                                                         */
+
+      program->data_size = data_size;
+      buffer += data_size;
+
+      return buffer;
+   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+      *instruction++ = pvr_pds_inst_encode_doutc(
+         /* cc */ 0,
+         /* END */ 0);
+
+      code_size++;
+
+      /* End the program. */
+      *instruction++ = pvr_pds_inst_encode_halt(0);
+      code_size++;
+
+      program->code_size = code_size;
+   }
+
+   return instruction;
+}
+
+/**
+ * Generates generic DOUTW PDS program in a single data+code block.
+ *
+ * The data region interleaves each 64-bit payload constant with the DOUTW
+ * control word that writes it; the code region then emits one DOUTW
+ * instruction per pair, optionally followed by a HALT.
+ *
+ * \param control Pointer to the DOUTW control structure.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated, or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the code or program segment.
+ */
+uint32_t *pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict control,
+                                 uint32_t *restrict buffer,
+                                 enum pvr_pds_generate_mode gen_mode,
+                                 const struct pvr_device_info *dev_info)
+{
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   uint32_t doutw;
+   uint32_t data_size = 0, code_size = 0;
+   uint32_t constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
+   uint32_t control_word_constant[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
+
+   /* Assert if buffer is exceeded. */
+   assert(control->num_const64 <= PVR_PDS_MAX_NUM_DOUTW_CONSTANTS);
+
+   uint32_t *constants = buffer;
+   uint32_t *instruction = buffer;
+
+   /* Put the constants and control words interleaved in the data region.
+    * This allocation pass runs in every gen_mode so that both segments
+    * agree on the constant offsets.
+    */
+   for (uint32_t const_pair = 0; const_pair < control->num_const64;
+        const_pair++) {
+      constant[const_pair] =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+      control_word_constant[const_pair] =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+   }
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Data segment points to start of constants. */
+      control->data_segment = constants;
+
+      for (uint32_t const_pair = 0; const_pair < control->num_const64;
+           const_pair++) {
+         pvr_pds_write_constant64(constants,
+                                  constant[const_pair],
+                                  H32(control->doutw_data[const_pair]),
+                                  L32(control->doutw_data[const_pair]));
+
+         /* Start loading at offset 0. */
+         if (control->dest_store == PDS_COMMON_STORE) {
+            doutw = pvr_pds_encode_doutw_src1(
+               (2 * const_pair),
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE,
+               false,
+               dev_info);
+         } else {
+            doutw = pvr_pds_encode_doutw_src1(
+               (2 * const_pair),
+               PVR_PDS_DOUTW_LOWER64,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               false,
+               dev_info);
+         }
+
+         if (const_pair + 1 == control->num_const64) {
+            /* Set the last flag for the MCU (assume there are no following
+             * DOUTD's).
+             */
+            doutw |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+         }
+         pvr_pds_write_constant64(constants,
+                                  control_word_constant[const_pair],
+                                  doutw,
+                                  0);
+      }
+
+      control->data_size = data_size;
+   } else if (gen_mode == PDS_GENERATE_CODE_SEGMENT && instruction) {
+      /* Code section. */
+
+      for (uint32_t const_pair = 0; const_pair < control->num_const64;
+           const_pair++) {
+         /* DOUTW the PDS data to the USC constants. */
+         *instruction++ = pvr_pds_encode_doutw64(
+            /* cc */ 0,
+            /* END */ control->last_instruction &&
+               (const_pair + 1 == control->num_const64),
+            /* SRC1 */ control_word_constant[const_pair], /* DOUTW 32-bit
+                                                           * Src1.
+                                                           */
+            /* SRC0 */ constant[const_pair] >> 1); /* DOUTW 64-bit Src0. */
+
+         code_size++;
+      }
+
+      if (control->last_instruction) {
+         /* End the program. */
+         *instruction++ = pvr_pds_inst_encode_halt(0);
+         code_size++;
+      }
+
+      control->code_size = code_size;
+   }
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+      return (constants + next_constant);
+   else
+      return instruction;
+}
+
+/**
+ * Generates generic kick DOUTU PDS program in a single data+code block.
+ *
+ * The data segment is a fixed-size block holding the 64-bit USC task
+ * control word; the code segment is a single DOUTU (with END set).
+ *
+ * \param program Pointer to the PDS kick USC.
+ * \param buffer Pointer to the buffer for the program.
+ * \param start_next_constant Next constant in data segment. Non-zero if another
+ *                            instruction precedes the DOUTU.
+ * \param cc_enabled If true then the DOUTU is predicated (cc set).
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the code or program segment.
+ */
+uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
+                           uint32_t *restrict buffer,
+                           uint32_t start_next_constant,
+                           bool cc_enabled,
+                           enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t constant = 0;
+
+   /* Automatically get a data size of 2 128bit chunks. */
+   uint32_t data_size = ROGUE_PDS_FIXED_PIXEL_SHADER_DATA_SIZE;
+   uint32_t code_size = 1; /* Single doutu */
+   uint32_t dummy_count = 0;
+
+   /* Setup the data part. */
+   uint32_t *constants = buffer; /* Constants placed at front of buffer. */
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE; /* Constants count in
+                                                           * dwords.
+                                                           */
+
+   /* Update the program sizes. */
+   program->data_size = data_size;
+   program->code_size = code_size;
+   program->data_segment = constants;
+
+   if (gen_mode == PDS_GENERATE_SIZES)
+      return NULL;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT ||
+       gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
+      /* Copy the USC task control words to constants.
+       *
+       * data_size is already fixed above, so the allocation counts into
+       * dummy_count to avoid double-counting.
+       */
+
+      constant = pvr_pds_get_constants(&next_constant, 2, &dummy_count);
+
+      pvr_pds_write_wide_constant(constants,
+                                  constant + 0,
+                                  program->usc_task_control.src0); /* 64-bit
+                                                                    * Src0.
+                                                                    */
+      buffer += data_size;
+
+      if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+         return buffer;
+   }
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT ||
+       gen_mode == PDS_GENERATE_CODEDATA_SEGMENTS) {
+      /* Generate the PDS pixel shader code. */
+
+      /* Setup the instruction pointer. */
+      uint32_t *instruction = buffer;
+
+      /* Issue the task to the USC.
+       *
+       * dout ds1[constant_use], ds0[constant_use], ds1[constant_use], emit ;
+       * halt halt
+       */
+
+      *instruction++ = pvr_pds_encode_doutu(
+         /* cc */ cc_enabled,
+         /* END */ 1,
+         /* SRC0 */ (constant + start_next_constant) >> 1); /* DOUTU
+                                                             * 64-bit Src0
+                                                             */
+
+      /* Return pointer to just after last instruction. */
+      return instruction;
+   }
+
+   /* Execution should never reach here; keep compiler happy. */
+   return NULL;
+}
+
+/**
+ * Emits the conditional prologue used around a compute barrier: skip the
+ * remainder of the program when running as the coefficient update task.
+ *
+ * The code segment is always exactly 3 instructions (BRA, HALT, BRA);
+ * callers rely on this when accounting for code size. There is no data
+ * segment.
+ *
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_compute_barrier_conditional(
+   uint32_t *buffer,
+   enum pvr_pds_generate_mode gen_mode)
+{
+   /* Compute barriers supported. Need to test for coeff sync task. */
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT)
+      return buffer; /* No data segment. */
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Test whether this is the coefficient update task or not: if IF1 is
+       * set, fall through to the HALT below; otherwise branch past it.
+       */
+      *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
+                                                                       */
+                                     PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
+                                                                         */
+                                     PVR_ROGUE_PDSINST_PREDICATE_IF1, /* SETC
+                                                                       */
+                                     1 /* ADDR */);
+
+      /* Encode a HALT. */
+      *buffer++ = pvr_pds_inst_encode_halt(1);
+
+      /* Reset the default predicate to IF0. */
+      *buffer++ = pvr_pds_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SRCC
+                                                                       */
+                                     PVR_ROGUE_PDSINST_BRA_NEG_DISABLE, /* NEG
+                                                                         */
+                                     PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETC
+                                                                       */
+                                     1 /* ADDR */);
+   }
+
+   return buffer;
+}
+
+/**
+ * Generates program to kick the USC task to store shared.
+ *
+ * Concatenates a DOUTW program (shared register writes) followed by a
+ * kick-USC (DOUTU) program; the combined data and code sizes are
+ * accumulated into \p program.
+ *
+ * \param program Pointer to the PDS shared register.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_shared_storing_program(
+   struct pvr_pds_shared_storing_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
+   struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
+
+   if (gen_mode == PDS_GENERATE_SIZES)
+      return NULL;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      uint32_t *constants = buffer;
+
+      constants =
+         pvr_pds_generate_doutw(doutw_control, constants, gen_mode, dev_info);
+      program->data_size = doutw_control->data_size;
+
+      constants = pvr_pds_kick_usc(kick_usc_program,
+                                   constants,
+                                   0,
+                                   program->cc_enable,
+                                   gen_mode);
+      program->data_size += kick_usc_program->data_size;
+
+      return constants;
+   }
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Generate PDS code segment. */
+      uint32_t *instruction = buffer;
+
+      /* doutw vi1, vi0
+       * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
+       * emit
+       */
+      instruction =
+         pvr_pds_generate_doutw(doutw_control, buffer, gen_mode, dev_info);
+      program->code_size = doutw_control->code_size;
+
+      /* Offset into data segment follows on from doutw data segment. */
+      instruction = pvr_pds_kick_usc(kick_usc_program,
+                                     instruction,
+                                     doutw_control->data_size,
+                                     program->cc_enable,
+                                     gen_mode);
+      program->code_size += kick_usc_program->code_size;
+
+      return instruction;
+   }
+
+   /* Execution should never reach here. */
+   return NULL;
+}
+
+/**
+ * Generates the fence/terminate program: an optional SW compute barrier
+ * (on devices where PVR_NEED_SW_COMPUTE_PDS_BARRIER is set) followed by a
+ * DOUTC.
+ *
+ * NOTE(review): PDS_GENERATE_SIZES is not handled here — it falls through
+ * to the final return NULL without updating program sizes; presumably
+ * callers never pass it. Confirm against call sites.
+ *
+ * \param program Pointer to the PDS fence program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_fence_terminate_program(
+   struct pvr_pds_fence_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      /* Data segment. */
+      uint32_t *constants, *constants_base;
+
+      constants = constants_base = (uint32_t *)buffer;
+
+      /* DOUTC sources are not used, but they must be valid. */
+      pvr_pds_generate_doutc(program, constants, PDS_GENERATE_DATA_SEGMENT);
+      data_size += program->data_size;
+
+      if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
+         /* Append a 64-bit constant with value 1. Used to increment ptemp.
+          * Return the offset into the data segment.
+          */
+         program->fence_constant_word =
+            pvr_pds_append_constant64(constants_base, 1, &data_size);
+      }
+
+      program->data_size = data_size;
+      return constants;
+   }
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Code segment. */
+      uint32_t *instruction = (uint32_t *)buffer;
+
+      /* The barrier conditional always emits exactly 3 instructions. */
+      instruction = pvr_pds_generate_compute_barrier_conditional(
+         instruction,
+         PDS_GENERATE_CODE_SEGMENT);
+      code_size += 3;
+
+      if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
+         /* lock */
+         *instruction++ = pvr_pds_inst_encode_lock(0); /* cc */
+
+         /* add64      pt[0], pt[0], #1 */
+         *instruction++ = pvr_pds_inst_encode_add64(
+            0, /* cc */
+            PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+            PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+            PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER + 0, /* src0 = ptemp[0]
+                                                         */
+            PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+               (program->fence_constant_word >> 1), /* src1 = 1 */
+            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest =
+                                                            * ptemp[0]
+                                                            */
+
+         /* release */
+         *instruction++ = pvr_pds_inst_encode_release(0); /* cc */
+
+         /* cmp                pt[0] EQ 0x4 == Number of USC clusters per phantom */
+         *instruction++ = pvr_pds_inst_encode_cmpi(
+            0, /* cc */
+            PVR_ROGUE_PDSINST_COP_EQ,
+            PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0, /* src0
+                                                           * = ptemp[0]
+                                                           */
+            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0));
+
+         /* bra                -1 */
+         *instruction++ =
+            pvr_pds_encode_bra(0, /* cc */
+                               1, /* PVR_ROGUE_PDSINST_BRA_NEG_ENABLE
+                                   */
+                               0, /* PVR_ROGUE_PDSINST_BRA_SETC_P0
+                                   */
+                               -1); /* bra PC */
+         code_size += 5;
+      }
+
+      /* DOUTC */
+      instruction = pvr_pds_generate_doutc(program,
+                                           instruction,
+                                           PDS_GENERATE_CODE_SEGMENT);
+      code_size += program->code_size;
+
+      program->code_size = code_size;
+      return instruction;
+   }
+
+   /* Execution should never reach here. */
+   return NULL;
+}
+
+/**
+ * Generates program to kick the USC task to load shared registers from memory.
+ *
+ * Concatenates: a DOUTW program, a kick-USC (DOUTU) program, and a zeroed
+ * 64-bit fence constant appended to the data segment (used to clear
+ * ptemp[0] as a SW fence for context switch).
+ *
+ * \param program Pointer to the PDS shared register.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information struct.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_compute_shared_loading_program(
+   struct pvr_pds_shared_storing_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   struct pvr_pds_kickusc_program *kick_usc_program = &program->usc_task;
+   struct pvr_pds_doutw_control *doutw_control = &program->doutw_control;
+
+   uint32_t next_constant;
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+
+   /* This needs to persist to the CODE_SEGMENT call.
+    * NOTE(review): a function-scope static makes this non-reentrant — the
+    * DATA_SEGMENT and CODE_SEGMENT calls for one program must not be
+    * interleaved with another program's, and concurrent callers would
+    * race. Confirm callers serialize these two calls.
+    */
+   static uint32_t fence_constant_word = 0;
+   uint64_t zero_constant64 = 0;
+
+   if (gen_mode == PDS_GENERATE_SIZES)
+      return NULL;
+
+   if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+      uint32_t *constants = buffer;
+
+      constants = pvr_pds_generate_doutw(doutw_control,
+                                         constants,
+                                         PDS_GENERATE_DATA_SEGMENT,
+                                         dev_info);
+      data_size += doutw_control->data_size;
+
+      constants = pvr_pds_kick_usc(kick_usc_program,
+                                   constants,
+                                   0,
+                                   program->cc_enable,
+                                   gen_mode);
+      data_size += kick_usc_program->data_size;
+
+      /* Copy the fence constant value (64-bit). */
+      next_constant = data_size; /* Assumes data words fully packed. */
+      fence_constant_word =
+         pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+      /* Encode the fence constant src0 (offset measured from start of data
+       * buffer). Fence barrier is initialized to zero.
+       */
+      pvr_pds_write_wide_constant(buffer, fence_constant_word, zero_constant64);
+      /* Update the const size. */
+      data_size += 2;
+      constants += 2;
+
+      program->data_size = data_size;
+      return constants;
+   }
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Generate PDS code segment. */
+      uint32_t *instruction = buffer;
+
+      /* add64 pt0, c0, c0
+       * IF [2x Phantoms]
+       * add64 pt1, c0, c0
+       * st            [constant_mem_addr], pt0, 4
+       * ENDIF
+       * doutw vi1, vi0
+       * doutu ds1[constant_use], ds0[constant_use], ds1[constant_use],
+       * emit
+       *
+       * Zero the persistent temp (SW fence for context switch).
+       */
+      *instruction++ = pvr_pds_inst_encode_add64(
+         0, /* cc */
+         PVR_ROGUE_PDSINST_ALUM_UNSIGNED,
+         PVR_ROGUE_PDSINST_MAD_SNA_ADD,
+         PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+            (fence_constant_word >> 1), /* src0
+                                         *  = 0
+                                         */
+         PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER +
+            (fence_constant_word >> 1), /* src1
+                                         * = 0
+                                         */
+         PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER + 0); /* dest = ptemp64[0]
+                                                         */
+      code_size++;
+
+      instruction = pvr_pds_generate_doutw(doutw_control,
+                                           instruction,
+                                           PDS_GENERATE_CODE_SEGMENT,
+                                           dev_info);
+      code_size += doutw_control->code_size;
+
+      /* Offset into data segment follows on from doutw data segment. */
+      instruction = pvr_pds_kick_usc(kick_usc_program,
+                                     instruction,
+                                     doutw_control->data_size,
+                                     program->cc_enable,
+                                     gen_mode);
+      code_size += kick_usc_program->code_size;
+
+      program->code_size = code_size;
+      return instruction;
+   }
+
+   /* Execution should never reach here. */
+   return NULL;
+}
+
+/**
+ * Generates both code and data when gen_mode is not PDS_GENERATE_SIZES.
+ * Relies on num_fpu_iterators being initialized for size calculation.
+ * Relies on num_fpu_iterators, destination[], and FPU_iterators[] being
+ * initialized for program generation.
+ *
+ * Emits one 64-bit DOUTI issue word per FPU iterator into the data
+ * segment, followed (in the same buffer, 128-bit aligned) by one DOUTI
+ * instruction per iterator; the final DOUTI gets the END flag.
+ *
+ * \param program Pointer to the PDS pixel shader program.
+ * \param buffer Pointer to the buffer for the program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_coefficient_loading(
+   struct pvr_pds_coeff_loading_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode)
+{
+   uint32_t constant;
+   uint32_t *instruction;
+   uint32_t total_data_size, code_size;
+
+   /* Place constants at the front of the buffer. */
+   uint32_t *constants = buffer;
+   /* Start counting constants from 0. */
+   uint32_t next_constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+
+   /* Save the data segment pointer and size. */
+   program->data_segment = constants;
+
+   total_data_size = 0;
+   code_size = 0;
+
+   /* Two dwords of issue-word data and one DOUTI per iterator. */
+   total_data_size += 2 * program->num_fpu_iterators;
+   code_size += program->num_fpu_iterators;
+
+   /* Instructions start where constants finished, but we must take note of
+    * alignment.
+    *
+    * 128-bit boundary = 4 dwords.
+    */
+   total_data_size = ALIGN_POT(total_data_size, 4);
+   if (gen_mode != PDS_GENERATE_SIZES) {
+      uint32_t data_size = 0;
+      uint32_t iterator = 0;
+
+      instruction = buffer + total_data_size;
+
+      while (iterator < program->num_fpu_iterators) {
+         uint64_t iterator_word;
+
+         /* Copy the USC task control words to constants. */
+         constant = pvr_pds_get_constants(&next_constant, 2, &data_size);
+
+         /* Write the first iterator. */
+         iterator_word =
+            (uint64_t)program->FPU_iterators[iterator]
+            << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT;
+
+         /* Write the destination. */
+         iterator_word |=
+            (uint64_t)program->destination[iterator++]
+            << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT;
+
+         /* If this is the last DOUTI word the "Last Issue" bit should be
+          * set.
+          */
+         if (iterator >= program->num_fpu_iterators) {
+            iterator_word |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN;
+         }
+
+         /* Write the word to the buffer. */
+         pvr_pds_write_wide_constant(constants,
+                                     constant,
+                                     iterator_word); /* 64-bit
+                                                        Src0
+                                                      */
+
+         /* Write the DOUT instruction. */
+         *instruction++ = pvr_pds_encode_douti(
+            /* cc */ 0,
+            /* END */ 0,
+            /* SRC0 */ constant >> 1); /* DOUT Issue word 0 64-bit */
+      }
+
+      /* Update the last DOUTI instruction to have the END flag set. */
+      *(instruction - 1) |= 1 << PVR_ROGUE_PDSINST_DOUT_END_SHIFT;
+   } else {
+      instruction = NULL;
+   }
+
+   /* Update the data size and code size. Minimum temp count is 1. */
+   program->temps_used = 1;
+   program->data_size = total_data_size;
+   program->code_size = code_size;
+
+   return instruction;
+}
+
+/**
+ * Generate a single ld/st instruction. This can correspond to one or more
+ * real ld/st instructions based on the value of count.
+ *
+ * \param ld true to generate load, false to generate store.
+ * \param control Cache mode control. May be NULL, in which case default
+ *                (cached / write-through) cache modes are used.
+ * \param temp_index Dest temp for load/source temp for store, in 32bits
+ *                   register index.
+ * \param address Source for load/dest for store in bytes.
+ * \param count Number of dwords for load/store.
+ * \param next_constant In/out: next free constant in the data segment.
+ * \param total_data_size In/out: accumulated data segment size in dwords.
+ * \param total_code_size In/out: accumulated code segment size in
+ *                        instructions.
+ * \param buffer Pointer to the buffer for the program.
+ * \param data_fence Issue data fence (a WDF after every ld/st).
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program, or NULL for
+ *          PDS_GENERATE_SIZES.
+ */
+uint32_t *pvr_pds_generate_single_ldst_instruction(
+   bool ld,
+   const struct pvr_pds_ldst_control *control,
+   uint32_t temp_index,
+   uint64_t address,
+   uint32_t count,
+   uint32_t *next_constant,
+   uint32_t *total_data_size,
+   uint32_t *total_code_size,
+   uint32_t *restrict buffer,
+   bool data_fence,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   /* A single ld/ST here does NOT actually correspond to a single ld/ST
+    * instruction, but may need multiple ld/ST instructions because each ld/ST
+    * instruction can only ld/ST a restricted max number of dwords which may be
+    * less than count passed here.
+    */
+
+   uint32_t num_inst;
+   uint32_t constant;
+
+   if (ld) {
+      /* ld must operate on 64bits unit, and it needs to load from and to 128
+       * bits aligned. Apart from the last ld, all the other need to ld 2x(x =
+       * 1, 2, ...) times 64bits unit.
+       */
+      uint32_t per_inst_count = 0;
+      uint32_t last_inst_count;
+
+      assert((gen_mode == PDS_GENERATE_SIZES) ||
+             (((count % 2) == 0) && ((address % 16) == 0) &&
+              (temp_index % 2) == 0));
+
+      /* From here on, count and temp_index are in 64-bit units. */
+      count >>= 1;
+      temp_index >>= 1;
+
+      /* Work out how many ld instructions are needed and ld size for the all
+       * possible ld instructions.
+       */
+      if (count <= PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE) {
+         num_inst = 1;
+         last_inst_count = count;
+      } else {
+         /* Keep the per-instruction count even so intermediate loads stay
+          * 128-bit aligned.
+          */
+         per_inst_count = PVR_ROGUE_PDSINST_LD_COUNT8_MAX_SIZE;
+         if ((per_inst_count % 2) != 0)
+            per_inst_count -= 1;
+
+         num_inst = count / per_inst_count;
+         last_inst_count = count - per_inst_count * num_inst;
+         num_inst += 1;
+      }
+
+      /* Generate all the instructions. */
+      for (uint32_t i = 0; i < num_inst; i++) {
+         /* Skip the trailing instruction when count divided evenly. */
+         if ((i == (num_inst - 1)) && (last_inst_count == 0))
+            break;
+
+         /* A single load instruction. */
+         constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint64_t ld_src0 = 0;
+
+            ld_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_LD_SRCADD_MASK)
+                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT);
+            ld_src0 |= (((uint64_t)((i == num_inst - 1) ? last_inst_count
+                                                        : per_inst_count) &
+                         PVR_ROGUE_PDSINST_LD_COUNT8_MASK)
+                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT);
+            ld_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+                        << PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT);
+
+            if (!control) {
+               /* Default cache behaviour when no control is supplied. */
+               ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED;
+
+               if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls))
+                  ld_src0 |= PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED;
+
+            } else {
+               ld_src0 |= control->cache_control_const;
+            }
+
+            /* Write it to the constant. */
+            pvr_pds_write_constant64(buffer,
+                                     constant,
+                                     (uint32_t)(ld_src0),
+                                     (uint32_t)(ld_src0 >> 32));
+
+            /* Adjust value for next ld instruction. */
+            temp_index += per_inst_count;
+            address += (((uint64_t)(per_inst_count)) << 3);
+         }
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+            *buffer++ = pvr_pds_inst_encode_ld(0, constant >> 1);
+
+            if (data_fence)
+               *buffer++ = pvr_pds_inst_encode_wdf(0);
+         }
+      }
+   } else {
+      /* ST needs source memory address to be 32bits aligned. */
+      assert((gen_mode == PDS_GENERATE_SIZES) || ((address % 4) == 0));
+
+      /* Work out how many ST instructions are needed, each ST can only store
+       * PVR_ROGUE_PDSINST_ST_COUNT4_MASK number of 32bits.
+       */
+      num_inst = count / PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE;
+      num_inst += ((count % PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE) == 0 ? 0 : 1);
+
+      /* Generate all the instructions. */
+      for (uint32_t i = 0; i < num_inst; i++) {
+         /* A single store instruction. */
+         constant = pvr_pds_get_constants(next_constant, 2, total_data_size);
+
+         if (gen_mode == PDS_GENERATE_DATA_SEGMENT) {
+            uint32_t per_inst_count =
+               (count <= PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE
+                   ? count
+                   : PVR_ROGUE_PDSINST_ST_COUNT4_MAX_SIZE);
+            uint64_t st_src0 = 0;
+
+            st_src0 |= (((address >> 2) & PVR_ROGUE_PDSINST_ST_SRCADD_MASK)
+                        << PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT);
+            st_src0 |=
+               (((uint64_t)per_inst_count & PVR_ROGUE_PDSINST_ST_COUNT4_MASK)
+                << PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT);
+            st_src0 |= (((uint64_t)temp_index & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+                        << PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT);
+
+            if (!control) {
+               /* Default cache behaviour when no control is supplied. */
+               st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH;
+
+               if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+                  st_src0 |= PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH;
+               }
+
+            } else {
+               st_src0 |= control->cache_control_const;
+            }
+
+            /* Write it to the constant. */
+            pvr_pds_write_constant64(buffer,
+                                     constant,
+                                     (uint32_t)(st_src0),
+                                     (uint32_t)(st_src0 >> 32));
+
+            /* Adjust value for next ST instruction. */
+            temp_index += per_inst_count;
+            count -= per_inst_count;
+            address += (((uint64_t)(per_inst_count)) << 2);
+         }
+
+         if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+            *buffer++ = pvr_pds_inst_encode_st(0, constant >> 1);
+
+            if (data_fence)
+               *buffer++ = pvr_pds_inst_encode_wdf(0);
+         }
+      }
+   }
+
+   /* Each ld/st contributes one instruction, plus one WDF each when
+    * data_fence is set.
+    */
+   (*total_code_size) += num_inst;
+   if (data_fence)
+      (*total_code_size) += num_inst;
+
+   if (gen_mode != PDS_GENERATE_SIZES)
+      return buffer;
+   return NULL;
+}
+
+/**
+ * Emit the PDS program that prepares stream out: the global stream out
+ * buffer overflow predicate is cleared and the persistent temps are
+ * refreshed from memory via LD instructions.
+ *
+ * This program must be used as part of a PPP state update.
+ *
+ * \param program Pointer to the stream out init program description.
+ * \param buffer Buffer that receives the generated program.
+ * \param store_mode If true then the data is stored to memory. If false then
+ *                   the data is loaded from memory.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device information structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_stream_out_init_program(
+   struct pvr_pds_stream_out_init_program *restrict program,
+   uint32_t *restrict buffer,
+   bool store_mode,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   /* Constants are allocated starting from the base of the block. */
+   uint32_t constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   uint32_t data_size = 0;
+   /* The STMC emitted below always contributes one instruction. */
+   uint32_t code_size = 1;
+   uint32_t pt_reg = PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Only the global stream out predicate needs clearing; the other
+       * predicates play no part in the stream out buffer overflow test.
+       */
+      *buffer++ = pvr_pds_inst_encode_stmc(0, 0x10);
+   }
+
+   for (uint32_t i = 0; i < program->num_buffers; i++) {
+      if (program->dev_address_for_buffer_data[i] != 0) {
+         /* Emit the load/store of the persistent temps for this buffer.
+          *
+          * NOTE: store_mode == true case should be handled by
+          * StreamOutTerminate.
+          */
+         buffer = pvr_pds_generate_single_ldst_instruction(
+            !store_mode,
+            NULL,
+            pt_reg,
+            program->dev_address_for_buffer_data[i],
+            program->pds_buffer_data_size[i],
+            &constant,
+            &data_size,
+            &code_size,
+            buffer,
+            false,
+            gen_mode,
+            dev_info);
+      }
+
+      pt_reg += program->pds_buffer_data_size[i];
+   }
+
+   /* Account for the trailing WDF + HALT pair. */
+   code_size += 2;
+
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      /* Fence the loading, then terminate the program. */
+      *buffer++ = pvr_pds_inst_encode_wdf(0);
+      *buffer++ = pvr_pds_inst_encode_halt(0);
+   }
+
+   /* Record sizes; data is rounded up to a 128-bit (4 dword) boundary. */
+   program->stream_out_init_pds_data_size = ALIGN_POT(data_size, 4);
+   program->stream_out_init_pds_code_size = code_size;
+
+   switch (gen_mode) {
+   case PDS_GENERATE_DATA_SEGMENT:
+      return buffer + program->stream_out_init_pds_data_size;
+   case PDS_GENERATE_CODE_SEGMENT:
+      return buffer;
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Emit the stream out terminate program.
+ *
+ * If pds_persistent_temp_size_to_store is 0, the final primitive written
+ * value will be stored.
+ *
+ * If pds_persistent_temp_size_to_store is non 0, the value of persistent
+ * temps will be stored into memory.
+ *
+ * The program takes part in a PPP state update, so its data and code
+ * sections cannot be separate.
+ *
+ * \param program Pointer to the stream out terminate program description.
+ * \param buffer Buffer that receives the generated program.
+ * \param gen_mode Either code and data can be generated or sizes only updated.
+ * \param dev_info PVR device info structure.
+ * \returns Pointer to just beyond the buffer for the program.
+ */
+uint32_t *pvr_pds_generate_stream_out_terminate_program(
+   struct pvr_pds_stream_out_terminate_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   /* Constants are allocated starting from the base of the block. */
+   uint32_t constant = PVR_PDS_CONSTANTS_BLOCK_BASE;
+   uint32_t data_size = 0;
+   uint32_t code_size = 0;
+
+   /* Emit the store of the persistent temps to memory. */
+   buffer = pvr_pds_generate_single_ldst_instruction(
+      false,
+      NULL,
+      PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER,
+      program->dev_address_for_storing_persistent_temp,
+      program->pds_persistent_temp_size_to_store,
+      &constant,
+      &data_size,
+      &code_size,
+      buffer,
+      false,
+      gen_mode,
+      dev_info);
+
+   /* Account for the trailing WDF + HALT pair. */
+   code_size += 2;
+   if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+      *buffer++ = pvr_pds_inst_encode_wdf(0);
+      *buffer++ = pvr_pds_inst_encode_halt(0);
+   }
+
+   /* Record sizes; data is rounded up to a 128-bit (4 dword) boundary. */
+   program->stream_out_terminate_pds_data_size = ALIGN_POT(data_size, 4);
+   program->stream_out_terminate_pds_code_size = code_size;
+
+   switch (gen_mode) {
+   case PDS_GENERATE_DATA_SEGMENT:
+      return buffer + program->stream_out_terminate_pds_data_size;
+   case PDS_GENERATE_CODE_SEGMENT:
+      return buffer;
+   default:
+      return NULL;
+   }
+}
+
+/* DrawArrays works in several steps:
+ *
+ * 1) load data from draw_indirect buffer
+ * 2) tweak data to match hardware formats
+ * 3) write data to indexblock
+ * 4) signal the VDM to continue
+ *
+ * This is complicated by HW limitations on alignment, as well as a HWBRN.
+ *
+ * 1) Load data.
+ * Loads _must_ be 128-bit aligned. Because there is no such limitation in the
+ * spec we must deal with this by choosing an appropriate earlier address and
+ * loading enough dwords that we load the entirety of the buffer.
+ *
+ * if addr & 0xf:
+ *   load [addr & ~0xf] 6 dwords -> tmp[0, 1, 2, 3, 4, 5]
+ *   data = tmp[0 + (addr & 0xf) >> 2]...
+ * else
+ *   load [addr] 4 dwords -> tmp[0, 1, 2, 3]
+ *   data = tmp[0]...
+ *
+ *
+ * 2) Tweak data.
+ * primCount in the spec does not match the encoding of INDEX_INSTANCE_COUNT in
+ * the VDM control stream. We must subtract 1 from the loaded primCount.
+ *
+ * However, there is a HWBRN that disallows the ADD32 instruction from sourcing
+ * a tmp that is non-64-bit-aligned. To work around this, we must move primCount
+ * into another tmp that has the correct alignment. Note: this is only required
+ * when data = tmp[even], as primCount is data+1:
+ *
+ * if data = tmp[even]:
+ *   primCount = data + 1 = tmp[odd] -- not 64-bit aligned!
+ * else:
+ *   primCount = data + 1 = tmp[even] -- already aligned, don't need workaround.
+ *
+ * This boils down to:
+ *
+ * primCount = data[1]
+ * primCountSrc = data[1]
+ * if brn_present && (data is even):
+ *   mov scratch, primCount
+ *   primCountSrc = scratch
+ * endif
+ * sub primCount, primCountSrc, 1
+ *
+ * 3) Store Data.
+ * Write the now-tweaked data over the top of the indexblock.
+ * To ensure the write completes before the VDM re-reads the data, we must cause
+ * a data hazard by doing a dummy (dummy meaning we don't care about the
+ * returned data) load from the same addresses. Again, because the ld must
+ * always be 128-bit aligned (note: the ST is dword-aligned), we must ensure the
+ * index block is 128-bit aligned. This is the client driver's responsibility.
+ *
+ * st data[0, 1, 2] -> (idxblock + 4)
+ * load [idxblock] 4 dwords
+ *
+ * 4) Signal the VDM
+ * This is simply a DOUTV with a src1 of 0, indicating the VDM should continue
+ * where it is currently fenced on a dummy idxblock that has been inserted by
+ * the driver.
+ */
+
+#include "pvr_draw_indirect_arrays0.h"
+#include "pvr_draw_indirect_arrays1.h"
+#include "pvr_draw_indirect_arrays2.h"
+#include "pvr_draw_indirect_arrays3.h"
+
+#include "pvr_draw_indirect_arrays_base_instance0.h"
+#include "pvr_draw_indirect_arrays_base_instance1.h"
+#include "pvr_draw_indirect_arrays_base_instance2.h"
+#include "pvr_draw_indirect_arrays_base_instance3.h"
+
+#include "pvr_draw_indirect_arrays_base_instance_drawid0.h"
+#include "pvr_draw_indirect_arrays_base_instance_drawid1.h"
+#include "pvr_draw_indirect_arrays_base_instance_drawid2.h"
+#include "pvr_draw_indirect_arrays_base_instance_drawid3.h"
+
+/* Pick the LD SLC cache mode for draw indirect programs: cached when the
+ * device has SLC/MCU cache controls, bypass otherwise.
+ *
+ * NOTE(review): this reads features.has_slc_mcu_cache_controls directly,
+ * whereas the rest of this file tests the feature via PVR_HAS_FEATURE() —
+ * confirm the two are equivalent. No use of this macro is visible in this
+ * part of the file.
+ */
+#define ENABLE_SLC_MCU_CACHE_CONTROLS(device)        \
+   ((device)->features.has_slc_mcu_cache_controls    \
+       ? PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \
+       : PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS)
+
+/**
+ * Generate the PDS program for a DrawArrays-style indirect draw, following
+ * the load/tweak/store/signal scheme described in the comment above.
+ *
+ * Four pre-built program variants exist, one per dword offset of the
+ * indirect arg buffer within its 16-byte-aligned load window; the variant
+ * is selected by (arg_buffer >> 2) % 4. Each variant additionally comes in
+ * plain, base-instance, and base-instance-plus-drawid flavours.
+ *
+ * \param program Pointer to the draw indirect program description.
+ * \param buffer Buffer that receives the generated code or data.
+ * \param gen_mode Either code and data can be generated or sizes only
+ *                 updated.
+ * \param dev_info PVR device information structure.
+ */
+void pvr_pds_generate_draw_arrays_indirect(
+   struct pvr_pds_drawindirect_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
+       (gen_mode == PDS_GENERATE_SIZES)) {
+      /* Code/sizes path: pick the pre-built program variant. All four
+       * residues of (arg_buffer >> 2) % 4 are covered, so psc_program is
+       * always set before use.
+       */
+      const struct pvr_psc_program_output *psc_program = NULL;
+      switch ((program->arg_buffer >> 2) % 4) {
+      case 0:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_arrays_base_instance_drawid0_program;
+            } else {
+               psc_program = &pvr_draw_indirect_arrays_base_instance0_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_arrays0_program;
+         }
+         break;
+      case 1:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_arrays_base_instance_drawid1_program;
+            } else {
+               psc_program = &pvr_draw_indirect_arrays_base_instance1_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_arrays1_program;
+         }
+         break;
+      case 2:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_arrays_base_instance_drawid2_program;
+            } else {
+               psc_program = &pvr_draw_indirect_arrays_base_instance2_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_arrays2_program;
+         }
+         break;
+      case 3:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_arrays_base_instance_drawid3_program;
+            } else {
+               psc_program = &pvr_draw_indirect_arrays_base_instance3_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_arrays3_program;
+         }
+         break;
+      }
+
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         /* Copy the pre-assembled code into the output buffer. */
+         memcpy(buffer,
+                psc_program->code,
+                psc_program->code_size * sizeof(uint32_t));
+#if defined(DUMP_PDS)
+         for (uint32_t i = 0; i < psc_program->code_size; i++)
+            PVR_PDS_PRINT_INST(buffer[i]);
+#endif
+      }
+
+      program->program = *psc_program;
+   } else {
+      /* Data path: patch the variant's data segment with the per-draw
+       * values. The di_data address has its low 4 bits cleared (the LD must
+       * be 128-bit aligned); write_vdm targets idxblock + 4 and flush_vdm
+       * re-reads the idxblock base to create the data hazard, as described
+       * in the comment above.
+       */
+      switch ((program->arg_buffer >> 2) % 4) {
+      case 0:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_arrays_base_instance0_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance0_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance0_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_arrays0_di_data(buffer,
+                                                    program->arg_buffer &
+                                                       ~0xfull,
+                                                    dev_info);
+            pvr_write_draw_indirect_arrays0_write_vdm(
+               buffer,
+               program->index_list_addr_buffer + 4);
+            pvr_write_draw_indirect_arrays0_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_arrays0_num_views(buffer,
+                                                      program->num_views);
+            pvr_write_draw_indirect_arrays0_immediates(buffer);
+         }
+         break;
+      case 1:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_arrays_base_instance1_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance1_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance1_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_arrays1_di_data(buffer,
+                                                    program->arg_buffer &
+                                                       ~0xfull,
+                                                    dev_info);
+            pvr_write_draw_indirect_arrays1_write_vdm(
+               buffer,
+               program->index_list_addr_buffer + 4);
+            pvr_write_draw_indirect_arrays1_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_arrays1_num_views(buffer,
+                                                      program->num_views);
+            pvr_write_draw_indirect_arrays1_immediates(buffer);
+         }
+         break;
+      case 2:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_arrays_base_instance2_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance2_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance2_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_arrays2_di_data(buffer,
+                                                    program->arg_buffer &
+                                                       ~0xfull,
+                                                    dev_info);
+            pvr_write_draw_indirect_arrays2_write_vdm(
+               buffer,
+               program->index_list_addr_buffer + 4);
+            pvr_write_draw_indirect_arrays2_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_arrays2_num_views(buffer,
+                                                      program->num_views);
+            pvr_write_draw_indirect_arrays2_immediates(buffer);
+         }
+         break;
+      case 3:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_arrays_base_instance3_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_arrays_base_instance3_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer + 4);
+               pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_arrays_base_instance3_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_arrays3_di_data(buffer,
+                                                    program->arg_buffer &
+                                                       ~0xfull,
+                                                    dev_info);
+            pvr_write_draw_indirect_arrays3_write_vdm(
+               buffer,
+               program->index_list_addr_buffer + 4);
+            pvr_write_draw_indirect_arrays3_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_arrays3_num_views(buffer,
+                                                      program->num_views);
+            pvr_write_draw_indirect_arrays3_immediates(buffer);
+         }
+         break;
+      }
+   }
+}
+
+#include "pvr_draw_indirect_elements0.h"
+#include "pvr_draw_indirect_elements1.h"
+#include "pvr_draw_indirect_elements2.h"
+#include "pvr_draw_indirect_elements3.h"
+#include "pvr_draw_indirect_elements_base_instance0.h"
+#include "pvr_draw_indirect_elements_base_instance1.h"
+#include "pvr_draw_indirect_elements_base_instance2.h"
+#include "pvr_draw_indirect_elements_base_instance3.h"
+#include "pvr_draw_indirect_elements_base_instance_drawid0.h"
+#include "pvr_draw_indirect_elements_base_instance_drawid1.h"
+#include "pvr_draw_indirect_elements_base_instance_drawid2.h"
+#include "pvr_draw_indirect_elements_base_instance_drawid3.h"
+
+void pvr_pds_generate_draw_elements_indirect(
+   struct pvr_pds_drawindirect_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info)
+{
+   if ((gen_mode == PDS_GENERATE_CODE_SEGMENT) ||
+       (gen_mode == PDS_GENERATE_SIZES)) {
+      const struct pvr_psc_program_output *psc_program = NULL;
+      switch ((program->arg_buffer >> 2) % 4) {
+      case 0:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_elements_base_instance_drawid0_program;
+            } else {
+               psc_program = &pvr_draw_indirect_elements_base_instance0_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_elements0_program;
+         }
+         break;
+      case 1:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_elements_base_instance_drawid1_program;
+            } else {
+               psc_program = &pvr_draw_indirect_elements_base_instance1_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_elements1_program;
+         }
+         break;
+      case 2:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_elements_base_instance_drawid2_program;
+            } else {
+               psc_program = &pvr_draw_indirect_elements_base_instance2_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_elements2_program;
+         }
+         break;
+      case 3:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               psc_program =
+                  &pvr_draw_indirect_elements_base_instance_drawid3_program;
+            } else {
+               psc_program = &pvr_draw_indirect_elements_base_instance3_program;
+            }
+         } else {
+            psc_program = &pvr_draw_indirect_elements3_program;
+         }
+         break;
+      }
+
+      if (gen_mode == PDS_GENERATE_CODE_SEGMENT) {
+         memcpy(buffer,
+                psc_program->code,
+                psc_program->code_size * sizeof(uint32_t));
+
+#if defined(DUMP_PDS)
+         for (uint32_t i = 0; i < psc_program->code_size; i++)
+            PVR_PDS_PRINT_INST(buffer[i]);
+#endif
+      }
+
+      program->program = *psc_program;
+   } else {
+      switch ((program->arg_buffer >> 2) % 4) {
+      case 0:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid0_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride(
+                  buffer,
+                  program->index_stride);
+               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base(
+                  buffer,
+                  program->index_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header(
+                  buffer,
+                  program->index_block_header);
+               pvr_write_draw_indirect_elements_base_instance_drawid0_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_elements_base_instance0_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_elements_base_instance0_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance0_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance0_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_elements_base_instance0_idx_stride(
+                  buffer,
+                  program->index_stride);
+               pvr_write_draw_indirect_elements_base_instance0_idx_base(
+                  buffer,
+                  program->index_buffer);
+               pvr_write_draw_indirect_elements_base_instance0_idx_header(
+                  buffer,
+                  program->index_block_header);
+               pvr_write_draw_indirect_elements_base_instance0_immediates(
+                  buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_elements0_di_data(buffer,
+                                                      program->arg_buffer &
+                                                         ~0xfull,
+                                                      dev_info);
+            pvr_write_draw_indirect_elements0_write_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_elements0_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_elements0_num_views(buffer,
+                                                        program->num_views);
+            pvr_write_draw_indirect_elements0_idx_stride(buffer,
+                                                         program->index_stride);
+            pvr_write_draw_indirect_elements0_idx_base(buffer,
+                                                       program->index_buffer);
+            pvr_write_draw_indirect_elements0_idx_header(
+               buffer,
+               program->index_block_header);
+            pvr_write_draw_indirect_elements0_immediates(buffer);
+         }
+         break;
+      case 1:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid1_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride(
+                  buffer,
+                  program->index_stride);
+               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base(
+                  buffer,
+                  program->index_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header(
+                  buffer,
+                  program->index_block_header);
+               pvr_write_draw_indirect_elements_base_instance_drawid1_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_elements_base_instance1_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_elements_base_instance1_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance1_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance1_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_elements_base_instance1_idx_stride(
+                  buffer,
+                  program->index_stride);
+               pvr_write_draw_indirect_elements_base_instance1_idx_base(
+                  buffer,
+                  program->index_buffer);
+               pvr_write_draw_indirect_elements_base_instance1_idx_header(
+                  buffer,
+                  program->index_block_header);
+               pvr_write_draw_indirect_elements_base_instance1_immediates(
+                  buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_elements1_di_data(buffer,
+                                                      program->arg_buffer &
+                                                         ~0xfull,
+                                                      dev_info);
+            pvr_write_draw_indirect_elements1_write_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_elements1_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_elements1_num_views(buffer,
+                                                        program->num_views);
+            pvr_write_draw_indirect_elements1_idx_stride(buffer,
+                                                         program->index_stride);
+            pvr_write_draw_indirect_elements1_idx_base(buffer,
+                                                       program->index_buffer);
+            pvr_write_draw_indirect_elements1_idx_header(
+               buffer,
+               program->index_block_header);
+            pvr_write_draw_indirect_elements1_immediates(buffer);
+         }
+         break;
+      case 2:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid2_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride(
+                  buffer,
+                  program->index_stride);
+               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base(
+                  buffer,
+                  program->index_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header(
+                  buffer,
+                  program->index_block_header);
+               pvr_write_draw_indirect_elements_base_instance_drawid2_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_elements_base_instance2_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_elements_base_instance2_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance2_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance2_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_elements_base_instance2_idx_stride(
+                  buffer,
+                  program->index_stride);
+               pvr_write_draw_indirect_elements_base_instance2_idx_base(
+                  buffer,
+                  program->index_buffer);
+               pvr_write_draw_indirect_elements_base_instance2_idx_header(
+                  buffer,
+                  program->index_block_header);
+               pvr_write_draw_indirect_elements_base_instance2_immediates(
+                  buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_elements2_di_data(buffer,
+                                                      program->arg_buffer &
+                                                         ~0xfull,
+                                                      dev_info);
+            pvr_write_draw_indirect_elements2_write_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_elements2_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_elements2_num_views(buffer,
+                                                        program->num_views);
+            pvr_write_draw_indirect_elements2_idx_stride(buffer,
+                                                         program->index_stride);
+            pvr_write_draw_indirect_elements2_idx_base(buffer,
+                                                       program->index_buffer);
+            pvr_write_draw_indirect_elements2_idx_header(
+               buffer,
+               program->index_block_header);
+            pvr_write_draw_indirect_elements2_immediates(buffer);
+         }
+         break;
+      case 3:
+         if (program->support_base_instance) {
+            if (program->increment_draw_id) {
+               pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid3_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride(
+                  buffer,
+                  program->index_stride);
+               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base(
+                  buffer,
+                  program->index_buffer);
+               pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header(
+                  buffer,
+                  program->index_block_header);
+               pvr_write_draw_indirect_elements_base_instance_drawid3_immediates(
+                  buffer);
+            } else {
+               pvr_write_draw_indirect_elements_base_instance3_di_data(
+                  buffer,
+                  program->arg_buffer & ~0xfull,
+                  dev_info);
+               pvr_write_draw_indirect_elements_base_instance3_write_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance3_flush_vdm(
+                  buffer,
+                  program->index_list_addr_buffer);
+               pvr_write_draw_indirect_elements_base_instance3_num_views(
+                  buffer,
+                  program->num_views);
+               pvr_write_draw_indirect_elements_base_instance3_idx_stride(
+                  buffer,
+                  program->index_stride);
+               pvr_write_draw_indirect_elements_base_instance3_idx_base(
+                  buffer,
+                  program->index_buffer);
+               pvr_write_draw_indirect_elements_base_instance3_idx_header(
+                  buffer,
+                  program->index_block_header);
+               pvr_write_draw_indirect_elements_base_instance3_immediates(
+                  buffer);
+            }
+         } else {
+            pvr_write_draw_indirect_elements3_di_data(buffer,
+                                                      program->arg_buffer &
+                                                         ~0xfull,
+                                                      dev_info);
+            pvr_write_draw_indirect_elements3_write_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_elements3_flush_vdm(
+               buffer,
+               program->index_list_addr_buffer);
+            pvr_write_draw_indirect_elements3_num_views(buffer,
+                                                        program->num_views);
+            pvr_write_draw_indirect_elements3_idx_stride(buffer,
+                                                         program->index_stride);
+            pvr_write_draw_indirect_elements3_idx_base(buffer,
+                                                       program->index_buffer);
+            pvr_write_draw_indirect_elements3_idx_header(
+               buffer,
+               program->index_block_header);
+            pvr_write_draw_indirect_elements3_immediates(buffer);
+         }
+         break;
+      }
+   }
+}
diff --git a/src/imagination/vulkan/pds/pvr_pds.h b/src/imagination/vulkan/pds/pvr_pds.h
new file mode 100644 (file)
index 0000000..18307c9
--- /dev/null
@@ -0,0 +1,1161 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PDS_H
+#define PVR_PDS_H
+
+#include <stdbool.h>
+
+#include "pvr_device_info.h"
+#include "pvr_limits.h"
+#include "pds/pvr_rogue_pds_defs.h"
+#include "util/macros.h"
+
+#ifdef __cplusplus
+#   define restrict __restrict__
+#endif
+
+/*****************************************************************************
+ Macro definitions
+*****************************************************************************/
+
+/* Based on Maximum number of passes that may emit DOUTW x Maximum number that
+ * might be emitted.
+ */
+#define PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW 6
+/* Based on Maximum number of passes that may emit DOUTW x Maximum number that
+ * might be emitted.
+ */
+#define PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW 3
+/* Based on max(max(UBOs,cbuffers), numTextures). */
+#define PVR_PDS_MAX_NUM_DMA_KICKS 32
+#define PVR_PDS_NUM_VERTEX_STREAMS 32
+#define PVR_PDS_NUM_VERTEX_ELEMENTS 32
+#define PVR_MAXIMUM_ITERATIONS 128
+
+#define PVR_PDS_NUM_COMPUTE_INPUT_REGS 3
+
+#define PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)       \
+   PVR_HAS_FEATURE(dev_info, compute_morton_capable) && \
+      !PVR_HAS_ERN(dev_info, 45493)
+
+/* FIXME: Change BIL to SPV. */
+/* Any variable location can have at most 4 32-bit components. */
+#define BIL_COMPONENTS_PER_LOCATION 4
+
+/* Maximum number of DDMAD's that may be performed (Num attribs * Num DMA's per
+ * attribute).
+ */
+#define PVR_MAX_VERTEX_ATTRIB_DMAS \
+   (PVR_MAX_VERTEX_INPUT_BINDINGS * BIL_COMPONENTS_PER_LOCATION)
+
+/*****************************************************************************
+ Typedefs
+*****************************************************************************/
+
+/* FIXME: We might need to change some bools to this. */
+typedef uint32_t PVR_PDS_BOOL;
+
+/*****************************************************************************
+ Enums
+*****************************************************************************/
+
+enum pvr_pds_generate_mode {
+   PDS_GENERATE_SIZES,
+   PDS_GENERATE_CODE_SEGMENT,
+   PDS_GENERATE_DATA_SEGMENT,
+   PDS_GENERATE_CODEDATA_SEGMENTS
+};
+
+enum pvr_pds_store_type { PDS_COMMON_STORE, PDS_UNIFIED_STORE };
+
+enum pvr_pds_vertex_attrib_program_type {
+   PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC,
+   PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE,
+   PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT,
+   PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT
+};
+
+/*****************************************************************************
+ Structure definitions
+*****************************************************************************/
+
+struct pvr_psc_register {
+   uint32_t num;
+
+   unsigned int size; /* size of each element. */
+   unsigned int dim : 4; /* max number of elements. */
+   unsigned int index; /* offset into array. */
+
+   unsigned int cast;
+
+   unsigned int type;
+   uint64_t name;
+   bool auto_assign;
+   unsigned int original_type;
+};
+
+struct pvr_psc_program_output {
+   const uint32_t *code;
+
+   struct pvr_psc_register *data;
+   unsigned int data_count;
+
+   unsigned int data_size_aligned;
+   unsigned int code_size_aligned;
+   unsigned int temp_size_aligned;
+
+   unsigned int data_size;
+   unsigned int code_size;
+   unsigned int temp_size;
+
+   void (*write_data)(void *data, uint32_t *buffer);
+};
+
+struct pvr_pds_usc_task_control {
+   uint64_t src0;
+};
+
+/* Up to 4 64-bit state words currently supported. */
+#define PVR_PDS_MAX_NUM_DOUTW_CONSTANTS 4
+
+/* Structure for DOUTW. */
+struct pvr_pds_doutw_control {
+   enum pvr_pds_store_type dest_store;
+   uint32_t num_const64;
+   uint64_t doutw_data[PVR_PDS_MAX_NUM_DOUTW_CONSTANTS];
+   bool last_instruction;
+
+   uint32_t *data_segment;
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/* Structure representing the PDS pixel event program.
+ *
+ * data_segment - pointer to the data segment
+ * task_control - USC task control words
+ * emit_words - array of Emit words
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_event_program {
+   uint32_t *data_segment;
+   struct pvr_pds_usc_task_control task_control;
+
+   uint32_t num_emit_word_pairs;
+   uint32_t *emit_words;
+
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/*
+ * Structure representing the PDS pixel shader secondary attribute program.
+ *
+ * data_segment - pointer to the data segment
+ *
+ * num_uniform_dma_kicks - number of Uniform DMA kicks
+ * uniform_dma_control - array of Uniform DMA control words
+ * uniform_dma_address - array of Uniform DMA address words
+ *
+ * num_texture_dma_kicks - number of Texture State DMA kicks
+ * texture_dma_control - array of Texture State DMA control words
+ * texture_dma_address - array of Texture State DMA address words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ *
+ * temps_used - PDS Temps
+ */
+struct pvr_pds_pixel_shader_sa_program {
+   uint32_t *data_segment;
+
+   uint32_t num_dword_doutw;
+   uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+   uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+
+   uint32_t num_q_word_doutw;
+   uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+   uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+
+   uint32_t num_uniform_dma_kicks;
+   uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   uint32_t num_texture_dma_kicks;
+   uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   bool kick_usc;
+   bool write_tile_position;
+   uint32_t tile_position_attr_dest;
+   struct pvr_pds_usc_task_control usc_task_control;
+
+   bool clear;
+   uint32_t *clear_color;
+   uint32_t clear_color_dest_reg;
+   bool packed_clear;
+
+   uint32_t data_size;
+   uint32_t code_size;
+
+   uint32_t temps_used;
+};
+
+/* Structure representing the PDS pixel shader program.
+ *
+ * data_segment - pointer to the data segment
+ * usc_task_control - array of USC task control words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_kickusc_program {
+   uint32_t *data_segment;
+   struct pvr_pds_usc_task_control usc_task_control;
+
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/* Structure representing the PDS fence/doutc program.
+ *
+ * data_segment - pointer to the data segment
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_fence_program {
+   uint32_t *data_segment;
+   uint32_t fence_constant_word;
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/* Structure representing the PDS coefficient loading.
+ *
+ * data_segment - pointer to the data segment
+ * num_fpu_iterators - number of FPU iterators
+ * FPU_iterators - array of FPU iterator control words
+ * destination - array of Common Store destinations
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_coeff_loading_program {
+   uint32_t *data_segment;
+   uint32_t num_fpu_iterators;
+   uint32_t FPU_iterators[PVR_MAXIMUM_ITERATIONS];
+   uint32_t destination[PVR_MAXIMUM_ITERATIONS];
+
+   uint32_t data_size;
+   uint32_t code_size;
+
+   uint32_t temps_used;
+};
+
+/* Structure representing the PDS vertex shader secondary attribute program.
+ *
+ * data_segment - pointer to the data segment
+ * num_dma_kicks - number of DMA kicks
+ * dma_control - array of DMA control words
+ * dma_address - array of DMA address words
+ *
+ * data_size - size of data segment
+ * code_size - size of code segment
+ */
+struct pvr_pds_vertex_shader_sa_program {
+   uint32_t *data_segment;
+
+   /* num_uniform_dma_kicks, uniform_dma_address and uniform_dma_control are
+    * not used for generating the PDS data and code sections; they currently
+    * only exist to simplify the driver implementation. The driver should fold
+    * this information into num_dma_kicks, dma_address and dma_control to get
+    * the PDS properly generated.
+    */
+
+   uint32_t num_dword_doutw;
+   uint32_t dword_doutw_value[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+   uint32_t dword_doutw_control[PVR_PDS_MAX_TOTAL_NUM_DWORD_DOUTW];
+
+   uint32_t num_q_word_doutw;
+   uint32_t q_word_doutw_value[2 * PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+   uint32_t q_word_doutw_control[PVR_PDS_MAX_TOTAL_NUM_QWORD_DOUTW];
+
+   uint32_t num_uniform_dma_kicks;
+   uint64_t uniform_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t uniform_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   uint32_t num_texture_dma_kicks;
+   uint64_t texture_dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t texture_dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   uint32_t num_dma_kicks;
+   uint64_t dma_address[PVR_PDS_MAX_NUM_DMA_KICKS];
+   uint32_t dma_control[PVR_PDS_MAX_NUM_DMA_KICKS];
+
+   bool kick_usc;
+   struct pvr_pds_usc_task_control usc_task_control;
+
+   /* Shared register buffer base address (VDM/CDM context load case only). */
+   bool clear_pds_barrier;
+
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/* Structure representing a PDS vertex stream element.
+ *
+ * There are two types of element, repeat DMA and non-repeat DMA.
+ *
+ * Non repeat DMA are the classic DMA of some number of bytes from an offset
+ * into contiguous registers. It is assumed the address and size are dword
+ * aligned. To use this, specify 0 for the component size. Each four bytes read
+ * will go to the next HW register.
+ *
+ * Repeat DMA enables copying of sub dword amounts at non dword aligned
+ * addresses. To use this, specify the component size as either 1,2,3 or 4
+ * bytes. Size specifies the number of components, and each component read
+ * will go to the next HW register.
+ *
+ * In both cases, HW registers are written contiguously.
+ *
+ * offset - offset of the vertex stream element
+ * size - size of the vertex stream element in bytes for non repeat DMA, or
+ *        number of components for repeat DMA.
+ * reg - first vertex stream element register to DMA to.
+ * component_size - Size of component for repeat DMA, or 0 for non repeat DMA.
+ */
+struct pvr_pds_vertex_element {
+   uint32_t offset;
+   uint32_t size;
+   uint16_t reg;
+   uint16_t component_size;
+};
+
+/* Structure representing a PDS vertex stream.
+ *
+ * instance_data - flag whether the vertex stream is indexed or instance data
+ * read_back - If True, vertex is reading back data output by GPU earlier in
+ *             same kick. This will enable MCU coherency if relevant.
+ * multiplier - vertex stream frequency multiplier
+ * shift - vertex stream frequency shift
+ * address - vertex stream address in bytes
+ * buffer_size_in_bytes - buffer size in bytes if vertex attribute is sourced
+ *                        from buffer object
+ * stride - vertex stream stride in bytes
+ * num_vertices - number of vertices in buffer. Used for OOB checking.
+ *                0 = disable OOB checking.
+ * num_elements - number of vertex stream elements
+ * elements - array of vertex stream elements
+ * use_ddmadt - When the has_pds_ddmadt feature is enabled. Boolean allowing
+ *              DDMADT to be used per stream element.
+ */
+struct pvr_pds_vertex_stream {
+   bool current_state;
+   bool instance_data;
+   bool read_back;
+   uint32_t multiplier;
+   uint32_t shift;
+   uint64_t address;
+   uint32_t buffer_size_in_bytes;
+   uint32_t stride;
+   uint32_t num_vertices;
+   uint32_t num_elements;
+   struct pvr_pds_vertex_element elements[PVR_PDS_NUM_VERTEX_ELEMENTS];
+
+   bool use_ddmadt;
+};
+
+/* Structure representing the PDS vertex shader program.
+ *
+ * This structure describes the USC code and vertex buffers required
+ * by the PDS vertex loading program.
+ *
+ * data_segment - Pointer to the data segment.
+ * usc_task_control - Description of USC task for vertex shader program.
+ * num_streams - Number of vertex streams.
+ * iterate_vtx_id - If set, the vertex id should be iterated.
+ * vtx_id_register - The register to iterate the VertexID into (if applicable)
+ * vtx_id_modifier - Value to add/subtract from index value received by PDS.
+ *                   This is used because the index value received by PDS has
+ *                   INDEX_OFFSET added, and generally VertexID wouldn't.
+ * vtx_id_sub_modifier - If true, vtx_id_modifier is subtracted, else added.
+ * iterate_instance_id - If set, the instance id should be iterated.
+ * instance_id_register - The register to iterate the InstanceID into (if
+ *                        applicable). The vertex and instance id will both be
+ *                        iterated as unsigned ints
+ *
+ * iterate_remap_id - Should be set to true if vertex shader needs
+ *                    VS_REMAPPED_INDEX_ID (e.g. Another TA shader runs after
+ *                    it).
+ * null_idx - Indicates no index buffer is bound, so every index should be
+ *            null_idx_value.
+ * null_idx_value - The value to use as index if null_idx set.
+ * data_size - Size of data segment, in dwords. Output by call to
+ *             pvr_pds_vertex_shader, and used as input when generating data.
+ * code_size - Size of code segment. Output by call to pvr_pds_vertex_shader.
+ *             This is the number of dword instructions that are/were generated.
+ * temps_used - Number of temporaries used. Output by call to
+ *              pvr_pds_vertex_shader.
+ */
+struct pvr_pds_vertex_shader_program {
+   uint32_t *data_segment;
+   struct pvr_pds_usc_task_control usc_task_control;
+   uint32_t num_streams;
+
+   bool iterate_vtx_id;
+   uint32_t vtx_id_register;
+   uint32_t vtx_id_modifier;
+   bool vtx_id_sub_modifier;
+
+   bool iterate_instance_id;
+   uint32_t instance_id_register;
+   uint32_t instance_ID_modifier;
+   uint32_t base_instance;
+
+   bool iterate_remap_id;
+
+   bool null_idx;
+   uint32_t null_idx_value;
+
+   uint32_t *stream_patch_offsets;
+   uint32_t num_stream_patches;
+
+   uint32_t data_size;
+   uint32_t code_size;
+   uint32_t temps_used;
+   uint32_t ddmadt_enables;
+   uint32_t skip_stream_flag;
+
+   bool draw_indirect;
+   bool indexed;
+
+   struct pvr_pds_vertex_stream streams[PVR_PDS_NUM_VERTEX_STREAMS];
+};
+
+/* Structure representing PDS shared reg storing program. */
+struct pvr_pds_shared_storing_program {
+   struct pvr_pds_doutw_control doutw_control; /*!< DOUTW state */
+   struct pvr_pds_kickusc_program usc_task; /*!< DOUTU state */
+   bool cc_enable; /*!< cc bit is set on the doutu instruction. */
+   uint32_t data_size; /*!< total data size, non-aligned. */
+   uint32_t code_size; /*!< total code size, non-aligned. */
+};
+
+#define PVR_MAX_STREAMOUT_BUFFERS 4
+
+/* Structure representing stream out init PDS programs. */
+struct pvr_pds_stream_out_init_program {
+   /* --- Input to PDS_STREAM_OUT_INIT_PROGRAM --- */
+
+   /* Number of buffers to load/store.
+    * This indicates the number of entries in the next two arrays.
+    * Data is loaded/stored contiguously to persistent temps.
+    */
+   uint32_t num_buffers;
+
+   /* Number of persistent temps in dword to load/store for each buffer. */
+   uint32_t pds_buffer_data_size[PVR_MAX_STREAMOUT_BUFFERS];
+   /* The device address for loading/storing persistent temps for each buffer.
+    * If address is zero, then no data is loaded/stored
+    * into pt registers for the buffer.
+    */
+   uint64_t dev_address_for_buffer_data[PVR_MAX_STREAMOUT_BUFFERS];
+
+   /* PDS state update Stream Out Init Programs. */
+   uint32_t stream_out_init_pds_data_size;
+   uint32_t stream_out_init_pds_code_size;
+};
+
+/* Structure representing stream out terminate PDS program. */
+struct pvr_pds_stream_out_terminate_program {
+   /* Input to PDS_STREAM_OUT_TERMINATE_PROGRAM.
+    *
+    * Number of persistent temps in dword used in stream out PDS programs needs
+    * to be stored.
+    * The terminate program writes pds_persistent_temp_size_to_store number
+    * persistent temps to dev_address_for_storing_persistent_temp.
+    */
+   uint32_t pds_persistent_temp_size_to_store;
+
+   /* The device address for storing persistent temps. */
+   uint64_t dev_address_for_storing_persistent_temp;
+
+   /* PPP state update Stream Out Program for stream out terminate. */
+   uint32_t stream_out_terminate_pds_data_size;
+   uint32_t stream_out_terminate_pds_code_size;
+};
+
+/* Structure representing the PDS compute shader program.
+ *
+ * This structure describes the USC code and compute buffers required by
+ * the PDS compute task loading program.
+ *
+ * data_segment - pointer to the data segment
+ * usc_task_control - description of the USC task for the compute shader
+ *                    program
+ * data_size - size of the data segment, in dwords. Output by call to
+ *             pvr_pds_compute_shader and used as input when generating
+ *             data.
+ * code_size - size of the code segment, i.e. the number of dword
+ *             instructions generated. Output by pvr_pds_compute_shader.
+ * temps_used - number of temporaries used. Output by pvr_pds_compute_shader.
+ * highest_temp - highest temp number used. Output by pvr_pds_compute_shader.
+ * coeff_update_task_branch_size - number of instructions we need to branch
+ *                                 over to skip the coefficient update task.
+ */
+
+struct pvr_pds_compute_shader_program {
+   uint32_t *data_segment;
+   struct pvr_pds_usc_task_control usc_task_control;
+   struct pvr_pds_usc_task_control usc_task_control_coeff_update;
+
+   uint32_t data_size;
+   uint32_t code_size;
+
+   uint32_t temps_used;
+   uint32_t highest_temp;
+
+   uint32_t local_input_regs[3];
+   uint32_t work_group_input_regs[3];
+   uint32_t global_input_regs[3];
+
+   uint32_t barrier_coefficient;
+
+   bool fence;
+
+   bool flattened_work_groups;
+
+   bool clear_pds_barrier;
+
+   bool has_coefficient_update_task;
+
+   uint32_t coeff_update_task_branch_size;
+
+   bool add_base_workgroup;
+   uint32_t base_workgroup_constant_offset_in_dwords[3];
+
+   bool kick_usc;
+
+   bool conditional_render;
+   uint32_t cond_render_const_offset_in_dwords;
+   uint32_t cond_render_pred_temp;
+};
+struct pvr_pds_ldst_control {
+   uint64_t cache_control_const;
+};
+
+/* Define a value we can use as a register number in the driver to denote that
+ * the value is unused.
+ */
+#define PVR_PDS_COMPUTE_INPUT_REG_UNUSED 0xFFFFFFFFU
+
+/*****************************************************************************
+ function declarations
+*****************************************************************************/
+
+/*****************************************************************************
+ Constructors
+*****************************************************************************/
+
+void pvr_pds_pixel_shader_sa_initialize(
+   struct pvr_pds_pixel_shader_sa_program *program);
+void pvr_pds_compute_shader_initialize(
+   struct pvr_pds_compute_shader_program *program);
+
+/* Utility */
+
+uint32_t pvr_pds_append_constant64(uint32_t *constants,
+                                   uint64_t constant_value,
+                                   uint32_t *data_size);
+
+uint32_t pvr_pds_encode_dma_burst(uint32_t *dma_control,
+                                  uint64_t *dma_address,
+                                  uint32_t dest_offset,
+                                  uint32_t dma_size,
+                                  uint64_t src_address,
+                                  const struct pvr_device_info *dev_info);
+
+void pvr_pds_setup_doutu(struct pvr_pds_usc_task_control *usc_task_control,
+                         uint64_t execution_address,
+                         uint32_t usc_temps,
+                         uint32_t sample_rate,
+                         uint32_t phase_rate_change);
+
+/* Pixel */
+#define pvr_pds_set_sizes_pixel_shader(X) \
+   pvr_pds_kick_usc(X, NULL, 0, false, PDS_GENERATE_SIZES)
+#define pvr_pds_generate_pixel_shader_program(X, Y) \
+   pvr_pds_kick_usc(X, Y, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS)
+
+#define pvr_pds_generate_VDM_sync_program(X, Y) \
+   pvr_pds_kick_usc(X, Y, 0, false, PDS_GENERATE_CODEDATA_SEGMENTS)
+
+uint32_t *pvr_pds_generate_doutc(struct pvr_pds_fence_program *restrict program,
+                                 uint32_t *restrict buffer,
+                                 enum pvr_pds_generate_mode gen_mode);
+
+uint32_t *
+pvr_pds_generate_doutw(struct pvr_pds_doutw_control *restrict psControl,
+                       uint32_t *restrict buffer,
+                       enum pvr_pds_generate_mode gen_mode,
+                       const struct pvr_device_info *dev_info);
+
+uint32_t *pvr_pds_kick_usc(struct pvr_pds_kickusc_program *restrict program,
+                           uint32_t *restrict buffer,
+                           uint32_t start_next_constant,
+                           bool cc_enabled,
+                           enum pvr_pds_generate_mode gen_mode);
+
+/* Pixel Secondary */
+#define pvr_pds_set_sizes_pixel_shader_sa_uniform_data(X, Y)     \
+   pvr_pds_pixel_shader_uniform_texture_data(X,                  \
+                                             NULL,               \
+                                             PDS_GENERATE_SIZES, \
+                                             true,               \
+                                             Y)
+#define pvr_pds_set_sizes_pixel_shader_sa_texture_data(X, Y)     \
+   pvr_pds_pixel_shader_uniform_texture_data(X,                  \
+                                             NULL,               \
+                                             PDS_GENERATE_SIZES, \
+                                             false,              \
+                                             Y)
+#define pvr_pds_set_sizes_pixel_shader_uniform_texture_code(X) \
+   pvr_pds_pixel_shader_uniform_texture_code(X, NULL, PDS_GENERATE_SIZES)
+
+#define pvr_pds_generate_pixel_shader_sa_texture_state_data(X, Y, Z)    \
+   pvr_pds_pixel_shader_uniform_texture_data(X,                         \
+                                             Y,                         \
+                                             PDS_GENERATE_DATA_SEGMENT, \
+                                             false,                     \
+                                             Z)
+
+#define pvr_pds_generate_pixel_shader_sa_code_segment(X, Y) \
+   pvr_pds_pixel_shader_uniform_texture_code(X, Y, PDS_GENERATE_CODE_SEGMENT)
+
+uint32_t *pvr_pds_pixel_shader_uniform_texture_data(
+   struct pvr_pds_pixel_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   bool uniform,
+   const struct pvr_device_info *dev_info);
+
+uint32_t *pvr_pds_pixel_shader_uniform_texture_code(
+   struct pvr_pds_pixel_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode);
+
+/* Vertex */
+#define pvr_pds_set_sizes_vertex_shader(X, Y) \
+   pvr_pds_vertex_shader(X, NULL, PDS_GENERATE_SIZES, Y)
+
+#define pvr_pds_generate_vertex_shader_data_segment(X, Y, Z) \
+   pvr_pds_vertex_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_vertex_shader_code_segment(X, Y, Z) \
+   pvr_pds_vertex_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *
+pvr_pds_vertex_shader(struct pvr_pds_vertex_shader_program *restrict program,
+                      uint32_t *restrict buffer,
+                      enum pvr_pds_generate_mode gen_mode,
+                      const struct pvr_device_info *dev_info);
+
+/* Compute */
+uint32_t *
+pvr_pds_compute_shader(struct pvr_pds_compute_shader_program *restrict program,
+                       uint32_t *restrict buffer,
+                       enum pvr_pds_generate_mode gen_mode,
+                       const struct pvr_device_info *dev_info);
+
+#define pvr_pds_set_sizes_compute_shader(X, Y) \
+   pvr_pds_compute_shader(X, NULL, PDS_GENERATE_SIZES, Y)
+
+#define pvr_pds_generate_compute_shader_data_segment(X, Y, Z) \
+   pvr_pds_compute_shader(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_compute_shader_code_segment(X, Y, Z) \
+   pvr_pds_compute_shader(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+/* Vertex Secondary */
+#define pvr_pds_set_sizes_vertex_shader_sa(X, Y) \
+   pvr_pds_vertex_shader_sa(X, NULL, PDS_GENERATE_SIZES, Y)
+
+#define pvr_pds_generate_vertex_shader_sa_data_segment(X, Y, Z) \
+   pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_vertex_shader_sa_code_segment(X, Y, Z) \
+   pvr_pds_vertex_shader_sa(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *pvr_pds_vertex_shader_sa(
+   struct pvr_pds_vertex_shader_sa_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+
+/* Pixel Event */
+#define pvr_pds_set_sizes_pixel_event(X) \
+   pvr_pds_generate_pixel_event(X, NULL, PDS_GENERATE_SIZES, NULL)
+
+#define pvr_pds_generate_pixel_event_data_segment(X, Y, Z) \
+   pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_DATA_SEGMENT, Z)
+
+#define pvr_pds_generate_pixel_event_code_segment(X, Y, Z) \
+   pvr_pds_generate_pixel_event(X, Y, PDS_GENERATE_CODE_SEGMENT, Z)
+
+uint32_t *
+pvr_pds_generate_pixel_event(struct pvr_pds_event_program *restrict program,
+                             uint32_t *restrict buffer,
+                             enum pvr_pds_generate_mode gen_mode,
+                             const struct pvr_device_info *dev_info);
+
+/* Coefficient Loading */
+#define pvr_pds_set_sizes_coeff_loading(X) \
+   pvr_pds_coefficient_loading(X, NULL, PDS_GENERATE_SIZES)
+
+#define pvr_pds_generate_coeff_loading_program(X, Y) \
+   pvr_pds_coefficient_loading(X, Y, PDS_GENERATE_CODE_SEGMENT)
+
+uint32_t *pvr_pds_coefficient_loading(
+   struct pvr_pds_coeff_loading_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode);
+
+/* Compute DM barrier-specific conditional code */
+uint32_t *pvr_pds_generate_compute_barrier_conditional(
+   uint32_t *buffer,
+   enum pvr_pds_generate_mode gen_mode);
+
+/* Shared register storing */
+uint32_t *pvr_pds_generate_shared_storing_program(
+   struct pvr_pds_shared_storing_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+
+/* Shared register loading */
+uint32_t *pvr_pds_generate_fence_terminate_program(
+   struct pvr_pds_fence_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+
+/* CDM Shared register loading */
+uint32_t *pvr_pds_generate_compute_shared_loading_program(
+   struct pvr_pds_shared_storing_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+
+/* Stream out */
+uint32_t *pvr_pds_generate_stream_out_init_program(
+   struct pvr_pds_stream_out_init_program *restrict program,
+   uint32_t *restrict buffer,
+   bool store_mode,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+
+uint32_t *pvr_pds_generate_stream_out_terminate_program(
+   struct pvr_pds_stream_out_terminate_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+
+/* Structure describing the inputs and compiled outputs of the DrawIndirect
+ * PDS programs (indirect draw argument fetch).
+ */
+struct pvr_pds_drawindirect_program {
+   /* --- Input to pvr_pds_drawindirect_program --- */
+
+   /* Address of the index list block in the VDM control stream.
+    * This must point to a 128-bit aligned index list header.
+    */
+   uint64_t index_list_addr_buffer;
+   /* Address of arguments for Draw call.
+    * NOTE(review): the original comment said the layout is defined by
+    * "eArgFormat", but no such member exists in this struct — confirm where
+    * the argument layout is actually selected.
+    */
+   uint64_t arg_buffer;
+
+   /* Address of index buffer. */
+   uint64_t index_buffer;
+
+   /* The raw (without addr msb in [7:0]) index block header. */
+   uint32_t index_block_header;
+
+   /* Number of bytes per index. */
+   uint32_t index_stride;
+
+   /* Used during/after compilation to fill in constant buffer. */
+   struct pvr_psc_register data[32];
+
+   /* Results of compilation. */
+   struct pvr_psc_program_output program;
+
+   /* This is used for ARB_multi_draw_indirect. */
+   unsigned int count;
+   unsigned int stride;
+
+   /* Internal stuff. */
+   /* NOTE(review): presumably the multiview view count — confirm. */
+   unsigned int num_views;
+
+   bool support_base_instance;
+   bool increment_draw_id;
+};
+
+void pvr_pds_generate_draw_arrays_indirect(
+   struct pvr_pds_drawindirect_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+void pvr_pds_generate_draw_elements_indirect(
+   struct pvr_pds_drawindirect_program *restrict program,
+   uint32_t *restrict buffer,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+
+uint64_t pvr_pds_encode_st_src0(uint64_t src,
+                                uint64_t count4,
+                                uint64_t dst_add,
+                                bool write_through,
+                                const struct pvr_device_info *dev_info);
+
+uint64_t pvr_pds_encode_ld_src0(uint64_t dest,
+                                uint64_t count8,
+                                uint64_t src_add,
+                                bool cached,
+                                const struct pvr_device_info *dev_info);
+
+uint32_t *pvr_pds_generate_single_ldst_instruction(
+   bool ld,
+   const struct pvr_pds_ldst_control *control,
+   uint32_t temp_index,
+   uint64_t address,
+   uint32_t count,
+   uint32_t *next_constant,
+   uint32_t *total_data_size,
+   uint32_t *total_code_size,
+   uint32_t *buffer,
+   bool data_fence,
+   enum pvr_pds_generate_mode gen_mode,
+   const struct pvr_device_info *dev_info);
+/* Describes one descriptor-set upload: which API descriptor set to DMA and
+ * where its entries land in the shared registers.
+ */
+struct pvr_pds_descriptor_set {
+   unsigned int descriptor_set; /* id of the descriptor set. */
+   unsigned int size_in_dwords; /* Number of dwords to transfer. */
+   unsigned int destination; /* Destination shared register to which
+                              * descriptor entries should be loaded.
+                              */
+   bool primary; /* Primary or secondary? */
+   unsigned int offset_in_dwords; /* Offset from the start of the descriptor
+                                   * set to start DMA'ing from.
+                                   */
+};
+
+#define PVR_BUFFER_TYPE_UBO (0)
+#define PVR_BUFFER_TYPES_COMPILE_TIME (1)
+#define PVR_BUFFER_TYPE_BLEND_CONSTS (2)
+#define PVR_BUFFER_TYPE_PUSH_CONSTS (3)
+#define PVR_BUFFER_TYPES_BUFFER_LENGTHS (4)
+#define PVR_BUFFER_TYPE_DYNAMIC (5)
+#define PVR_BUFFER_TYPES_UBO_ZEROING (6)
+#define PVR_BUFFER_TYPE_INVALID (~0)
+
+/* One "state" buffer to be uploaded by the descriptor program.
+ * type is one of the PVR_BUFFER_TYPE_* values above and selects which arm of
+ * the union is meaningful: an inline data pointer, or a (buffer_id, desc_set,
+ * binding, source_offset) reference.
+ */
+struct pvr_pds_buffer {
+   uint16_t type;
+
+   uint16_t size_in_dwords;
+   uint32_t destination; /* Destination register for the upload. */
+
+   union {
+      uint32_t *data; /* Inline constant data. */
+      struct {
+         uint32_t buffer_id;
+         uint16_t desc_set;
+         uint16_t binding;
+         uint32_t source_offset;
+      };
+   };
+};
+
+#define PVR_PDS_MAX_BUFFERS (24)
+
+/* Input description for pvr_pds_generate_descriptor_upload_program():
+ * user-specified descriptor sets plus driver-managed "state" buffers.
+ */
+struct pvr_descriptor_program_input {
+   /* User-specified descriptor sets. */
+   unsigned int descriptor_set_count;
+   struct pvr_pds_descriptor_set descriptor_sets[8];
+
+   /* "State" buffers, including:
+    * compile-time constants
+    * blend constants
+    * push constants
+    * UBOs that have been hoisted.
+    */
+   uint32_t buffer_count;
+   struct pvr_pds_buffer buffers[PVR_PDS_MAX_BUFFERS];
+
+   /* Bitmask of which blend-constant channels the program actually reads. */
+   uint32_t blend_constants_used_mask;
+
+   /* Secondary (SA) program, when present, kicked via this task control. */
+   bool secondary_program_present;
+   struct pvr_pds_usc_task_control secondary_task_control;
+
+   bool must_not_be_empty;
+};
+
+#define PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED BITFIELD_BIT(0U)
+#define PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED BITFIELD_BIT(1U)
+#define PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT BITFIELD_BIT(2U)
+#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT BITFIELD_BIT(3U)
+#define PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED BITFIELD_BIT(4U)
+
+/* BaseVertex is used in shader. */
+#define PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED BITFIELD_BIT(5U)
+
+#define PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED BITFIELD_BIT(6U)
+
+#define PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE BITFIELD_BIT(0U)
+
+/* One vertex-attribute DMA for the vertex primary program.
+ * flags is an OR of PVR_PDS_VERTEX_DMA_FLAGS_*.
+ */
+struct pvr_pds_vertex_dma {
+   /* Try and keep this structure packing as small as possible. */
+   uint16_t offset;
+   uint16_t stride;
+
+   uint8_t flags;
+   uint8_t size_in_dwords;
+   uint8_t component_size_in_bytes;
+   uint8_t destination;
+   uint8_t binding_index;
+   uint32_t divisor; /* Instance-rate divisor. */
+
+   /* Offset into the robustness buffer used for out-of-bounds reads. */
+   uint16_t robustness_buffer_offset;
+};
+
+/* Input description for pvr_pds_generate_vertex_primary_program(). */
+struct pvr_pds_vertex_primary_program_input {
+   /* Control for the DOUTU that kicks the vertex USC shader. */
+   struct pvr_pds_usc_task_control usc_task_control;
+   /* List of DMAs (of size dma_count). */
+   struct pvr_pds_vertex_dma *dma_list;
+   uint32_t dma_count;
+
+   /* ORd bitfield of PVR_PDS_VERTEX_FLAGS_* */
+   uint32_t flags;
+
+   /* USC registers receiving the system values, used when the corresponding
+    * *_REQUIRED flag is set.
+    */
+   uint16_t vertex_id_register;
+   uint16_t instance_id_register;
+
+   /* API provided baseInstance (i.e. not from drawIndirect). */
+   uint32_t base_instance;
+
+   uint16_t base_instance_register;
+   uint16_t base_vertex_register;
+   uint16_t draw_index_register;
+};
+
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_NULL (0)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL64 (1)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32 (2)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET (3)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER (4)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER (5)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS (6)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS (7)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS (8)
+
+/* Use if pds_ddmadt is enabled. */
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE (9)
+
+/* Use if pds_ddmadt is not enabled. */
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX (9)
+
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE (10)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING (11)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX (12)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_WORKGROUP (13)
+#define PVR_PDS_CONST_MAP_ENTRY_TYPE_COND_RENDER (14)
+
+/* We pack all the following structs tightly into a buffer using += sizeof(x)
+ * offsets, this can lead to data that is not native aligned. Supplying the
+ * packed attribute indicates that unaligned accesses may be required, and the
+ * aligned attribute causes the size of the structure to be aligned to a
+ * specific boundary.
+ */
+#define PVR_ALIGNED __attribute__((packed, aligned(1)))
+
+struct pvr_const_map_entry {
+   uint8_t type;
+   uint8_t const_offset;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_literal32 {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint32_t literal_value;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_literal64 {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint64_t literal_value;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_descriptor_set {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint32_t descriptor_set;
+   PVR_PDS_BOOL primary;
+   uint32_t offset_in_dwords;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_constant_buffer {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint16_t buffer_id;
+   uint16_t desc_set;
+   uint16_t binding;
+   uint32_t offset;
+   uint32_t size_in_dwords;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_constant_buffer_zeroing {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint16_t buffer_id;
+   uint32_t offset;
+   uint32_t size_in_dwords;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_special_buffer {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint8_t buffer_type;
+   uint32_t buffer_index;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_doutu_address {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint64_t doutu_control;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_vertex_attribute_address {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint16_t offset;
+   uint16_t stride;
+   uint8_t binding_index;
+   uint8_t size_in_dwords;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_robust_vertex_attribute_address {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint16_t offset;
+   uint16_t stride;
+   uint8_t binding_index;
+   uint8_t size_in_dwords;
+   uint16_t robustness_buffer_offset;
+   uint8_t component_size_in_bytes;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_vertex_attribute_max_index {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint8_t binding_index;
+   uint8_t size_in_dwords;
+   uint16_t offset;
+   uint16_t stride;
+   uint8_t component_size_in_bytes;
+} PVR_ALIGNED;
+
+struct pvr_const_map_entry_base_instance {
+   uint8_t type;
+   uint8_t const_offset;
+} PVR_ALIGNED;
+
+/* Base-vertex const-map entry.
+ * All const-map entry structs are packed tightly into a byte buffer (see the
+ * PVR_ALIGNED comment above); this was the only entry missing the attribute,
+ * which could let the compiler lay it out differently from its siblings.
+ */
+struct pvr_const_map_entry_base_vertex {
+   uint8_t type;
+   uint8_t const_offset;
+} PVR_ALIGNED;
+
+struct pvr_pds_const_map_entry_base_workgroup {
+   uint8_t type;
+   uint8_t const_offset;
+   uint8_t workgroup_component;
+} PVR_ALIGNED;
+
+struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size {
+   uint8_t type;
+   uint8_t const_offset;
+   uint8_t binding_index;
+} PVR_ALIGNED;
+
+struct pvr_pds_const_map_entry_cond_render {
+   uint8_t type;
+   uint8_t const_offset;
+
+   uint32_t cond_render_pred_temp;
+} PVR_ALIGNED;
+
+struct pvr_pds_info {
+   uint32_t temps_required;
+   uint32_t code_size_in_dwords;
+   uint32_t data_size_in_dwords;
+
+   uint32_t entry_count;
+   size_t entries_size_in_bytes;
+   size_t entries_written_size_in_bytes;
+   struct pvr_const_map_entry *entries;
+};
+
+void pvr_pds_generate_descriptor_upload_program(
+   struct pvr_descriptor_program_input *input_program,
+   uint32_t *code_section,
+   struct pvr_pds_info *info);
+void pvr_pds_generate_vertex_primary_program(
+   struct pvr_pds_vertex_primary_program_input *input_program,
+   uint32_t *code,
+   struct pvr_pds_info *info,
+   bool use_robust_vertex_fetch,
+   const struct pvr_device_info *dev_info);
+
+/**
+ * Generate USC address.
+ *
+ * \param doutu Location to write the generated address.
+ * \param execution_address Address to generate from.
+ *
+ * Note: the encoded address is OR'd into doutu[0], so the EXE_OFF field must
+ * already be zero (e.g. a zero-initialised DOUTU word) before the call.
+ */
+static ALWAYS_INLINE void
+pvr_set_usc_execution_address64(uint64_t *doutu, uint64_t execution_address)
+{
+   /* Align-shift the address down, move it into field position, and mask to
+    * the field width.
+    */
+   doutu[0] |= (((execution_address >>
+                  PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT)
+                 << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT) &
+                ~PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK);
+}
+
+#endif /* PVR_PDS_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_disasm.c b/src/imagination/vulkan/pds/pvr_pds_disasm.c
new file mode 100644 (file)
index 0000000..66ccb3e
--- /dev/null
@@ -0,0 +1,1134 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_encode.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "util/macros.h"
+
+static void pvr_error_check(PVR_ERR_CALLBACK err_callback,
+                            struct pvr_dissassembler_error error)
+{
+   if (err_callback)
+      err_callback(error);
+   else
+      fprintf(stderr, "ERROR: %s\n", error.text);
+}
+
+#define X(a) #a,
+static const char *const instructions[] = { PVR_INSTRUCTIONS };
+#undef X
+
+/* Report an out-of-range register operand.
+ *
+ * \param raw          Raw operand field value that failed to decode.
+ * \param context      Opaque caller context (unused here).
+ * \param err_callback Optional error callback; stderr is used when NULL.
+ * \param parameter    Operand index: 0 = dst, N > 0 = src(N - 1).
+ * \param error        Error template; .instruction has already been filled in
+ *                     by the caller (a useless self-assignment of it was
+ *                     removed from this function).
+ *
+ * Note: error.text is heap-allocated here; ownership passes to the callback
+ * (it is leaked on the stderr fallback path).
+ */
+static void error_reg_range(uint32_t raw,
+                            void *context,
+                            PVR_ERR_CALLBACK err_callback,
+                            uint32_t parameter,
+                            struct pvr_dissassembler_error error)
+{
+   char param[32];
+
+   error.type = PVR_PDS_ERR_PARAM_RANGE;
+   error.parameter = parameter;
+   error.raw = raw;
+
+   if (parameter == 0)
+      snprintf(param, sizeof(param), "dst");
+   else
+      snprintf(param, sizeof(param), "src%u", parameter - 1);
+
+   error.text = malloc(PVR_PDS_MAX_INST_STR_LEN);
+   assert(error.text);
+
+   snprintf(error.text,
+            PVR_PDS_MAX_INST_STR_LEN,
+            "Register out of range, instruction: %s, operand: %s, value: %u",
+            instructions[error.instruction],
+            param,
+            raw);
+   pvr_error_check(err_callback, error);
+}
+
+/* Decode a REGS32 operand field into a freshly allocated pvr_operand.
+ * If the field is out of range the operand type stays UNRESOLVED and the
+ * failure is reported through error_reg_range().
+ */
+static struct pvr_operand *
+pvr_pds_disassemble_regs32(void *context,
+                           PVR_ERR_CALLBACK err_callback,
+                           struct pvr_dissassembler_error error,
+                           uint32_t instruction,
+                           uint32_t parameter)
+{
+   const uint32_t field = instruction & PVR_ROGUE_PDSINST_REGS32_MASK;
+   struct pvr_operand *operand = calloc(1, sizeof(*operand));
+
+   assert(operand);
+   operand->type = UNRESOLVED;
+
+   switch (pvr_pds_inst_decode_field_range_regs32(field)) {
+   case PVR_ROGUE_PDSINST_REGS32_CONST32:
+      operand->type = CONST32;
+      operand->address = field - PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER;
+      operand->absolute_address = operand->address;
+      break;
+   case PVR_ROGUE_PDSINST_REGS32_TEMP32:
+      operand->type = TEMP32;
+      operand->address = field - PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER;
+      operand->absolute_address = operand->address;
+      break;
+   case PVR_ROGUE_PDSINST_REGS32_PTEMP32:
+      operand->type = PTEMP32;
+      operand->address = field - PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER;
+      operand->absolute_address = operand->address;
+      break;
+   default:
+      error_reg_range(field, context, err_callback, parameter, error);
+   }
+
+   return operand;
+}
+/* Decode a REGS32TP (32-bit temp/persistent-temp) operand field into a newly
+ * allocated pvr_operand; reports out-of-range fields via error_reg_range().
+ */
+static struct pvr_operand *
+pvr_pds_disassemble_regs32tp(void *context,
+                             PVR_ERR_CALLBACK err_callback,
+                             struct pvr_dissassembler_error error,
+                             uint32_t instruction,
+                             uint32_t parameter)
+{
+   struct pvr_operand *op = calloc(1, sizeof(*op));
+   assert(op);
+
+   op->type = UNRESOLVED;
+   instruction &= PVR_ROGUE_PDSINST_REGS32TP_MASK;
+   switch (pvr_pds_inst_decode_field_range_regs32tp(instruction)) {
+   case PVR_ROGUE_PDSINST_REGS32TP_TEMP32:
+      op->type = TEMP32;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER;
+      op->absolute_address = op->address;
+      break;
+   case PVR_ROGUE_PDSINST_REGS32TP_PTEMP32:
+      op->type = PTEMP32;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER;
+      op->absolute_address = op->address;
+      break;
+   default:
+      error_reg_range(instruction, context, err_callback, parameter, error);
+   }
+   return op;
+}
+/* Decode a REGS32T (32-bit temp-only) operand field into a newly allocated
+ * pvr_operand; reports out-of-range fields via error_reg_range().
+ */
+static struct pvr_operand *
+pvr_pds_disassemble_regs32t(void *context,
+                            PVR_ERR_CALLBACK err_callback,
+                            struct pvr_dissassembler_error error,
+                            uint32_t instruction,
+                            uint32_t parameter)
+{
+   struct pvr_operand *op = calloc(1, sizeof(*op));
+   assert(op);
+
+   op->type = UNRESOLVED;
+   instruction &= PVR_ROGUE_PDSINST_REGS32T_MASK;
+   switch (pvr_pds_inst_decode_field_range_regs32t(instruction)) {
+   case PVR_ROGUE_PDSINST_REGS32T_TEMP32:
+      op->type = TEMP32;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS32T_TEMP32_LOWER;
+      op->absolute_address = op->address;
+      break;
+   default:
+      error_reg_range(instruction, context, err_callback, parameter, error);
+   }
+   return op;
+}
+
+/* Decode a REGS64 (64-bit const/temp/persistent-temp) operand field.
+ * 64-bit registers occupy two 32-bit slots, hence absolute_address is the
+ * register index doubled.
+ */
+static struct pvr_operand *
+pvr_pds_disassemble_regs64(void *context,
+                           PVR_ERR_CALLBACK err_callback,
+                           struct pvr_dissassembler_error error,
+                           uint32_t instruction,
+                           uint32_t parameter)
+{
+   struct pvr_operand *op = calloc(1, sizeof(*op));
+   assert(op);
+
+   op->type = UNRESOLVED;
+   instruction &= PVR_ROGUE_PDSINST_REGS64_MASK;
+   switch (pvr_pds_inst_decode_field_range_regs64(instruction)) {
+   case PVR_ROGUE_PDSINST_REGS64_CONST64:
+      op->type = CONST64;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER;
+      op->absolute_address = op->address * 2;
+      break;
+   case PVR_ROGUE_PDSINST_REGS64_TEMP64:
+      op->type = TEMP64;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER;
+      op->absolute_address = op->address * 2;
+      break;
+   case PVR_ROGUE_PDSINST_REGS64_PTEMP64:
+      op->type = PTEMP64;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER;
+      op->absolute_address = op->address * 2;
+      break;
+   default:
+      error_reg_range(instruction, context, err_callback, parameter, error);
+   }
+
+   return op;
+}
+/* Decode a REGS64T (64-bit temp-only) operand field.
+ * NOTE(review): this masks with the REGS64T mask but decodes via the
+ * *regs64tp* range decoder while switching on a REGS64T case value — that is
+ * only correct if the two enumerations agree for the TEMP64 range; confirm
+ * against a pvr_pds_inst_decode_field_range_regs64t, if one exists.
+ */
+static struct pvr_operand *
+pvr_pds_disassemble_regs64t(void *context,
+                            PVR_ERR_CALLBACK err_callback,
+                            struct pvr_dissassembler_error error,
+                            uint32_t instruction,
+                            uint32_t parameter)
+{
+   struct pvr_operand *op = calloc(1, sizeof(*op));
+   assert(op);
+
+   op->type = UNRESOLVED;
+   instruction &= PVR_ROGUE_PDSINST_REGS64T_MASK;
+   switch (pvr_pds_inst_decode_field_range_regs64tp(instruction)) {
+   case PVR_ROGUE_PDSINST_REGS64T_TEMP64:
+      op->type = TEMP64;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS64T_TEMP64_LOWER;
+      op->absolute_address = op->address * 2;
+      break;
+   default:
+      error_reg_range(instruction, context, err_callback, parameter, error);
+   }
+   return op;
+}
+
+/* Decode a REGS64C (64-bit constant-only) operand field into a newly
+ * allocated pvr_operand; reports out-of-range fields via error_reg_range().
+ */
+static struct pvr_operand *
+pvr_pds_disassemble_regs64C(void *context,
+                            PVR_ERR_CALLBACK err_callback,
+                            struct pvr_dissassembler_error error,
+                            uint32_t instruction,
+                            uint32_t parameter)
+{
+   struct pvr_operand *op = calloc(1, sizeof(*op));
+   assert(op);
+
+   op->type = UNRESOLVED;
+   instruction &= PVR_ROGUE_PDSINST_REGS64C_MASK;
+   switch (pvr_rogue_pds_inst_decode_field_range_regs64c(instruction)) {
+   case PVR_ROGUE_PDSINST_REGS64C_CONST64:
+      op->type = CONST64;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS64C_CONST64_LOWER;
+      /* 64-bit registers span two 32-bit slots. */
+      op->absolute_address = op->address * 2;
+      break;
+   default:
+      error_reg_range(instruction, context, err_callback, parameter, error);
+   }
+   return op;
+}
+
+/* Decode a REGS64TP (64-bit temp/persistent-temp) operand field into a newly
+ * allocated pvr_operand; reports out-of-range fields via error_reg_range().
+ */
+static struct pvr_operand *
+pvr_pds_disassemble_regs64tp(void *context,
+                             PVR_ERR_CALLBACK err_callback,
+                             struct pvr_dissassembler_error error,
+                             uint32_t instruction,
+                             uint32_t parameter)
+{
+   struct pvr_operand *op = calloc(1, sizeof(*op));
+   assert(op);
+
+   op->type = UNRESOLVED;
+   instruction &= PVR_ROGUE_PDSINST_REGS64TP_MASK;
+   switch (pvr_pds_inst_decode_field_range_regs64tp(instruction)) {
+   case PVR_ROGUE_PDSINST_REGS64TP_TEMP64:
+      op->type = TEMP64;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER;
+      op->absolute_address = op->address * 2;
+      break;
+   case PVR_ROGUE_PDSINST_REGS64TP_PTEMP64:
+      op->type = PTEMP64;
+      op->address = instruction - PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER;
+      op->absolute_address = op->address * 2;
+      break;
+   default:
+      error_reg_range(instruction, context, err_callback, parameter, error);
+   }
+   return op;
+}
+
+#define PVR_TYPE_OPCODE BITFIELD_BIT(31U)
+#define PVR_TYPE_OPCODE_SP BITFIELD_BIT(27U)
+#define PVR_TYPE_OPCODEB BITFIELD_BIT(30U)
+
+#define PVR_TYPE_OPCODE_SHIFT 28U
+#define PVR_TYPE_OPCODE_SP_SHIFT 23U
+#define PVR_TYPE_OPCODEB_SHIFT 29U
+
+/* Decode an ADD64 instruction word into a pvr_add node.
+ * Operand index convention for error reporting: 0 = dst, 1 = src0, 2 = src1.
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_add64(void *context,
+                                      PVR_ERR_CALLBACK err_callback,
+                                      struct pvr_dissassembler_error error,
+                                      uint32_t instruction)
+{
+   struct pvr_add *add = malloc(sizeof(*add));
+   assert(add);
+
+   add->instruction.type = INS_ADD64;
+   add->instruction.next = NULL;
+
+   /* Flag bits are simple presence tests on the raw word. */
+   add->cc = instruction & PVR_ROGUE_PDSINST_ADD64_CC_ENABLE;
+   add->alum = instruction & PVR_ROGUE_PDSINST_ADD64_ALUM_SIGNED;
+   add->sna = instruction & PVR_ROGUE_PDSINST_ADD64_SNA_SUB;
+
+   add->src0 = pvr_pds_disassemble_regs64(context,
+                                          err_callback,
+                                          error,
+                                          instruction >>
+                                             PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT,
+                                          1);
+   add->src0->instruction = &add->instruction;
+   add->src1 = pvr_pds_disassemble_regs64(context,
+                                          err_callback,
+                                          error,
+                                          instruction >>
+                                             PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT,
+                                          2);
+   add->src1->instruction = &add->instruction;
+   add->dst = pvr_pds_disassemble_regs64tp(context,
+                                           err_callback,
+                                           error,
+                                           instruction >>
+                                              PVR_ROGUE_PDSINST_ADD64_DST_SHIFT,
+                                           0);
+   add->dst->instruction = &add->instruction;
+
+   return &add->instruction;
+}
+
+/* Decode an ADD32 instruction word into a freshly allocated pvr_add node.
+ * Mirrors pvr_pds_disassemble_instruction_add64, but uses the 32-bit
+ * register classes. Operand indices: 0 = dst, 1 = src0, 2 = src1.
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_add32(void *context,
+                                      PVR_ERR_CALLBACK err_callback,
+                                      struct pvr_dissassembler_error error,
+                                      uint32_t instruction)
+{
+   struct pvr_add *ins = malloc(sizeof(*ins));
+
+   assert(ins);
+
+   ins->instruction.type = INS_ADD32;
+   ins->instruction.next = NULL;
+
+   /* Flag bits are presence tests on the raw instruction word. */
+   ins->cc = instruction & PVR_ROGUE_PDSINST_ADD32_CC_ENABLE;
+   ins->alum = instruction & PVR_ROGUE_PDSINST_ADD32_ALUM_SIGNED;
+   ins->sna = instruction & PVR_ROGUE_PDSINST_ADD32_SNA_SUB;
+
+   ins->src0 = pvr_pds_disassemble_regs32(context,
+                                          err_callback,
+                                          error,
+                                          instruction >>
+                                             PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT,
+                                          1);
+   ins->src1 = pvr_pds_disassemble_regs32(context,
+                                          err_callback,
+                                          error,
+                                          instruction >>
+                                             PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT,
+                                          2);
+   ins->dst = pvr_pds_disassemble_regs32tp(context,
+                                           err_callback,
+                                           error,
+                                           instruction >>
+                                              PVR_ROGUE_PDSINST_ADD32_DST_SHIFT,
+                                           0);
+
+   /* Back-link every operand to its owning instruction. */
+   ins->src0->instruction = &ins->instruction;
+   ins->src1->instruction = &ins->instruction;
+   ins->dst->instruction = &ins->instruction;
+
+   return &ins->instruction;
+}
+
+/* Decode an STM (stream out) instruction word into a pvr_stm node.
+ * Condition/select flags are single-bit tests; the stream-out index and the
+ * four source operands are extracted from their respective bit fields.
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_stm(void *context,
+                                    PVR_ERR_CALLBACK err_callback,
+                                    struct pvr_dissassembler_error error,
+                                    uint32_t instruction)
+{
+   struct pvr_stm *stm = malloc(sizeof(*stm));
+   assert(stm);
+
+   stm->instruction.next = NULL;
+   stm->instruction.type = INS_STM;
+
+   stm->cc = instruction & (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT);
+   stm->ccs_global = instruction &
+                     (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT);
+   stm->ccs_so = instruction & (1 << PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT);
+   stm->tst = instruction & (1 << PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT);
+
+   stm->stream_out = (instruction >> PVR_ROGUE_PDSINST_STM_SO_SHIFT) &
+                     PVR_ROGUE_PDSINST_SO_MASK;
+
+   stm->src0 = pvr_pds_disassemble_regs64tp(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT,
+      1);
+   stm->src0->instruction = &stm->instruction;
+
+   stm->src1 = pvr_pds_disassemble_regs64tp(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT,
+      2);
+   stm->src1->instruction = &stm->instruction;
+
+   stm->src2 = pvr_pds_disassemble_regs32(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT,
+      3);
+   stm->src2->instruction = &stm->instruction;
+
+   stm->src3 = pvr_pds_disassemble_regs64tp(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT,
+      4);
+   stm->src3->instruction = &stm->instruction;
+
+   return &stm->instruction;
+}
+
+/* Decode an SFTLP32 (32-bit shift/logical) instruction word.
+ * src2 is either an immediate shift amount (IM set) or a register operand.
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sftlp32(void *context,
+                                        PVR_ERR_CALLBACK err_callback,
+                                        struct pvr_dissassembler_error error,
+                                        uint32_t instruction)
+{
+   struct pvr_sftlp *ins = malloc(sizeof(*ins));
+   assert(ins);
+
+   ins->instruction.next = NULL;
+   ins->instruction.type = INS_SFTLP32;
+
+   ins->cc = instruction & PVR_ROGUE_PDSINST_SFTLP32_CC_ENABLE;
+   ins->IM = instruction & PVR_ROGUE_PDSINST_SFTLP32_IM_ENABLE;
+   ins->lop = (instruction >> PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT) &
+              PVR_ROGUE_PDSINST_LOP_MASK;
+   ins->src0 = pvr_pds_disassemble_regs32t(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT,
+      1);
+   ins->src0->instruction = &ins->instruction;
+   ins->src1 = pvr_pds_disassemble_regs32(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT,
+      2);
+   ins->src1->instruction = &ins->instruction;
+   ins->dst = pvr_pds_disassemble_regs32t(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT,
+      0);
+   ins->dst->instruction = &ins->instruction;
+
+   if (ins->IM) {
+      /* Sign-extend the immediate by shifting it into the top of a signed
+       * char, then divide back down; literal holds the magnitude and negate
+       * the sign. NOTE(review): the int -> signed char conversion on
+       * overflow is implementation-defined in C — confirm this is only built
+       * with compilers that truncate (e.g. GCC/Clang).
+       */
+      signed char cImmediate =
+         ((instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT) &
+          PVR_ROGUE_PDSINST_REGS32_MASK)
+         << 2;
+      ins->src2 = calloc(1, sizeof(*ins->src2));
+      assert(ins->src2);
+
+      ins->src2->literal = abs((cImmediate / 4));
+      ins->src2->negate = cImmediate < 0;
+      ins->src2->instruction = &ins->instruction;
+   } else {
+      ins->src2 = pvr_pds_disassemble_regs32tp(
+         context,
+         err_callback,
+         error,
+         (instruction >> PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT),
+         3);
+      ins->src2->instruction = &ins->instruction;
+   }
+
+   return &ins->instruction;
+}
+
+/* Decode an SFTLP64 (64-bit shift/logical) instruction word.  Mirrors the
+ * SFTLP32 decoder but reads 64-bit register banks, and the immediate
+ * shift amount is clamped to 63.
+ *
+ * The regs* sub-decoders may report problems through err_callback, so the
+ * decode order below is observable.
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sftlp64(void *context,
+                                        PVR_ERR_CALLBACK err_callback,
+                                        struct pvr_dissassembler_error error,
+                                        uint32_t instruction)
+{
+   struct pvr_sftlp *ins = malloc(sizeof(*ins));
+   assert(ins);
+
+   ins->instruction.next = NULL;
+   ins->instruction.type = INS_SFTLP64;
+
+   ins->cc = instruction & PVR_ROGUE_PDSINST_SFTLP64_CC_ENABLE;
+   ins->IM = instruction & PVR_ROGUE_PDSINST_SFTLP64_IM_ENABLE;
+   ins->lop = (instruction >> PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT) &
+              PVR_ROGUE_PDSINST_LOP_MASK;
+   ins->src0 = pvr_pds_disassemble_regs64tp(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT,
+      1);
+   ins->src0->instruction = &ins->instruction;
+   ins->src1 = pvr_pds_disassemble_regs64tp(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT,
+      2);
+   ins->src1->instruction = &ins->instruction;
+   ins->dst = pvr_pds_disassemble_regs64tp(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT,
+      0);
+   ins->dst->instruction = &ins->instruction;
+
+   if (ins->IM) {
+      /* Immediate form: the field is interpreted as a signed 8-bit shift
+       * amount whose magnitude is clamped to 63 (max shift for 64 bits).
+       * NOTE(review): the signed char conversion of an out-of-range value
+       * is implementation-defined — assumes a two's-complement target.
+       */
+      signed char cImmediate =
+         (instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT) &
+         PVR_ROGUE_PDSINST_REGS32_MASK;
+      ins->src2 = calloc(1, sizeof(*ins->src2));
+      assert(ins->src2);
+
+      ins->src2->literal = (abs(cImmediate) > 63) ? 63 : abs(cImmediate);
+      ins->src2->negate = (cImmediate < 0);
+      ins->src2->instruction = &ins->instruction;
+   } else {
+      ins->src2 = pvr_pds_disassemble_regs32(
+         context,
+         err_callback,
+         error,
+         (instruction >> PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT),
+         3);
+      ins->src2->instruction = &ins->instruction;
+   }
+
+   return &ins->instruction;
+}
+/* Decode a CMP (compare) instruction word.  src1 is either a 16-bit
+ * immediate (IM set) or a 64-bit register operand decoded via the regs64
+ * helper (which may report errors through err_callback).
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_cmp(void *context,
+                                    PVR_ERR_CALLBACK err_callback,
+                                    struct pvr_dissassembler_error error,
+                                    uint32_t instruction)
+{
+   struct pvr_cmp *cmp = malloc(sizeof(*cmp));
+   assert(cmp);
+
+   cmp->instruction.next = NULL;
+   cmp->instruction.type = INS_CMP;
+   cmp->cc = instruction & PVR_ROGUE_PDSINST_CMP_CC_ENABLE;
+   cmp->IM = instruction & PVR_ROGUE_PDSINST_CMP_IM_ENABLE;
+   cmp->cop = instruction >> PVR_ROGUE_PDSINST_CMP_COP_SHIFT &
+              PVR_ROGUE_PDSINST_COP_MASK;
+   cmp->src0 = pvr_pds_disassemble_regs64tp(context,
+                                            err_callback,
+                                            error,
+                                            instruction >>
+                                               PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT,
+                                            1);
+   cmp->src0->instruction = &cmp->instruction;
+
+   if (cmp->IM) {
+      uint32_t immediate = (instruction >> PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT) &
+                           PVR_ROGUE_PDSINST_IMM16_MASK;
+      /* calloc zeroes the remaining operand fields (negate, addresses). */
+      cmp->src1 = calloc(1, sizeof(*cmp->src1));
+      assert(cmp->src1);
+
+      cmp->src1->type = LITERAL_NUM;
+      cmp->src1->literal = immediate;
+   } else {
+      cmp->src1 = pvr_pds_disassemble_regs64(
+         context,
+         err_callback,
+         error,
+         instruction >> PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT,
+         2);
+   }
+   cmp->src1->instruction = &cmp->instruction;
+
+   return &cmp->instruction;
+}
+
+/* Decode an LD or ST special instruction into a pvr_ldst node.
+ *
+ * NOTE(review): PVR_ROGUE_PDSINST_LD_SRC0_SHIFT is used for the ST form
+ * as well — presumably the two encodings place SRC0 at the same position;
+ * verify against the instruction-set definition.
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sp_ld_st(void *context,
+                                         PVR_ERR_CALLBACK err_callback,
+                                         struct pvr_dissassembler_error error,
+                                         bool ld,
+                                         uint32_t instruction,
+                                         bool cc)
+{
+   struct pvr_ldst *ins = malloc(sizeof(*ins));
+   assert(ins);
+
+   ins->instruction.next = NULL;
+   ins->instruction.type = ld ? INS_LD : INS_ST;
+
+   ins->cc = cc;
+   ins->src0 =
+      pvr_pds_disassemble_regs64(context,
+                                 err_callback,
+                                 error,
+                                 instruction >> PVR_ROGUE_PDSINST_LD_SRC0_SHIFT,
+                                 1);
+   ins->src0->instruction = &ins->instruction;
+   ins->st = !ld;
+
+   return &ins->instruction;
+}
+
+/* Build an INS_STMC node from a raw STMC instruction word: the stream-out
+ * mask field is extracted and wrapped in a literal operand. */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sp_stmc(uint32_t instruction, bool cc)
+{
+   const uint32_t so_mask =
+      (instruction >> PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT) &
+      PVR_ROGUE_PDSINST_SOMASK_MASK;
+   struct pvr_stmc *ins = malloc(sizeof(*ins));
+   assert(ins);
+
+   ins->instruction.type = INS_STMC;
+   ins->instruction.next = NULL;
+   ins->cc = cc;
+
+   /* calloc zeroes the fields we do not set explicitly. */
+   ins->src0 = calloc(1, sizeof(*ins->src0));
+   assert(ins->src0);
+
+   ins->src0->type = LITERAL_NUM;
+   ins->src0->literal = so_mask;
+   ins->src0->instruction = &ins->instruction;
+
+   return &ins->instruction;
+}
+
+/* Decode a LIMM (load immediate) special instruction: a 16-bit literal
+ * destined for a 32-bit temp register.  GR selects the global-register
+ * variant.
+ *
+ * NOTE(review): the destination is extracted with
+ * PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT — odd field naming; confirm that this
+ * really is the destination field of the encoding.
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sp_limm(void *context,
+                                        PVR_ERR_CALLBACK err_callback,
+                                        struct pvr_dissassembler_error error,
+                                        uint32_t instruction,
+                                        bool cc)
+{
+   struct pvr_limm *limm = malloc(sizeof(*limm));
+   assert(limm);
+   limm->instruction.next = NULL;
+   limm->instruction.type = INS_LIMM;
+
+   limm->cc = cc;
+   limm->GR = (instruction & PVR_ROGUE_PDSINST_LIMM_GR_ENABLE) != 0;
+   limm->src0 = calloc(1, sizeof(*limm->src0));
+   assert(limm->src0);
+
+   limm->src0->type = LITERAL_NUM;
+   limm->src0->literal = (instruction >> PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT) &
+                         PVR_ROGUE_PDSINST_IMM16_MASK;
+   limm->src0->instruction = &limm->instruction;
+   limm->dst = pvr_pds_disassemble_regs32t(context,
+                                           err_callback,
+                                           error,
+                                           instruction >>
+                                              PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT,
+                                           0);
+   limm->dst->instruction = &limm->instruction;
+
+   return &limm->instruction;
+}
+
+/* Allocate a bare, operand-less instruction node (used for WDF, LOCK,
+ * RELEASE, HALT and NOP): only the type and the condition flag matter. */
+static struct pvr_instruction *
+pvr_pds_disassemble_simple(enum pvr_instruction_type type, bool cc)
+{
+   struct pvr_simple *simple = malloc(sizeof(*simple));
+   assert(simple);
+
+   simple->cc = cc;
+   simple->instruction.type = type;
+   simple->instruction.next = NULL;
+
+   return &simple->instruction;
+}
+
+/* Decode a BRA (branch) instruction: signed branch offset, source
+ * predicate (with optional negate) and set-predicate fields.  The branch
+ * target pointer is left NULL for a later resolution pass. */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_bra(uint32_t instruction)
+{
+   uint32_t branch_addr;
+   struct pvr_bra *bra = (struct pvr_bra *)malloc(sizeof(*bra));
+   assert(bra);
+
+   bra->instruction.type = INS_BRA;
+   bra->instruction.next = NULL;
+
+   branch_addr = (instruction >> PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT) &
+                 PVR_ROGUE_PDSINST_BRAADDR_MASK;
+   /* ADDR is a 19-bit signed offset: bit 18 (0x40000) is the sign bit, so
+    * subtract 2^19 (0x80000) to sign-extend negative targets. */
+   bra->address = (branch_addr & 0x40000U) ? ((int)branch_addr) - 0x80000
+                                           : (int)branch_addr;
+
+   bra->srcc = malloc(sizeof(*bra->srcc));
+   assert(bra->srcc);
+
+   bra->srcc->predicate = (instruction >> PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT) &
+                          PVR_ROGUE_PDSINST_PREDICATE_MASK;
+   bra->srcc->negate = instruction & PVR_ROGUE_PDSINST_BRA_NEG_ENABLE;
+
+   bra->setc = malloc(sizeof(*bra->setc));
+   assert(bra->setc);
+
+   bra->setc->predicate = (instruction >> PVR_ROGUE_PDSINST_BRA_SETC_SHIFT) &
+                          PVR_ROGUE_PDSINST_PREDICATE_MASK;
+
+   bra->target = NULL;
+
+   return &bra->instruction;
+}
+
+/* Decode the "special" (SP) opcode group and dispatch on its sub-opcode.
+ * Returns NULL after reporting through err_callback for unknown
+ * sub-opcodes.
+ *
+ * NOTE(review): cc is derived from the PVR_TYPE_OPCODE_SP bit of the
+ * word, while LD/ST derive their condition from their own CC field below —
+ * confirm the shared bit really is the condition flag for the other
+ * sub-opcodes.
+ */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_sp(void *context,
+                                   PVR_ERR_CALLBACK err_callback,
+                                   struct pvr_dissassembler_error error,
+                                   uint32_t instruction)
+{
+   uint32_t op = (instruction >> PVR_TYPE_OPCODE_SP_SHIFT) &
+                 PVR_ROGUE_PDSINST_OPCODESP_MASK;
+   bool cc = instruction & PVR_TYPE_OPCODE_SP;
+
+   switch (op) {
+   case PVR_ROGUE_PDSINST_OPCODESP_LD:
+      error.instruction = INS_LD;
+      return pvr_pds_disassemble_instruction_sp_ld_st(
+         context,
+         err_callback,
+         error,
+         true,
+         instruction,
+         instruction & (1 << PVR_ROGUE_PDSINST_LD_CC_SHIFT));
+   case PVR_ROGUE_PDSINST_OPCODESP_ST:
+      error.instruction = INS_ST;
+      return pvr_pds_disassemble_instruction_sp_ld_st(
+         context,
+         err_callback,
+         error,
+         false,
+         instruction,
+         instruction & (1 << PVR_ROGUE_PDSINST_ST_CC_SHIFT));
+   case PVR_ROGUE_PDSINST_OPCODESP_STMC:
+      error.instruction = INS_STMC;
+      return pvr_pds_disassemble_instruction_sp_stmc(instruction, cc);
+   case PVR_ROGUE_PDSINST_OPCODESP_LIMM:
+      error.instruction = INS_LIMM;
+      return pvr_pds_disassemble_instruction_sp_limm(context,
+                                                     err_callback,
+                                                     error,
+                                                     instruction,
+                                                     cc);
+   case PVR_ROGUE_PDSINST_OPCODESP_WDF:
+      error.instruction = INS_WDF;
+      return pvr_pds_disassemble_simple(INS_WDF, cc);
+   case PVR_ROGUE_PDSINST_OPCODESP_LOCK:
+      error.instruction = INS_LOCK;
+      return pvr_pds_disassemble_simple(INS_LOCK, cc);
+   case PVR_ROGUE_PDSINST_OPCODESP_RELEASE:
+      error.instruction = INS_RELEASE;
+      return pvr_pds_disassemble_simple(INS_RELEASE, cc);
+   case PVR_ROGUE_PDSINST_OPCODESP_HALT:
+      error.instruction = INS_HALT;
+      return pvr_pds_disassemble_simple(INS_HALT, cc);
+   case PVR_ROGUE_PDSINST_OPCODESP_NOP:
+      error.instruction = INS_NOP;
+      return pvr_pds_disassemble_simple(INS_NOP, cc);
+   default:
+      error.type = PVR_PDS_ERR_SP_UNKNOWN;
+      error.text = "opcode unknown for special instruction";
+      pvr_error_check(err_callback, error);
+      return NULL;
+   }
+}
+
+/* Decode a DDMAD instruction's four source operands (32, 32t, 64 and 64C
+ * register banks respectively).  The regs* sub-decoders may report
+ * problems through err_callback, so the decode order is observable. */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_ddmad(void *context,
+                                      PVR_ERR_CALLBACK err_callback,
+                                      struct pvr_dissassembler_error error,
+                                      uint32_t instruction)
+{
+   struct pvr_ddmad *ddmad = malloc(sizeof(*ddmad));
+   assert(ddmad);
+
+   ddmad->instruction.next = NULL;
+   ddmad->instruction.type = INS_DDMAD;
+
+   ddmad->cc = instruction & PVR_ROGUE_PDSINST_DDMAD_CC_ENABLE;
+   ddmad->END = instruction & PVR_ROGUE_PDSINST_DDMAD_END_ENABLE;
+
+   ddmad->src0 = pvr_pds_disassemble_regs32(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT,
+      1);
+   ddmad->src0->instruction = &ddmad->instruction;
+
+   ddmad->src1 = pvr_pds_disassemble_regs32t(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT,
+      2);
+   ddmad->src1->instruction = &ddmad->instruction;
+
+   ddmad->src2 = pvr_pds_disassemble_regs64(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT,
+      3);
+   ddmad->src2->instruction = &ddmad->instruction;
+
+   ddmad->src3 = pvr_pds_disassemble_regs64C(
+      context,
+      err_callback,
+      error,
+      instruction >> PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT,
+      4);
+   ddmad->src3->instruction = &ddmad->instruction;
+
+   return &ddmad->instruction;
+}
+
+/* Decode a MAD (multiply-add) instruction: two 32-bit sources, a 64-bit
+ * src2 and a 64-bit temp destination.  sna (from the SNA_SUB flag) selects
+ * add vs subtract; alum selects signed ALU mode. */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_mad(void *context,
+                                    PVR_ERR_CALLBACK err_callback,
+                                    struct pvr_dissassembler_error error,
+                                    uint32_t instruction)
+{
+   struct pvr_mad *mad = malloc(sizeof(*mad));
+   assert(mad);
+
+   mad->instruction.next = NULL;
+   mad->instruction.type = INS_MAD;
+
+   mad->cc = instruction & PVR_ROGUE_PDSINST_MAD_CC_ENABLE;
+   mad->sna = instruction & PVR_ROGUE_PDSINST_MAD_SNA_SUB;
+   mad->alum = (instruction & PVR_ROGUE_PDSINST_MAD_ALUM_SIGNED);
+
+   mad->src0 = pvr_pds_disassemble_regs32(context,
+                                          err_callback,
+                                          error,
+                                          instruction >>
+                                             PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT,
+                                          1);
+   mad->src0->instruction = &mad->instruction;
+
+   mad->src1 = pvr_pds_disassemble_regs32(context,
+                                          err_callback,
+                                          error,
+                                          instruction >>
+                                             PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT,
+                                          2);
+   mad->src1->instruction = &mad->instruction;
+
+   mad->src2 = pvr_pds_disassemble_regs64(context,
+                                          err_callback,
+                                          error,
+                                          instruction >>
+                                             PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT,
+                                          3);
+   mad->src2->instruction = &mad->instruction;
+
+   mad->dst = pvr_pds_disassemble_regs64t(context,
+                                          err_callback,
+                                          error,
+                                          instruction >>
+                                             PVR_ROGUE_PDSINST_MAD_DST_SHIFT,
+                                          0);
+   mad->dst->instruction = &mad->instruction;
+
+   return &mad->instruction;
+}
+
+/* Decode a DOUT (data output) instruction: 64-bit src0 and 32-bit src1
+ * operands, with dst being a raw output-destination selector field rather
+ * than a register operand. */
+static struct pvr_instruction *
+pvr_pds_disassemble_instruction_dout(void *context,
+                                     PVR_ERR_CALLBACK err_callback,
+                                     struct pvr_dissassembler_error error,
+                                     uint32_t instruction)
+{
+   struct pvr_dout *dout = malloc(sizeof(*dout));
+   assert(dout);
+
+   dout->instruction.next = NULL;
+   dout->instruction.type = INS_DOUT;
+
+   dout->END = instruction & PVR_ROGUE_PDSINST_DOUT_END_ENABLE;
+   dout->cc = instruction & PVR_ROGUE_PDSINST_DOUT_CC_ENABLE;
+   dout->dst = (instruction >> PVR_ROGUE_PDSINST_DOUT_DST_SHIFT) &
+               PVR_ROGUE_PDSINST_DSTDOUT_MASK;
+
+   dout->src0 = pvr_pds_disassemble_regs64(context,
+                                           err_callback,
+                                           error,
+                                           instruction >>
+                                              PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT,
+                                           1);
+   dout->src0->instruction = &dout->instruction;
+
+   dout->src1 = pvr_pds_disassemble_regs32(context,
+                                           err_callback,
+                                           error,
+                                           instruction >>
+                                              PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT,
+                                           2);
+   dout->src1->instruction = &dout->instruction;
+
+   return &dout->instruction;
+}
+
+/* Release a LIMM node: its operands first, then the node itself. */
+static void pvr_pds_free_instruction_limm(struct pvr_limm *limm)
+{
+   free(limm->src0);
+   free(limm->dst);
+   free(limm);
+}
+
+/* Release an ADD32/ADD64 node and its three operands. */
+static void pvr_pds_free_instruction_add(struct pvr_add *add)
+{
+   free(add->src0);
+   free(add->src1);
+   free(add->dst);
+   free(add);
+}
+
+/* Release a CMP node and its two source operands. */
+static void pvr_pds_free_instruction_cmp(struct pvr_cmp *cmp)
+{
+   free(cmp->src1);
+   free(cmp->src0);
+   free(cmp);
+}
+
+/* Release a MAD node and its four operands. */
+static void pvr_pds_free_instruction_mad(struct pvr_mad *mad)
+{
+   free(mad->src0);
+   free(mad->src1);
+   free(mad->src2);
+   free(mad->dst);
+   free(mad);
+}
+
+/* Release a BRA node and its predicate descriptors (the branch target is
+ * not owned by the node). */
+static void pvr_pds_free_instruction_bra(struct pvr_bra *bra)
+{
+   free(bra->srcc);
+   free(bra->setc);
+   free(bra);
+}
+
+/* Release a DDMAD node and its four source operands. */
+static void pvr_pds_free_instruction_ddmad(struct pvr_ddmad *ddmad)
+{
+   free(ddmad->src3);
+   free(ddmad->src2);
+   free(ddmad->src1);
+   free(ddmad->src0);
+   free(ddmad);
+}
+
+/* Release a DOUT node and its two source operands. */
+static void pvr_pds_free_instruction_dout(struct pvr_dout *dout)
+{
+   free(dout->src1);
+   free(dout->src0);
+   free(dout);
+}
+
+/* Release an LD/ST node; the operand must be freed before the node that
+ * holds its pointer. */
+static void pvr_pds_free_instruction_ldst(struct pvr_ldst *inst)
+{
+   free(inst->src0);
+   free(inst);
+}
+
+/* Release an operand-less (WDF/LOCK/RELEASE/HALT/NOP) node. */
+static void pvr_pds_free_instruction_simple(struct pvr_simple *inst)
+{
+   free(inst);
+}
+
+/* Release an SFTLP32/SFTLP64 node and its four operands.  (The "sfltp"
+ * spelling in the name is a historical typo for "sftlp".) */
+static void pvr_pds_free_instruction_sfltp(struct pvr_sftlp *sftlp)
+{
+   free(sftlp->src0);
+   free(sftlp->src1);
+   free(sftlp->src2);
+   free(sftlp->dst);
+   free(sftlp);
+}
+
+/* Release an STM node and its four source operands. */
+static void pvr_pds_free_instruction_stm(struct pvr_stm *stm)
+{
+   free(stm->src3);
+   free(stm->src2);
+   free(stm->src1);
+   free(stm->src0);
+   free(stm);
+}
+
+/* Release an STMC node; the literal operand must be freed before the node
+ * that holds its pointer. */
+static void pvr_pds_free_instruction_stmc(struct pvr_stmc *inst)
+{
+   free(inst->src0);
+   free(inst);
+}
+
+/* Free one disassembled instruction node (and the operands it owns) by
+ * dispatching on its type.  Safe to call with NULL.
+ *
+ * There is no default case: the switch must cover every
+ * pvr_instruction_type, otherwise a new value would leak silently
+ * (-Wswitch catches a missing case at build time). */
+void pvr_pds_free_instruction(struct pvr_instruction *instruction)
+{
+   if (!instruction)
+      return;
+
+   switch (instruction->type) {
+   case INS_LIMM:
+      pvr_pds_free_instruction_limm((struct pvr_limm *)instruction);
+      break;
+   case INS_ADD64:
+   case INS_ADD32:
+      pvr_pds_free_instruction_add((struct pvr_add *)instruction);
+      break;
+   case INS_CMP:
+      pvr_pds_free_instruction_cmp((struct pvr_cmp *)instruction);
+      break;
+   case INS_MAD:
+      pvr_pds_free_instruction_mad((struct pvr_mad *)instruction);
+      break;
+   case INS_BRA:
+      pvr_pds_free_instruction_bra((struct pvr_bra *)instruction);
+      break;
+   case INS_DDMAD:
+      pvr_pds_free_instruction_ddmad((struct pvr_ddmad *)instruction);
+      break;
+   case INS_DOUT:
+      pvr_pds_free_instruction_dout((struct pvr_dout *)instruction);
+      break;
+   case INS_LD:
+   case INS_ST:
+      pvr_pds_free_instruction_ldst((struct pvr_ldst *)instruction);
+      break;
+   case INS_WDF:
+   case INS_LOCK:
+   case INS_RELEASE:
+   case INS_HALT:
+   case INS_NOP:
+      pvr_pds_free_instruction_simple((struct pvr_simple *)instruction);
+      break;
+   case INS_SFTLP64:
+   case INS_SFTLP32:
+      pvr_pds_free_instruction_sfltp((struct pvr_sftlp *)instruction);
+      break;
+   case INS_STM:
+      pvr_pds_free_instruction_stm((struct pvr_stm *)instruction);
+      break;
+   case INS_STMC:
+      pvr_pds_free_instruction_stmc((struct pvr_stmc *)instruction);
+      break;
+   }
+}
+
+/* Decode one 32-bit PDS instruction word into a newly allocated
+ * instruction tree (freed with pvr_pds_free_instruction).
+ *
+ * The top bits select the opcode class, checked in priority order:
+ * class C (PVR_TYPE_OPCODE), then class B (PVR_TYPE_OPCODEB), otherwise
+ * class A which is always MAD.
+ *
+ * Returns NULL on failure.  Note that an unrecognised class-C or class-B
+ * sub-opcode falls through to the final return NULL without invoking
+ * err_callback. */
+struct pvr_instruction *
+pvr_pds_disassemble_instruction2(void *context,
+                                 PVR_ERR_CALLBACK err_callback,
+                                 uint32_t instruction)
+{
+   struct pvr_dissassembler_error error = { .context = context };
+
+   /* First we need to find out what type of OPCODE we are dealing with. */
+   if (instruction & PVR_TYPE_OPCODE) {
+      uint32_t opcode_C = (instruction >> PVR_TYPE_OPCODE_SHIFT) &
+                          PVR_ROGUE_PDSINST_OPCODEC_MASK;
+      switch (opcode_C) {
+      case PVR_ROGUE_PDSINST_OPCODEC_ADD64:
+         error.instruction = INS_ADD64;
+         return pvr_pds_disassemble_instruction_add64(context,
+                                                      err_callback,
+                                                      error,
+                                                      instruction);
+      case PVR_ROGUE_PDSINST_OPCODEC_ADD32:
+         error.instruction = INS_ADD32;
+         return pvr_pds_disassemble_instruction_add32(context,
+                                                      err_callback,
+                                                      error,
+                                                      instruction);
+      case PVR_ROGUE_PDSINST_OPCODEC_SFTLP64:
+         error.instruction = INS_SFTLP64;
+         return pvr_pds_disassemble_instruction_sftlp64(context,
+                                                        err_callback,
+                                                        error,
+                                                        instruction);
+      case PVR_ROGUE_PDSINST_OPCODEC_CMP:
+         error.instruction = INS_CMP;
+         return pvr_pds_disassemble_instruction_cmp(context,
+                                                    err_callback,
+                                                    error,
+                                                    instruction);
+      case PVR_ROGUE_PDSINST_OPCODEC_BRA:
+         error.instruction = INS_BRA;
+         return pvr_pds_disassemble_instruction_bra(instruction);
+      case PVR_ROGUE_PDSINST_OPCODEC_SP:
+         /* error.instruction is filled in per sub-opcode inside. */
+         return pvr_pds_disassemble_instruction_sp(context,
+                                                   err_callback,
+                                                   error,
+                                                   instruction);
+      case PVR_ROGUE_PDSINST_OPCODEC_DDMAD:
+         error.instruction = INS_DDMAD;
+         return pvr_pds_disassemble_instruction_ddmad(context,
+                                                      err_callback,
+                                                      error,
+                                                      instruction);
+      case PVR_ROGUE_PDSINST_OPCODEC_DOUT:
+         error.instruction = INS_DOUT;
+         return pvr_pds_disassemble_instruction_dout(context,
+                                                     err_callback,
+                                                     error,
+                                                     instruction);
+      }
+   } else if (instruction & PVR_TYPE_OPCODEB) {
+      uint32_t opcode_B = (instruction >> PVR_TYPE_OPCODEB_SHIFT) &
+                          PVR_ROGUE_PDSINST_OPCODEB_MASK;
+      switch (opcode_B) {
+      case PVR_ROGUE_PDSINST_OPCODEB_SFTLP32:
+         error.instruction = INS_SFTLP32;
+         return pvr_pds_disassemble_instruction_sftlp32(context,
+                                                        err_callback,
+                                                        error,
+                                                        instruction);
+      case PVR_ROGUE_PDSINST_OPCODEB_STM:
+         error.instruction = INS_STM;
+         return pvr_pds_disassemble_instruction_stm(context,
+                                                    err_callback,
+                                                    error,
+                                                    instruction);
+      }
+   } else { /* Opcode A - MAD instruction. */
+      error.instruction = INS_MAD;
+      return pvr_pds_disassemble_instruction_mad(context,
+                                                 err_callback,
+                                                 error,
+                                                 instruction);
+   }
+   return NULL;
+}
diff --git a/src/imagination/vulkan/pds/pvr_pds_printer.c b/src/imagination/vulkan/pds/pvr_pds_printer.c
new file mode 100644 (file)
index 0000000..59730c9
--- /dev/null
@@ -0,0 +1,666 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "pvr_rogue_pds_encode.h"
+#include "util/log.h"
+
+/* Expand the PVR_PDS_LOP X-macro list into a string table of logical-op
+ * mnemonics, indexed by the lop enum value. */
+#define X(lop, str) #str,
+static const char *const LOP[] = { PVR_PDS_LOP };
+#undef X
+
+static void pvr_pds_disassemble_operand(struct pvr_operand *op,
+                                        char *instr_str,
+                                        size_t instr_len)
+{
+#define X(enum, str, size) { #str, #size },
+   static const char *const regs[][2] = { PVR_PDS_OPERAND_TYPES };
+#undef X
+
+   if (op->type == LITERAL_NUM) {
+      snprintf(instr_str,
+               instr_len,
+               "%s (%llu)",
+               regs[op->type][0],
+               (unsigned long long)op->literal);
+   } else if (op->type == UNRESOLVED) {
+      snprintf(instr_str, instr_len, "UNRESOLVED");
+   } else {
+      snprintf(instr_str,
+               instr_len,
+               "%s[%u].%s",
+               regs[op->type][0],
+               op->absolute_address,
+               regs[op->type][1]);
+   }
+}
+
+/* Print an ADD64 as "ADD64  [? ]dst = src0 {+|-} src1 [signed]". */
+static void pvr_pds_disassemble_instruction_add64(struct pvr_add *add,
+                                                  char *instr_str,
+                                                  size_t instr_len)
+{
+   const char *cond = add->cc ? "? " : "";
+   const char *op_sign = add->sna ? "-" : "+";
+   const char *alu_mode = add->alum ? "[signed]" : "";
+   char dst[32];
+   char src0[32];
+   char src1[32];
+
+   pvr_pds_disassemble_operand(add->dst, dst, sizeof(dst));
+   pvr_pds_disassemble_operand(add->src0, src0, sizeof(src0));
+   pvr_pds_disassemble_operand(add->src1, src1, sizeof(src1));
+
+   snprintf(instr_str,
+            instr_len,
+            "%-16s%s%s = %s %s %s %s",
+            "ADD64",
+            cond,
+            dst,
+            src0,
+            op_sign,
+            src1,
+            alu_mode);
+}
+
+/* Print an ADD32 as "ADD32  [? ]dst = src0 {+|-} src1 [signed]". */
+static void pvr_pds_disassemble_instruction_add32(struct pvr_add *add,
+                                                  char *instr_str,
+                                                  size_t instr_len)
+{
+   const char *cond = add->cc ? "? " : "";
+   const char *op_sign = add->sna ? "-" : "+";
+   const char *alu_mode = add->alum ? "[signed]" : "";
+   char dst[32];
+   char src0[32];
+   char src1[32];
+
+   pvr_pds_disassemble_operand(add->dst, dst, sizeof(dst));
+   pvr_pds_disassemble_operand(add->src0, src0, sizeof(src0));
+   pvr_pds_disassemble_operand(add->src1, src1, sizeof(src1));
+
+   snprintf(instr_str,
+            instr_len,
+            "%-16s%s%s = %s %s %s %s",
+            "ADD32",
+            cond,
+            dst,
+            src0,
+            op_sign,
+            src1,
+            alu_mode);
+}
+
+/* Print an SFTLP32.  Depending on the logical op the line is one of:
+ *   "SFTLP32  [? ]dst = src0 OP src2"
+ *   "SFTLP32  [? ]dst = (~src0) OP src2"
+ *   "SFTLP32  [? ]dst = (src0 LOP src1) OP src2"
+ * where OP is ">>" only for an immediate shift flagged negative. */
+static void
+pvr_pds_disassemble_instruction_sftlp32(struct pvr_sftlp *instruction,
+                                        char *instr_str,
+                                        size_t instr_len)
+{
+   /* && short-circuits, so src2->negate is only read in immediate mode,
+    * matching the nested ternary this replaces. */
+   const char *shift_op =
+      (instruction->IM && instruction->src2->negate) ? ">>" : "<<";
+   const char *cond = instruction->cc ? "? " : "";
+   char dst[32];
+   char src0[32];
+   char src1[32];
+   char src2[32];
+
+   pvr_pds_disassemble_operand(instruction->dst, dst, sizeof(dst));
+   pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0));
+   pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1));
+
+   if (instruction->IM)
+      snprintf(src2, sizeof(src2), "%u", (uint32_t)instruction->src2->literal);
+   else
+      pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2));
+
+   if (instruction->lop == LOP_NONE) {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = %s %s %s",
+               "SFTLP32",
+               cond,
+               dst,
+               src0,
+               shift_op,
+               src2);
+   } else if (instruction->lop == LOP_NOT) {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = (~%s) %s %s",
+               "SFTLP32",
+               cond,
+               dst,
+               src0,
+               shift_op,
+               src2);
+   } else {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = (%s %s %s) %s %s",
+               "SFTLP32",
+               cond,
+               dst,
+               src0,
+               LOP[instruction->lop],
+               src1,
+               shift_op,
+               src2);
+   }
+}
+
+/* Print an STM as
+ * "STM  [? ][pred][ (TST only)] stmN = src0, src1, src2, src3". */
+static void pvr_pds_disassemble_instruction_stm(struct pvr_stm *instruction,
+                                                char *instr_str,
+                                                size_t instr_len)
+{
+   const char *stm_pred;
+   char src0[32];
+   char src1[32];
+   char src2[32];
+   char src3[32];
+
+   pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0));
+   pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1));
+   pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2));
+   pvr_pds_disassemble_operand(instruction->src3, src3, sizeof(src3));
+
+   /* Global overflow takes precedence over the current stream-out one. */
+   if (instruction->ccs_global)
+      stm_pred = "overflow_any";
+   else if (instruction->ccs_so)
+      stm_pred = "overflow_current";
+   else
+      stm_pred = "";
+
+   snprintf(instr_str,
+            instr_len,
+            "%-16s%s%s%s stm%u = %s, %s, %s, %s",
+            "STM",
+            instruction->cc ? "? " : "",
+            stm_pred,
+            instruction->tst ? " (TST only)" : "",
+            instruction->stream_out,
+            src0,
+            src1,
+            src2,
+            src3);
+}
+
+/* Print an STMC as "STMC  [? ] <mask-operand>". */
+static void pds_disassemble_instruction_stmc(struct pvr_stmc *instruction,
+                                             char *instr_str,
+                                             size_t instr_len)
+{
+   char mask[32];
+
+   pvr_pds_disassemble_operand(instruction->src0, mask, sizeof(mask));
+
+   snprintf(instr_str,
+            instr_len,
+            "%-16s%s %s",
+            "STMC",
+            instruction->cc ? "? " : "",
+            mask);
+}
+
+/* Print an SFTLP64.  Depending on the logical op the line is one of:
+ *   "SFTLP64  [? ]dst = src0 OP src2"
+ *   "SFTLP64  [? ]dst = (~src0) OP src2"
+ *   "SFTLP64  [? ]dst = (src0 LOP src1) OP src2"
+ * where OP is ">>" only for an immediate shift flagged negative. */
+static void
+pvr_pds_disassemble_instruction_sftlp64(struct pvr_sftlp *instruction,
+                                        char *instr_str,
+                                        size_t instr_len)
+{
+   /* && short-circuits, so src2->negate is only read in immediate mode,
+    * matching the nested ternary this replaces. */
+   const char *shift_op =
+      (instruction->IM && instruction->src2->negate) ? ">>" : "<<";
+   const char *cond = instruction->cc ? "? " : "";
+   char dst[32];
+   char src0[32];
+   char src1[32];
+   char src2[32];
+
+   pvr_pds_disassemble_operand(instruction->dst, dst, sizeof(dst));
+   pvr_pds_disassemble_operand(instruction->src0, src0, sizeof(src0));
+   pvr_pds_disassemble_operand(instruction->src1, src1, sizeof(src1));
+
+   if (instruction->IM)
+      snprintf(src2, sizeof(src2), "%u", (uint32_t)instruction->src2->literal);
+   else
+      pvr_pds_disassemble_operand(instruction->src2, src2, sizeof(src2));
+
+   if (instruction->lop == LOP_NONE) {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = %s %s %s",
+               "SFTLP64",
+               cond,
+               dst,
+               src0,
+               shift_op,
+               src2);
+   } else if (instruction->lop == LOP_NOT) {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = (~%s) %s %s",
+               "SFTLP64",
+               cond,
+               dst,
+               src0,
+               shift_op,
+               src2);
+   } else {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = (%s %s %s) %s %s",
+               "SFTLP64",
+               cond,
+               dst,
+               src0,
+               LOP[instruction->lop],
+               src1,
+               shift_op,
+               src2);
+   }
+}
+
+/* Disassemble a CMP instruction: "P0 = (src0 <op> src1)".  The comparison
+ * operator is looked up in COP by the cop field; with IM set src1 is an
+ * immediate literal printed in hex.
+ */
+static void pvr_pds_disassemble_instruction_cmp(struct pvr_cmp *cmp,
+                                                char *instr_str,
+                                                size_t instr_len)
+{
+   char src0[32];
+   char src1[32];
+   static const char *const COP[] = { "=", ">", "<", "!=" };
+
+   pvr_pds_disassemble_operand(cmp->src0, src0, sizeof(src0));
+
+   if (cmp->IM) {
+      snprintf(src1,
+               sizeof(src1),
+               "%#04llx",
+               (unsigned long long)cmp->src1->literal);
+   } else {
+      pvr_pds_disassemble_operand(cmp->src1, src1, sizeof(src1));
+   }
+
+   snprintf(instr_str,
+            instr_len,
+            "%-16s%sP0 = (%s %s %s)",
+            "CMP",
+            cmp->cc ? "? " : "",
+            src0,
+            COP[cmp->cop],
+            src1);
+}
+
+/* Disassemble an LD or ST instruction.  Only the src0 operand is decoded;
+ * the memory address and register operands are shown as "?" placeholders.
+ */
+static void pvr_pds_disassemble_instruction_ldst(struct pvr_ldst *ins,
+                                                 char *instr_str,
+                                                 size_t instr_len)
+{
+   char src0[PVR_PDS_MAX_INST_STR_LEN];
+
+   pvr_pds_disassemble_operand(ins->src0, src0, sizeof(src0));
+
+   if (ins->st) {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s: mem(%s) <= src(%s)",
+               "ST",
+               ins->cc ? "? " : "",
+               src0,
+               "?",
+               "?");
+   } else {
+      /* Uppercase mnemonic, consistent with "ST" above and with every other
+       * mnemonic emitted by this disassembler (was "ld").
+       */
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s: dst(%s) <= mem(%s)",
+               "LD",
+               ins->cc ? "? " : "",
+               src0,
+               "?",
+               "?");
+   }
+}
+
+/* Disassemble an operand-less instruction (WDF, LOCK, RELEASE, HALT, NOP):
+ * just the mnemonic plus an optional "? " conditional-execution marker.
+ */
+static void pvr_pds_disassemble_simple(struct pvr_simple *simple,
+                                       const char *type,
+                                       char *instr_str,
+                                       size_t instr_len)
+{
+   snprintf(instr_str, instr_len, "%-16s%s", type, simple->cc ? "? " : "");
+}
+
+/* Disassemble a LIMM (load immediate) instruction.  With the GR flag set the
+ * immediate selects a special register (printed as "G<n> (<name>)") instead
+ * of a literal value.
+ */
+static void pvr_pds_disassemble_instruction_limm(struct pvr_limm *limm,
+                                                 char *instr_str,
+                                                 size_t instr_len)
+{
+   int32_t imm = (uint32_t)limm->src0->literal;
+   char dst[PVR_PDS_MAX_INST_STR_LEN];
+
+   pvr_pds_disassemble_operand(limm->dst, dst, sizeof(dst));
+
+   if (limm->GR) {
+      /* Points at string literals, so it must be const-qualified
+       * (CERT STR30-C; avoids -Wwrite-strings warnings).
+       */
+      const char *pchGReg;
+
+      switch (imm) {
+      case 0:
+         pchGReg = "cluster";
+         break;
+      case 1:
+         pchGReg = "instance";
+         break;
+      default:
+         pchGReg = "unknown";
+      }
+
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = G%d (%s)",
+               "LIMM",
+               limm->cc ? "? " : "",
+               dst,
+               imm,
+               pchGReg);
+   } else {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = %#04x",
+               "LIMM",
+               limm->cc ? "? " : "",
+               dst,
+               imm);
+   }
+}
+
+/* Disassemble a DDMAD instruction:
+ * "doutd = (src0 * src1) + src2, src3", with "; HALT" appended when the END
+ * flag is set.
+ */
+static void pvr_pds_disassemble_instruction_ddmad(struct pvr_ddmad *ddmad,
+                                                  char *instr_str,
+                                                  size_t instr_len)
+{
+   char src0[PVR_PDS_MAX_INST_STR_LEN];
+   char src1[PVR_PDS_MAX_INST_STR_LEN];
+   char src2[PVR_PDS_MAX_INST_STR_LEN];
+   char src3[PVR_PDS_MAX_INST_STR_LEN];
+
+   pvr_pds_disassemble_operand(ddmad->src0, src0, sizeof(src0));
+   pvr_pds_disassemble_operand(ddmad->src1, src1, sizeof(src1));
+   pvr_pds_disassemble_operand(ddmad->src2, src2, sizeof(src2));
+   pvr_pds_disassemble_operand(ddmad->src3, src3, sizeof(src3));
+
+   snprintf(instr_str,
+            instr_len,
+            "%-16s%sdoutd = (%s * %s) + %s, %s%s",
+            "DDMAD",
+            ddmad->cc ? "? " : "",
+            src0,
+            src1,
+            src2,
+            src3,
+            ddmad->END ? "; HALT" : "");
+}
+
+/* Translate a PVR_ROGUE_PDSINST_PREDICATE_* value into its textual name.
+ * Unknown values produce "<ERROR>" rather than being treated as fatal.
+ */
+static void pvr_pds_disassemble_predicate(uint32_t predicate,
+                                          char *buffer,
+                                          size_t buffer_length)
+{
+   switch (predicate) {
+   case PVR_ROGUE_PDSINST_PREDICATE_P0:
+      snprintf(buffer, buffer_length, "%s", "p0");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_IF0:
+      snprintf(buffer, buffer_length, "%s", "if0");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_IF1:
+      snprintf(buffer, buffer_length, "%s", "if1");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_0:
+      snprintf(buffer, buffer_length, "%s", "so_overflow_0");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_1:
+      snprintf(buffer, buffer_length, "%s", "so_overflow_1");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_2:
+      snprintf(buffer, buffer_length, "%s", "so_overflow_2");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_3:
+      snprintf(buffer, buffer_length, "%s", "so_overflow_3");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_GLOBAL:
+      snprintf(buffer, buffer_length, "%s", "so_overflow_any");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_KEEP:
+      snprintf(buffer, buffer_length, "%s", "keep");
+      break;
+   case PVR_ROGUE_PDSINST_PREDICATE_OOB:
+      snprintf(buffer, buffer_length, "%s", "oob");
+      break;
+   default:
+      snprintf(buffer, buffer_length, "%s", "<ERROR>");
+      break;
+   }
+}
+
+/* Disassemble a BRA instruction: "if [!] <srcc> <address>", with the
+ * "( setc = ... )" clause appended only when the instruction also updates a
+ * predicate.
+ */
+static void pvr_pds_disassemble_instruction_bra(struct pvr_bra *bra,
+                                                char *instr_str,
+                                                size_t instr_len)
+{
+   char setc_pred[32];
+   char srcc_pred[32];
+
+   pvr_pds_disassemble_predicate(bra->srcc->predicate,
+                                 srcc_pred,
+                                 sizeof(srcc_pred));
+   pvr_pds_disassemble_predicate(bra->setc->predicate,
+                                 setc_pred,
+                                 sizeof(setc_pred));
+
+   /* PREDICATE_KEEP means the branch leaves the predicate untouched, so the
+    * setc clause is elided from the listing.
+    */
+   if (bra->setc->predicate != PVR_ROGUE_PDSINST_PREDICATE_KEEP) {
+      snprintf(instr_str,
+               instr_len,
+               "%-16sif %s%s %d ( setc = %s )",
+               "BRA",
+               bra->srcc->negate ? "! " : "",
+               srcc_pred,
+               bra->address,
+               setc_pred);
+   } else {
+      snprintf(instr_str,
+               instr_len,
+               "%-16sif %s%s %d",
+               "BRA",
+               bra->srcc->negate ? "! " : "",
+               srcc_pred,
+               bra->address);
+   }
+}
+
+/* Disassemble a MAD instruction: "dst = (src0 * src1) +/- src2".  The sna
+ * flag selects subtraction of src2; alum flags signed ALU mode.
+ */
+static void pvr_pds_disassemble_instruction_mad(struct pvr_mad *mad,
+                                                char *instr_str,
+                                                size_t instr_len)
+{
+   char src0[PVR_PDS_MAX_INST_STR_LEN];
+   char src1[PVR_PDS_MAX_INST_STR_LEN];
+   char src2[PVR_PDS_MAX_INST_STR_LEN];
+   char dst[PVR_PDS_MAX_INST_STR_LEN];
+
+   pvr_pds_disassemble_operand(mad->src0, src0, sizeof(src0));
+   pvr_pds_disassemble_operand(mad->src1, src1, sizeof(src1));
+   pvr_pds_disassemble_operand(mad->src2, src2, sizeof(src2));
+   pvr_pds_disassemble_operand(mad->dst, dst, sizeof(dst));
+
+   snprintf(instr_str,
+            instr_len,
+            "%-16s%s%s = (%s * %s) %s %s%s",
+            "MAD",
+            mad->cc ? "? " : "",
+            dst,
+            src0,
+            src1,
+            mad->sna ? "-" : "+",
+            src2,
+            mad->alum ? " [signed]" : "");
+}
+
+static void pvr_pds_disassemble_instruction_dout(struct pvr_dout *dout,
+                                                 char *instr_str,
+                                                 size_t instr_len)
+{
+   char src0[PVR_PDS_MAX_INST_STR_LEN];
+   char src1[PVR_PDS_MAX_INST_STR_LEN];
+
+#define X(dout_dst, str) #str,
+   static const char *const dst[] = { PVR_PDS_DOUT_DSTS };
+#undef X
+
+   pvr_pds_disassemble_operand(dout->src0, src0, sizeof(src0));
+   pvr_pds_disassemble_operand(dout->src1, src1, sizeof(src1));
+
+   {
+      snprintf(instr_str,
+               instr_len,
+               "%-16s%s%s = %s, %s%s",
+               "DOUT",
+               dout->cc ? "? " : "",
+               dst[dout->dst],
+               src0,
+               src1,
+               dout->END ? "; HALT" : "");
+   }
+}
+
+/* Disassemble one decoded PDS instruction into instr_str (at most instr_len
+ * bytes), dispatching on instruction->type to the per-opcode helpers above.
+ * A NULL instruction (decode failure) and unhandled types produce
+ * placeholder text instead of failing.
+ * NOTE(review): the downcasts assume each struct pvr_* is layout-compatible
+ * with struct pvr_instruction — confirm against the struct definitions.
+ */
+void pvr_pds_disassemble_instruction(char *instr_str,
+                                     size_t instr_len,
+                                     struct pvr_instruction *instruction)
+{
+   if (!instruction) {
+      snprintf(instr_str,
+               instr_len,
+               "Instruction was not disassembled properly\n");
+      return;
+   }
+
+   switch (instruction->type) {
+   case INS_LIMM:
+      pvr_pds_disassemble_instruction_limm((struct pvr_limm *)instruction,
+                                           instr_str,
+                                           instr_len);
+      break;
+   case INS_ADD64:
+      pvr_pds_disassemble_instruction_add64((struct pvr_add *)instruction,
+                                            instr_str,
+                                            instr_len);
+      break;
+   case INS_ADD32:
+      pvr_pds_disassemble_instruction_add32((struct pvr_add *)instruction,
+                                            instr_str,
+                                            instr_len);
+      break;
+   case INS_CMP:
+      pvr_pds_disassemble_instruction_cmp((struct pvr_cmp *)instruction,
+                                          instr_str,
+                                          instr_len);
+      break;
+   case INS_MAD:
+      pvr_pds_disassemble_instruction_mad((struct pvr_mad *)instruction,
+                                          instr_str,
+                                          instr_len);
+      break;
+   case INS_BRA:
+      pvr_pds_disassemble_instruction_bra((struct pvr_bra *)instruction,
+                                          instr_str,
+                                          instr_len);
+      break;
+   case INS_DDMAD:
+      pvr_pds_disassemble_instruction_ddmad((struct pvr_ddmad *)instruction,
+                                            instr_str,
+                                            instr_len);
+      break;
+   case INS_DOUT:
+      pvr_pds_disassemble_instruction_dout((struct pvr_dout *)instruction,
+                                           instr_str,
+                                           instr_len);
+      break;
+   case INS_LD:
+   case INS_ST:
+      /* LD and ST share one decoder; the helper examines ins->st. */
+      pvr_pds_disassemble_instruction_ldst((struct pvr_ldst *)instruction,
+                                           instr_str,
+                                           instr_len);
+      break;
+   case INS_WDF:
+      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
+                                 "WDF",
+                                 instr_str,
+                                 instr_len);
+      break;
+   case INS_LOCK:
+      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
+                                 "LOCK",
+                                 instr_str,
+                                 instr_len);
+      break;
+   case INS_RELEASE:
+      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
+                                 "RELEASE",
+                                 instr_str,
+                                 instr_len);
+      break;
+   case INS_HALT:
+      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
+                                 "HALT",
+                                 instr_str,
+                                 instr_len);
+      break;
+   case INS_NOP:
+      pvr_pds_disassemble_simple((struct pvr_simple *)instruction,
+                                 "NOP",
+                                 instr_str,
+                                 instr_len);
+      break;
+   case INS_SFTLP32:
+      pvr_pds_disassemble_instruction_sftlp32((struct pvr_sftlp *)instruction,
+                                              instr_str,
+                                              instr_len);
+      break;
+   case INS_SFTLP64:
+      pvr_pds_disassemble_instruction_sftlp64((struct pvr_sftlp *)instruction,
+                                              instr_str,
+                                              instr_len);
+      break;
+   case INS_STM:
+      pvr_pds_disassemble_instruction_stm((struct pvr_stm *)instruction,
+                                          instr_str,
+                                          instr_len);
+      break;
+   case INS_STMC:
+      pds_disassemble_instruction_stmc((struct pvr_stmc *)instruction,
+                                       instr_str,
+                                       instr_len);
+      break;
+   default:
+      snprintf(instr_str, instr_len, "Printing not implemented\n");
+      break;
+   }
+}
+
+#if defined(DUMP_PDS)
+/* Debug-only (DUMP_PDS builds): decode a raw PDS instruction word and log it
+ * as "0x<word>, /* <disassembly> *​/".  If decoding fails, log just the raw
+ * word.
+ */
+void pvr_pds_print_instruction(uint32_t instr)
+{
+   char instruction_str[1024];
+   struct pvr_instruction *decoded =
+      pvr_pds_disassemble_instruction2(0, 0, instr);
+
+   if (!decoded) {
+      mesa_logd("%X\n", instr);
+   } else {
+      pvr_pds_disassemble_instruction(instruction_str,
+                                      sizeof(instruction_str),
+                                      decoded);
+      mesa_logd("\t0x%08x, /* %s */\n", instr, instruction_str);
+   }
+}
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays0.h
new file mode 100644 (file)
index 0000000..ac1d5e3
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS0_H
+#define PVR_DRAW_INDIRECTARRAYS0_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+/* Pre-assembled PDS program words; the trailing comment on each entry is the
+ * disassembly of that instruction.  Variant 0 differs from variants 1/2 in
+ * the temp registers used and in the VDM header constants patched below.
+ */
+static const uint32_t pvr_draw_indirect_arrays0_code[15] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1940000, /* LIMM            temp[5].32 = 0000 */
+   0x500c0804, /* SFTLP32         temp[4].32 = temp[1].32 << 0 */
+   0xb1880000, /* CMP             P0 = (temp[4].64 = 0000) */
+   0xd9800000, /* LIMM            ? temp[0].32 = 0000 */
+   0xd9840000, /* LIMM            ? temp[1].32 = 0000 */
+   0x04081023, /* MAD             temp[6].64 = (temp[1].32 * const[2].32) +
+                                               const[4].64 */
+   0x50343001, /* SFTLP32         temp[1].32 = temp[6].32 << 0 */
+   0x912040c1, /* ADD32           temp[1].32 = temp[1].32 - const[3].32  */
+   0xd0800003, /* ST              const[6].64: mem(?) <= src(?) */
+   0xd0000004, /* LD              const[8].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf40a4003, /* DOUT            doutv = temp[0].64, const[10].32; HALT */
+};
+
+static const struct pvr_psc_program_output pvr_draw_indirect_arrays0_program = {
+   pvr_draw_indirect_arrays0_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   4, /* number of constant mappings */
+
+   12, /* size of data segment, in dwords, aligned to 4 */
+   16, /* size of code segment, in dwords, aligned to 4 */
+   12, /* size of temp segment, in dwords, aligned to 4 */
+   11, /* size of data segment, in dwords */
+   15, /* size of code segment, in dwords */
+   10, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* The macros below patch individual entries of the program's data segment.
+ * NOTE(review): the `buffer + N` offsets appear to be 32-bit word indices
+ * (64-bit values land at even offsets 0/6/8, 32-bit at 2/3/10) — confirm the
+ * element type of `buffer` at the call sites.
+ */
+#define pvr_write_draw_indirect_arrays0_di_data(buffer, addr, device) \
+   do {                                                               \
+      uint64_t data = ((addr) | (0x40000000000ULL) |                  \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                        \
+   } while (0)
+#define pvr_write_draw_indirect_arrays0_write_vdm(buffer, addr) \
+   do {                                                         \
+      uint64_t data = ((addr) | (0x30000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);      \
+      memcpy(buffer + 6, &data, sizeof(data));                  \
+   } while (0)
+#define pvr_write_draw_indirect_arrays0_flush_vdm(buffer, addr) \
+   do {                                                         \
+      uint64_t data = ((addr) | (0x1940000000000ULL));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);      \
+      memcpy(buffer + 8, &data, sizeof(data));                  \
+   } while (0)
+#define pvr_write_draw_indirect_arrays0_num_views(buffer, value) \
+   do {                                                          \
+      uint32_t data = value;                                     \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);       \
+      memcpy(buffer + 2, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_arrays0_immediates(buffer)    \
+   do {                                                       \
+      {                                                       \
+         uint64_t data = 0x0;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 4, &data, sizeof(data));             \
+      }                                                       \
+      {                                                       \
+         uint32_t data = 0x1;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 3, &data, sizeof(data));             \
+      }                                                       \
+      {                                                       \
+         uint32_t data = 0x0;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data));            \
+      }                                                       \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays1.h
new file mode 100644 (file)
index 0000000..19678dc
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS1_H
+#define PVR_DRAW_INDIRECTARRAYS1_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+/* Pre-assembled PDS program words; the trailing comment on each entry is the
+ * disassembly of that instruction.  Same program shape as variant 0 but with
+ * different temp registers and VDM header constants.
+ */
+static const uint32_t pvr_draw_indirect_arrays1_code[15] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1940000, /* LIMM            temp[5].32 = 0000 */
+   0x50141004, /* SFTLP32         temp[4].32 = temp[2].32 << 0 */
+   0xb1880000, /* CMP             P0 = (temp[4].64 = 0000) */
+   0xd9840000, /* LIMM            ? temp[1].32 = 0000 */
+   0xd9880000, /* LIMM            ? temp[2].32 = 0000 */
+   0x04101023, /* MAD             temp[6].64 = (temp[2].32 * const[2].32) +
+                                               const[4].64 */
+   0x50343002, /* SFTLP32         temp[2].32 = temp[6].32 << 0 */
+   0x912080c2, /* ADD32           temp[2].32 = temp[2].32 - const[3].32  */
+   0xd0800003, /* ST              const[6].64: mem(?) <= src(?) */
+   0xd0000004, /* LD              const[8].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf40a4003, /* DOUT            doutv = temp[0].64, const[10].32; HALT */
+};
+
+static const struct pvr_psc_program_output pvr_draw_indirect_arrays1_program = {
+   pvr_draw_indirect_arrays1_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   4, /* number of constant mappings */
+
+   12, /* size of data segment, in dwords, aligned to 4 */
+   16, /* size of code segment, in dwords, aligned to 4 */
+   12, /* size of temp segment, in dwords, aligned to 4 */
+   11, /* size of data segment, in dwords */
+   15, /* size of code segment, in dwords */
+   10, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* The macros below patch individual entries of the program's data segment.
+ * NOTE(review): the `buffer + N` offsets appear to be 32-bit word indices —
+ * confirm the element type of `buffer` at the call sites.
+ */
+#define pvr_write_draw_indirect_arrays1_di_data(buffer, addr, device) \
+   do {                                                               \
+      uint64_t data = ((addr) | (0x40000000000ULL) |                  \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                        \
+   } while (0)
+#define pvr_write_draw_indirect_arrays1_write_vdm(buffer, addr) \
+   do {                                                         \
+      uint64_t data = ((addr) | (0x430000000000ULL));           \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);      \
+      memcpy(buffer + 6, &data, sizeof(data));                  \
+   } while (0)
+#define pvr_write_draw_indirect_arrays1_flush_vdm(buffer, addr) \
+   do {                                                         \
+      uint64_t data = ((addr) | (0x1940000000000ULL));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);      \
+      memcpy(buffer + 8, &data, sizeof(data));                  \
+   } while (0)
+#define pvr_write_draw_indirect_arrays1_num_views(buffer, value) \
+   do {                                                          \
+      uint32_t data = value;                                     \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);       \
+      memcpy(buffer + 2, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_arrays1_immediates(buffer)    \
+   do {                                                       \
+      {                                                       \
+         uint64_t data = 0x0;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 4, &data, sizeof(data));             \
+      }                                                       \
+      {                                                       \
+         uint32_t data = 0x1;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 3, &data, sizeof(data));             \
+      }                                                       \
+      {                                                       \
+         uint32_t data = 0x0;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data));            \
+      }                                                       \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays2.h
new file mode 100644 (file)
index 0000000..ae75d9a
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS2_H
+#define PVR_DRAW_INDIRECTARRAYS2_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+/* Pre-assembled PDS program words; the trailing comment on each entry is the
+ * disassembly of that instruction.  Same program shape as variants 0/1 but
+ * with different temp registers and VDM header constants.
+ */
+static const uint32_t pvr_draw_indirect_arrays2_code[15] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x501c1800, /* SFTLP32         temp[0].32 = temp[3].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd9880000, /* LIMM            ? temp[2].32 = 0000 */
+   0xd98c0000, /* LIMM            ? temp[3].32 = 0000 */
+   0x04181023, /* MAD             temp[6].64 = (temp[3].32 * const[2].32) +
+                                               const[4].64 */
+   0x50343003, /* SFTLP32         temp[3].32 = temp[6].32 << 0 */
+   0x9120c0c3, /* ADD32           temp[3].32 = temp[3].32 - const[3].32  */
+   0xd0800003, /* ST              const[6].64: mem(?) <= src(?) */
+   0xd0000004, /* LD              const[8].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf40a4003, /* DOUT            doutv = temp[0].64, const[10].32; HALT */
+};
+
+static const struct pvr_psc_program_output pvr_draw_indirect_arrays2_program = {
+   pvr_draw_indirect_arrays2_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   4, /* number of constant mappings */
+
+   12, /* size of data segment, in dwords, aligned to 4 */
+   16, /* size of code segment, in dwords, aligned to 4 */
+   12, /* size of temp segment, in dwords, aligned to 4 */
+   11, /* size of data segment, in dwords */
+   15, /* size of code segment, in dwords */
+   10, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* The macros below patch individual entries of the program's data segment.
+ * NOTE(review): the `buffer + N` offsets appear to be 32-bit word indices —
+ * confirm the element type of `buffer` at the call sites.
+ */
+#define pvr_write_draw_indirect_arrays2_di_data(buffer, addr, device) \
+   do {                                                               \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                  \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                        \
+   } while (0)
+#define pvr_write_draw_indirect_arrays2_write_vdm(buffer, addr) \
+   do {                                                         \
+      uint64_t data = ((addr) | (0x830000000000ULL));           \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);      \
+      memcpy(buffer + 6, &data, sizeof(data));                  \
+   } while (0)
+#define pvr_write_draw_indirect_arrays2_flush_vdm(buffer, addr) \
+   do {                                                         \
+      uint64_t data = ((addr) | (0x1940000000000ULL));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);      \
+      memcpy(buffer + 8, &data, sizeof(data));                  \
+   } while (0)
+#define pvr_write_draw_indirect_arrays2_num_views(buffer, value) \
+   do {                                                          \
+      uint32_t data = value;                                     \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);       \
+      memcpy(buffer + 2, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_arrays2_immediates(buffer)    \
+   do {                                                       \
+      {                                                       \
+         uint64_t data = 0x0;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 4, &data, sizeof(data));             \
+      }                                                       \
+      {                                                       \
+         uint32_t data = 0x1;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 3, &data, sizeof(data));             \
+      }                                                       \
+      {                                                       \
+         uint32_t data = 0x0;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data));            \
+      }                                                       \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays3.h
new file mode 100644 (file)
index 0000000..c3df11e
--- /dev/null
@@ -0,0 +1,105 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS3_H
+#define PVR_DRAW_INDIRECTARRAYS3_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds (machine-generated PDS opcodes; regenerate rather than hand-edit). */
+
+static const uint32_t pvr_draw_indirect_arrays3_code[15] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x50242000, /* SFTLP32         temp[0].32 = temp[4].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd98c0000, /* LIMM            ? temp[3].32 = 0000 */
+   0xd9900000, /* LIMM            ? temp[4].32 = 0000 */
+   0x04201023, /* MAD             temp[6].64 = (temp[4].32 * const[2].32) +
+                                               const[4].64 */
+   0x50343004, /* SFTLP32         temp[4].32 = temp[6].32 << 0 */
+   0x912100c4, /* ADD32           temp[4].32 = temp[4].32 - const[3].32  */
+   0xd0800003, /* ST              const[6].64: mem(?) <= src(?) */
+   0xd0000004, /* LD              const[8].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf40a4003, /* DOUT            doutv = temp[0].64, const[10].32; HALT */
+};
+
+static const struct pvr_psc_program_output pvr_draw_indirect_arrays3_program = {
+   pvr_draw_indirect_arrays3_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   4, /* number of constant mappings */
+
+   12, /* size of data segment, in dwords, aligned to 4 */
+   16, /* size of code segment, in dwords, aligned to 4 */
+   12, /* size of temp segment, in dwords, aligned to 4 */
+   11, /* size of data segment, in dwords */
+   15, /* size of code segment, in dwords */
+   10, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+#define pvr_write_draw_indirect_arrays3_di_data(buffer, addr, device) \
+   do {                                                               \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                  \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data)); /* data dwords 0-1; offsets assume a dword-typed buffer */ \
+   } while (0)
+#define pvr_write_draw_indirect_arrays3_write_vdm(buffer, addr) \
+   do {                                                         \
+      uint64_t data = ((addr) | (0xc30000000000ULL));           \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);      \
+      memcpy(buffer + 6, &data, sizeof(data)); /* data dwords 6-7 */ \
+   } while (0)
+#define pvr_write_draw_indirect_arrays3_flush_vdm(buffer, addr) \
+   do {                                                         \
+      uint64_t data = ((addr) | (0x1940000000000ULL));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);      \
+      memcpy(buffer + 8, &data, sizeof(data)); /* data dwords 8-9 */ \
+   } while (0)
+#define pvr_write_draw_indirect_arrays3_num_views(buffer, value) \
+   do {                                                          \
+      uint32_t data = value;                                     \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);       \
+      memcpy(buffer + 2, &data, sizeof(data)); /* data dword 2 */ \
+   } while (0)
+#define pvr_write_draw_indirect_arrays3_immediates(buffer)    \
+   do {                                                       \
+      {                                                       \
+         uint64_t data = 0x0;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 4, &data, sizeof(data)); /* data dwords 4-5 = 0 */ \
+      }                                                       \
+      {                                                       \
+         uint32_t data = 0x1;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 3, &data, sizeof(data)); /* data dword 3 = 1 */ \
+      }                                                       \
+      {                                                       \
+         uint32_t data = 0x0;                                 \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data)); /* data dword 10 = 0 */ \
+      }                                                       \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTARRAYS3_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance0.h
new file mode 100644 (file)
index 0000000..6513876
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE0_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE0_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds (machine-generated PDS opcodes; regenerate rather than hand-edit). */
+
+static const uint32_t pvr_draw_indirect_arrays_base_instance0_code[18] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1940000, /* LIMM            temp[5].32 = 0000 */
+   0x500c0804, /* SFTLP32         temp[4].32 = temp[1].32 << 0 */
+   0xb1880000, /* CMP             P0 = (temp[4].64 = 0000) */
+   0xd9800000, /* LIMM            ? temp[0].32 = 0000 */
+   0xd9840000, /* LIMM            ? temp[1].32 = 0000 */
+   0x04081023, /* MAD             temp[6].64 = (temp[1].32 * const[2].32) +
+                                               const[4].64 */
+   0x50343001, /* SFTLP32         temp[1].32 = temp[6].32 << 0 */
+   0x912040c1, /* ADD32           temp[1].32 = temp[1].32 - const[3].32  */
+   0x9001a0a0, /* ADD32           ptemp[0].32 = const[6].32 + temp[2].32  */
+   0xd0800004, /* ST              const[8].64: mem(?) <= src(?) */
+   0x9001a0e1, /* ADD32           ptemp[1].32 = const[6].32 + temp[3].32  */
+   0x9130f0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 - ptemp[3].32  */
+   0xd0000005, /* LD              const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4064003, /* DOUT            doutv = temp[0].64, const[6].32; HALT */
+};
+
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance0_program = {
+      pvr_draw_indirect_arrays_base_instance0_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      10, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+#define pvr_write_draw_indirect_arrays_base_instance0_di_data(buffer, \
+                                                              addr,   \
+                                                              device) \
+   do {                                                               \
+      uint64_t data = ((addr) | (0x40000000000ULL) |                  \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data)); /* data dwords 0-1; offsets assume a dword-typed buffer */ \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance0_write_vdm(buffer, addr) \
+   do {                                                                       \
+      uint64_t data = ((addr) | (0x30000000000ULL));                          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                    \
+      memcpy(buffer + 8, &data, sizeof(data)); /* data dwords 8-9 */          \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance0_flush_vdm(buffer, addr) \
+   do {                                                                       \
+      uint64_t data = ((addr) | (0x1940000000000ULL));                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                    \
+      memcpy(buffer + 10, &data, sizeof(data)); /* data dwords 10-11 */       \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance0_num_views(buffer, value) \
+   do {                                                                        \
+      uint32_t data = value;                                                   \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 2, &data, sizeof(data)); /* data dword 2 */              \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance0_immediates(buffer) \
+   do {                                                                  \
+      {                                                                  \
+         uint64_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 4, &data, sizeof(data)); /* data dwords 4-5 = 0 */ \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x1;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data)); /* data dword 3 = 1 */ \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 6, &data, sizeof(data)); /* data dword 6 = 0 */ \
+      }                                                                  \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE0_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance1.h
new file mode 100644 (file)
index 0000000..aa5f675
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE1_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE1_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds (machine-generated PDS opcodes; regenerate rather than hand-edit). */
+
+static const uint32_t pvr_draw_indirect_arrays_base_instance1_code[18] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM            temp[7].32 = 0000 */
+   0x50141006, /* SFTLP32         temp[6].32 = temp[2].32 << 0 */
+   0xb18c0000, /* CMP             P0 = (temp[6].64 = 0000) */
+   0xd9840000, /* LIMM            ? temp[1].32 = 0000 */
+   0xd9880000, /* LIMM            ? temp[2].32 = 0000 */
+   0x04101024, /* MAD             temp[8].64 = (temp[2].32 * const[2].32) +
+                                               const[4].64 */
+   0x50444002, /* SFTLP32         temp[2].32 = temp[8].32 << 0 */
+   0x912080c2, /* ADD32           temp[2].32 = temp[2].32 - const[3].32  */
+   0x9001a0e0, /* ADD32           ptemp[0].32 = const[6].32 + temp[3].32  */
+   0xd0800004, /* ST              const[8].64: mem(?) <= src(?) */
+   0x9001a121, /* ADD32           ptemp[1].32 = const[6].32 + temp[4].32  */
+   0x9130f0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 - ptemp[3].32  */
+   0xd0000005, /* LD              const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4064003, /* DOUT            doutv = temp[0].64, const[6].32; HALT */
+};
+
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance1_program = {
+      pvr_draw_indirect_arrays_base_instance1_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      12, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+#define pvr_write_draw_indirect_arrays_base_instance1_di_data(buffer, \
+                                                              addr,   \
+                                                              device) \
+   do {                                                               \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                  \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data)); /* data dwords 0-1; offsets assume a dword-typed buffer */ \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance1_write_vdm(buffer, addr) \
+   do {                                                                       \
+      uint64_t data = ((addr) | (0x430000000000ULL));                         \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                    \
+      memcpy(buffer + 8, &data, sizeof(data)); /* data dwords 8-9 */          \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance1_flush_vdm(buffer, addr) \
+   do {                                                                       \
+      uint64_t data = ((addr) | (0x2140000000000ULL));                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                    \
+      memcpy(buffer + 10, &data, sizeof(data)); /* data dwords 10-11 */       \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance1_num_views(buffer, value) \
+   do {                                                                        \
+      uint32_t data = value;                                                   \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 2, &data, sizeof(data)); /* data dword 2 */              \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance1_immediates(buffer) \
+   do {                                                                  \
+      {                                                                  \
+         uint64_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 4, &data, sizeof(data)); /* data dwords 4-5 = 0 */ \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x1;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data)); /* data dword 3 = 1 */ \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 6, &data, sizeof(data)); /* data dword 6 = 0 */ \
+      }                                                                  \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE1_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance2.h
new file mode 100644 (file)
index 0000000..588c0cb
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE2_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE2_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds (machine-generated PDS opcodes; regenerate rather than hand-edit). */
+
+static const uint32_t pvr_draw_indirect_arrays_base_instance2_code[18] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x501c1800, /* SFTLP32         temp[0].32 = temp[3].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd9880000, /* LIMM            ? temp[2].32 = 0000 */
+   0xd98c0000, /* LIMM            ? temp[3].32 = 0000 */
+   0x04181023, /* MAD             temp[6].64 = (temp[3].32 * const[2].32) +
+                                               const[4].64 */
+   0x50343003, /* SFTLP32         temp[3].32 = temp[6].32 << 0 */
+   0x9120c0c3, /* ADD32           temp[3].32 = temp[3].32 - const[3].32  */
+   0x9001a120, /* ADD32           ptemp[0].32 = const[6].32 + temp[4].32  */
+   0xd0800004, /* ST              const[8].64: mem(?) <= src(?) */
+   0x9001a161, /* ADD32           ptemp[1].32 = const[6].32 + temp[5].32  */
+   0x9130f0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 - ptemp[3].32  */
+   0xd0000005, /* LD              const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4064003, /* DOUT            doutv = temp[0].64, const[6].32; HALT */
+};
+
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance2_program = {
+      pvr_draw_indirect_arrays_base_instance2_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      10, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+#define pvr_write_draw_indirect_arrays_base_instance2_di_data(buffer, \
+                                                              addr,   \
+                                                              device) \
+   do {                                                               \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                  \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data)); /* data dwords 0-1; offsets assume a dword-typed buffer */ \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance2_write_vdm(buffer, addr) \
+   do {                                                                       \
+      uint64_t data = ((addr) | (0x830000000000ULL));                         \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                    \
+      memcpy(buffer + 8, &data, sizeof(data)); /* data dwords 8-9 */          \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance2_flush_vdm(buffer, addr) \
+   do {                                                                       \
+      uint64_t data = ((addr) | (0x1940000000000ULL));                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                    \
+      memcpy(buffer + 10, &data, sizeof(data)); /* data dwords 10-11 */       \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance2_num_views(buffer, value) \
+   do {                                                                        \
+      uint32_t data = value;                                                   \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 2, &data, sizeof(data)); /* data dword 2 */              \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance2_immediates(buffer) \
+   do {                                                                  \
+      {                                                                  \
+         uint64_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 4, &data, sizeof(data)); /* data dwords 4-5 = 0 */ \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x1;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data)); /* data dword 3 = 1 */ \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 6, &data, sizeof(data)); /* data dword 6 = 0 */ \
+      }                                                                  \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE2_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance3.h
new file mode 100644 (file)
index 0000000..2f36afa
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE3_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE3_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds (machine-generated PDS opcodes; regenerate rather than hand-edit). */
+
+static const uint32_t pvr_draw_indirect_arrays_base_instance3_code[18] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x50242000, /* SFTLP32         temp[0].32 = temp[4].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd98c0000, /* LIMM            ? temp[3].32 = 0000 */
+   0xd9900000, /* LIMM            ? temp[4].32 = 0000 */
+   0x04201024, /* MAD             temp[8].64 = (temp[4].32 * const[2].32) +
+                                               const[4].64 */
+   0x50444004, /* SFTLP32         temp[4].32 = temp[8].32 << 0 */
+   0x912100c4, /* ADD32           temp[4].32 = temp[4].32 - const[3].32  */
+   0x9001a160, /* ADD32           ptemp[0].32 = const[6].32 + temp[5].32  */
+   0xd0800004, /* ST              const[8].64: mem(?) <= src(?) */
+   0x9001a1a1, /* ADD32           ptemp[1].32 = const[6].32 + temp[6].32  */
+   0x9130f0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 - ptemp[3].32  */
+   0xd0000005, /* LD              const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4064003, /* DOUT            doutv = temp[0].64, const[6].32; HALT */
+};
+
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance3_program = {
+      pvr_draw_indirect_arrays_base_instance3_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      12, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+#define pvr_write_draw_indirect_arrays_base_instance3_di_data(buffer, \
+                                                              addr,   \
+                                                              device) \
+   do {                                                               \
+      uint64_t data = ((addr) | (0x80000000000ULL) |                  \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data)); /* data dwords 0-1; offsets assume a dword-typed buffer */ \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance3_write_vdm(buffer, addr) \
+   do {                                                                       \
+      uint64_t data = ((addr) | (0xc30000000000ULL));                         \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                    \
+      memcpy(buffer + 8, &data, sizeof(data)); /* data dwords 8-9 */          \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance3_flush_vdm(buffer, addr) \
+   do {                                                                       \
+      uint64_t data = ((addr) | (0x2140000000000ULL));                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                    \
+      memcpy(buffer + 10, &data, sizeof(data)); /* data dwords 10-11 */       \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance3_num_views(buffer, value) \
+   do {                                                                        \
+      uint32_t data = value;                                                   \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 2, &data, sizeof(data)); /* data dword 2 */              \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance3_immediates(buffer) \
+   do {                                                                  \
+      {                                                                  \
+         uint64_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 4, &data, sizeof(data)); /* data dwords 4-5 = 0 */ \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x1;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data)); /* data dword 3 = 1 */ \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 6, &data, sizeof(data)); /* data dword 6 = 0 */ \
+      }                                                                  \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE3_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid0.h
new file mode 100644 (file)
index 0000000..62c36fd
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID0_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID0_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid0_code[18] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1940000, /* LIMM            temp[5].32 = 0000 */
+   0x500c0804, /* SFTLP32         temp[4].32 = temp[1].32 << 0 */
+   0xb1880000, /* CMP             P0 = (temp[4].64 = 0000) */
+   0xd9800000, /* LIMM            ? temp[0].32 = 0000 */
+   0xd9840000, /* LIMM            ? temp[1].32 = 0000 */
+   0x04081023, /* MAD             temp[6].64 = (temp[1].32 * const[2].32) +
+                                               const[4].64 */
+   0x50343001, /* SFTLP32         temp[1].32 = temp[6].32 << 0 */
+   0x912040c1, /* ADD32           temp[1].32 = temp[1].32 - const[3].32  */
+   0x9001a0a0, /* ADD32           ptemp[0].32 = const[6].32 + temp[2].32  */
+   0xd0800004, /* ST              const[8].64: mem(?) <= src(?) */
+   0x9001a0e1, /* ADD32           ptemp[1].32 = const[6].32 + temp[3].32  */
+   0x9030c0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 + const[3].32  */
+   0xd0000005, /* LD              const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4064003, /* DOUT            doutv = temp[0].64, const[6].32; HALT */
+};
+
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance_drawid0_program = {
+      pvr_draw_indirect_arrays_base_instance_drawid0_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      10, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_di_data(buffer, \
+                                                                     addr,   \
+                                                                     device) \
+   do {                                                                      \
+      uint64_t data = ((addr) | (0x40000000000ULL) |                         \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                   \
+      memcpy(buffer + 0, &data, sizeof(data));                               \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_write_vdm(buffer, \
+                                                                       addr)   \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x30000000000ULL));                           \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 8, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_flush_vdm(buffer, \
+                                                                       addr)   \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x1940000000000ULL));                         \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 10, &data, sizeof(data));                                \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_num_views(buffer, \
+                                                                       value)  \
+   do {                                                                        \
+      uint32_t data = value;                                                   \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 2, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid0_immediates( \
+   buffer)                                                               \
+   do {                                                                  \
+      {                                                                  \
+         uint64_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 4, &data, sizeof(data));                        \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x1;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data));                        \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 6, &data, sizeof(data));                        \
+      }                                                                  \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid1.h
new file mode 100644 (file)
index 0000000..2207ac7
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID1_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID1_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid1_code[18] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM            temp[7].32 = 0000 */
+   0x50141006, /* SFTLP32         temp[6].32 = temp[2].32 << 0 */
+   0xb18c0000, /* CMP             P0 = (temp[6].64 = 0000) */
+   0xd9840000, /* LIMM            ? temp[1].32 = 0000 */
+   0xd9880000, /* LIMM            ? temp[2].32 = 0000 */
+   0x04101024, /* MAD             temp[8].64 = (temp[2].32 * const[2].32) +
+                                               const[4].64 */
+   0x50444002, /* SFTLP32         temp[2].32 = temp[8].32 << 0 */
+   0x912080c2, /* ADD32           temp[2].32 = temp[2].32 - const[3].32  */
+   0x9001a0e0, /* ADD32           ptemp[0].32 = const[6].32 + temp[3].32  */
+   0xd0800004, /* ST              const[8].64: mem(?) <= src(?) */
+   0x9001a121, /* ADD32           ptemp[1].32 = const[6].32 + temp[4].32  */
+   0x9030c0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 + const[3].32  */
+   0xd0000005, /* LD              const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4064003, /* DOUT            doutv = temp[0].64, const[6].32; HALT */
+};
+
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance_drawid1_program = {
+      pvr_draw_indirect_arrays_base_instance_drawid1_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      12, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_di_data(buffer, \
+                                                                     addr,   \
+                                                                     device) \
+   do {                                                                      \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                         \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                   \
+      memcpy(buffer + 0, &data, sizeof(data));                               \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_write_vdm(buffer, \
+                                                                       addr)   \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x430000000000ULL));                          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 8, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_flush_vdm(buffer, \
+                                                                       addr)   \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x2140000000000ULL));                         \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 10, &data, sizeof(data));                                \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_num_views(buffer, \
+                                                                       value)  \
+   do {                                                                        \
+      uint32_t data = value;                                                   \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 2, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid1_immediates( \
+   buffer)                                                               \
+   do {                                                                  \
+      {                                                                  \
+         uint64_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 4, &data, sizeof(data));                        \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x1;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data));                        \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 6, &data, sizeof(data));                        \
+      }                                                                  \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid2.h
new file mode 100644 (file)
index 0000000..daf4df6
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID2_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID2_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid2_code[18] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x501c1800, /* SFTLP32         temp[0].32 = temp[3].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd9880000, /* LIMM            ? temp[2].32 = 0000 */
+   0xd98c0000, /* LIMM            ? temp[3].32 = 0000 */
+   0x04181023, /* MAD             temp[6].64 = (temp[3].32 * const[2].32) +
+                                               const[4].64 */
+   0x50343003, /* SFTLP32         temp[3].32 = temp[6].32 << 0 */
+   0x9120c0c3, /* ADD32           temp[3].32 = temp[3].32 - const[3].32  */
+   0x9001a120, /* ADD32           ptemp[0].32 = const[6].32 + temp[4].32  */
+   0xd0800004, /* ST              const[8].64: mem(?) <= src(?) */
+   0x9001a161, /* ADD32           ptemp[1].32 = const[6].32 + temp[5].32  */
+   0x9030c0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 + const[3].32  */
+   0xd0000005, /* LD              const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4064003, /* DOUT            doutv = temp[0].64, const[6].32; HALT */
+};
+
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance_drawid2_program = {
+      pvr_draw_indirect_arrays_base_instance_drawid2_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      10, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_di_data(buffer, \
+                                                                     addr,   \
+                                                                     device) \
+   do {                                                                      \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                         \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                   \
+      memcpy(buffer + 0, &data, sizeof(data));                               \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_write_vdm(buffer, \
+                                                                       addr)   \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x830000000000ULL));                          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 8, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_flush_vdm(buffer, \
+                                                                       addr)   \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x1940000000000ULL));                         \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 10, &data, sizeof(data));                                \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_num_views(buffer, \
+                                                                       value)  \
+   do {                                                                        \
+      uint32_t data = value;                                                   \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 2, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid2_immediates( \
+   buffer)                                                               \
+   do {                                                                  \
+      {                                                                  \
+         uint64_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 4, &data, sizeof(data));                        \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x1;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data));                        \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 6, &data, sizeof(data));                        \
+      }                                                                  \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_arrays_base_instance_drawid3.h
new file mode 100644 (file)
index 0000000..56e773b
--- /dev/null
@@ -0,0 +1,115 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID3_H
+#define PVR_DRAW_INDIRECTARRAYS_BASE_INSTANCE_DRAWID3_H
+
+/* Initially generated from ARB_draw_indirect_arrays.pds */
+
+static const uint32_t pvr_draw_indirect_arrays_base_instance_drawid3_code[18] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x50242000, /* SFTLP32         temp[0].32 = temp[4].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd98c0000, /* LIMM            ? temp[3].32 = 0000 */
+   0xd9900000, /* LIMM            ? temp[4].32 = 0000 */
+   0x04201024, /* MAD             temp[8].64 = (temp[4].32 * const[2].32) +
+                                               const[4].64 */
+   0x50444004, /* SFTLP32         temp[4].32 = temp[8].32 << 0 */
+   0x912100c4, /* ADD32           temp[4].32 = temp[4].32 - const[3].32  */
+   0x9001a160, /* ADD32           ptemp[0].32 = const[6].32 + temp[5].32  */
+   0xd0800004, /* ST              const[8].64: mem(?) <= src(?) */
+   0x9001a1a1, /* ADD32           ptemp[1].32 = const[6].32 + temp[6].32  */
+   0x9030c0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 + const[3].32  */
+   0xd0000005, /* LD              const[10].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4064003, /* DOUT            doutv = temp[0].64, const[6].32; HALT */
+};
+
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_arrays_base_instance_drawid3_program = {
+      pvr_draw_indirect_arrays_base_instance_drawid3_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      4, /* number of constant mappings */
+
+      12, /* size of data segment, in dwords, aligned to 4 */
+      20, /* size of code segment, in dwords, aligned to 4 */
+      12, /* size of temp segment, in dwords, aligned to 4 */
+      12, /* size of data segment, in dwords */
+      18, /* size of code segment, in dwords */
+      12, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_di_data(buffer, \
+                                                                     addr,   \
+                                                                     device) \
+   do {                                                                      \
+      uint64_t data = ((addr) | (0x80000000000ULL) |                         \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                   \
+      memcpy(buffer + 0, &data, sizeof(data));                               \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_write_vdm(buffer, \
+                                                                       addr)   \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0xc30000000000ULL));                          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 8, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_flush_vdm(buffer, \
+                                                                       addr)   \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x2140000000000ULL));                         \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 10, &data, sizeof(data));                                \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_num_views(buffer, \
+                                                                       value)  \
+   do {                                                                        \
+      uint32_t data = value;                                                   \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);                     \
+      memcpy(buffer + 2, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_arrays_base_instance_drawid3_immediates( \
+   buffer)                                                               \
+   do {                                                                  \
+      {                                                                  \
+         uint64_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 4, &data, sizeof(data));                        \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x1;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data));                        \
+      }                                                                  \
+      {                                                                  \
+         uint32_t data = 0x0;                                            \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ARRAYS", data, 0);            \
+         memcpy(buffer + 6, &data, sizeof(data));                        \
+      }                                                                  \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements0.h
new file mode 100644 (file)
index 0000000..d2f00a3
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS0_H
+#define PVR_DRAW_INDIRECTELEMENTS0_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+/* NOTE(review): machine-generated PDS opcode blob — do not hand-edit the hex
+ * words below; regenerate from the .pds source named above instead. */
+
+/* PDS program code segment; each entry's trailing comment is the generator's
+ * disassembly of that instruction word. */
+static const uint32_t pvr_draw_indirect_elements0_code[21] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0x9000a0e0, /* ADD32           ptemp[0].32 = const[2].32 + temp[3].32  */
+   0x04101822, /* MAD             temp[4].64 = (temp[2].32 * const[3].32) +
+                                               const[4].64 */
+   0x53283006, /* SFTLP32         temp[6].32 = (temp[5].32 | const[6].32) << 0
+                */
+   0x50242007, /* SFTLP32         temp[7].32 = temp[4].32 << 0 */
+   0x04083842, /* MAD             temp[4].64 = (temp[1].32 * const[7].32) +
+                                               const[8].64 */
+   0x50242001, /* SFTLP32         temp[1].32 = temp[4].32 << 0 */
+   0x50040008, /* SFTLP32         temp[8].32 = temp[0].32 << 0 */
+   0x91204289, /* ADD32           temp[9].32 = temp[1].32 - const[10].32  */
+   0x501c180a, /* SFTLP32         temp[10].32 = temp[3].32 << 0 */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1940000, /* LIMM            temp[5].32 = 0000 */
+   0x500c0804, /* SFTLP32         temp[4].32 = temp[1].32 << 0 */
+   0xb1880000, /* CMP             P0 = (temp[4].64 = 0000) */
+   0xd9a00000, /* LIMM            ? temp[8].32 = 0000 */
+   0xd9a40000, /* LIMM            ? temp[9].32 = 0000 */
+   0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Program descriptor; per-field meaning is given by the trailing comments
+ * (all sizes are in dwords). */
+static const struct pvr_psc_program_output pvr_draw_indirect_elements0_program = {
+   pvr_draw_indirect_elements0_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   7, /* number of constant mappings */
+
+   16, /* size of data segment, in dwords, aligned to 4 */
+   24, /* size of code segment, in dwords, aligned to 4 */
+   20, /* size of temp segment, in dwords, aligned to 4 */
+   16, /* size of data segment, in dwords */
+   21, /* size of code segment, in dwords */
+   18, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* The pvr_write_* macros below patch individual fields into this program's
+ * data segment. "buffer + N" looks like a dword offset into the data segment
+ * (buffer presumably uint32_t *) — TODO(review): confirm against callers. */
+#define pvr_write_draw_indirect_elements0_di_data(buffer, addr, device) \
+   do {                                                                 \
+      uint64_t data = ((addr) | (0x40000000000ULL) |                    \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                          \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_write_vdm(buffer, addr) \
+   do {                                                           \
+      uint64_t data = ((addr) | (0x1850000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 12, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_flush_vdm(buffer, addr) \
+   do {                                                           \
+      uint64_t data = ((addr) | (0x3160000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 14, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_idx_stride(buffer, value) \
+   do {                                                             \
+      uint32_t data = value;                                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);        \
+      memcpy(buffer + 3, &data, sizeof(data));                      \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_idx_base(buffer, value) \
+   do {                                                           \
+      uint64_t data = value;                                      \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 4, &data, sizeof(data));                    \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_idx_header(buffer, value) \
+   do {                                                             \
+      uint32_t data = value;                                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);        \
+      memcpy(buffer + 6, &data, sizeof(data));                      \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_num_views(buffer, value) \
+   do {                                                            \
+      uint32_t data = value;                                       \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);       \
+      memcpy(buffer + 7, &data, sizeof(data));                     \
+   } while (0)
+#define pvr_write_draw_indirect_elements0_immediates(buffer)    \
+   do {                                                         \
+      {                                                         \
+         uint32_t data = 0x0;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data));               \
+      }                                                         \
+      {                                                         \
+         uint64_t data = 0x0;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data));               \
+      }                                                         \
+      {                                                         \
+         uint32_t data = 0x1;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data));              \
+      }                                                         \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTELEMENTS0_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements1.h
new file mode 100644 (file)
index 0000000..6ce4d63
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS1_H
+#define PVR_DRAW_INDIRECTELEMENTS1_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+/* NOTE(review): machine-generated PDS opcode blob — do not hand-edit the hex
+ * words below; regenerate from the .pds source named above instead. */
+
+/* PDS program code segment; each entry's trailing comment is the generator's
+ * disassembly of that instruction word. */
+static const uint32_t pvr_draw_indirect_elements1_code[21] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0x9000a120, /* ADD32           ptemp[0].32 = const[2].32 + temp[4].32  */
+   0x04181823, /* MAD             temp[6].64 = (temp[3].32 * const[3].32) +
+                                               const[4].64 */
+   0x53383008, /* SFTLP32         temp[8].32 = (temp[7].32 | const[6].32) << 0
+                */
+   0x50343009, /* SFTLP32         temp[9].32 = temp[6].32 << 0 */
+   0x04103843, /* MAD             temp[6].64 = (temp[2].32 * const[7].32) +
+                                               const[8].64 */
+   0x50343002, /* SFTLP32         temp[2].32 = temp[6].32 << 0 */
+   0x500c080a, /* SFTLP32         temp[10].32 = temp[1].32 << 0 */
+   0x9120828b, /* ADD32           temp[11].32 = temp[2].32 - const[10].32  */
+   0x5024200c, /* SFTLP32         temp[12].32 = temp[4].32 << 0 */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM            temp[7].32 = 0000 */
+   0x50141006, /* SFTLP32         temp[6].32 = temp[2].32 << 0 */
+   0xb18c0000, /* CMP             P0 = (temp[6].64 = 0000) */
+   0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+   0xd9ac0000, /* LIMM            ? temp[11].32 = 0000 */
+   0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Program descriptor; per-field meaning is given by the trailing comments
+ * (all sizes are in dwords). */
+static const struct pvr_psc_program_output pvr_draw_indirect_elements1_program = {
+   pvr_draw_indirect_elements1_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   7, /* number of constant mappings */
+
+   16, /* size of data segment, in dwords, aligned to 4 */
+   24, /* size of code segment, in dwords, aligned to 4 */
+   20, /* size of temp segment, in dwords, aligned to 4 */
+   16, /* size of data segment, in dwords */
+   21, /* size of code segment, in dwords */
+   20, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* The pvr_write_* macros below patch individual fields into this program's
+ * data segment. "buffer + N" looks like a dword offset into the data segment
+ * (buffer presumably uint32_t *) — TODO(review): confirm against callers. */
+#define pvr_write_draw_indirect_elements1_di_data(buffer, addr, device) \
+   do {                                                                 \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                    \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                          \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_write_vdm(buffer, addr) \
+   do {                                                           \
+      uint64_t data = ((addr) | (0x2050000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 12, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_flush_vdm(buffer, addr) \
+   do {                                                           \
+      uint64_t data = ((addr) | (0x3960000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 14, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_idx_stride(buffer, value) \
+   do {                                                             \
+      uint32_t data = value;                                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);        \
+      memcpy(buffer + 3, &data, sizeof(data));                      \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_idx_base(buffer, value) \
+   do {                                                           \
+      uint64_t data = value;                                      \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 4, &data, sizeof(data));                    \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_idx_header(buffer, value) \
+   do {                                                             \
+      uint32_t data = value;                                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);        \
+      memcpy(buffer + 6, &data, sizeof(data));                      \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_num_views(buffer, value) \
+   do {                                                            \
+      uint32_t data = value;                                       \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);       \
+      memcpy(buffer + 7, &data, sizeof(data));                     \
+   } while (0)
+#define pvr_write_draw_indirect_elements1_immediates(buffer)    \
+   do {                                                         \
+      {                                                         \
+         uint32_t data = 0x0;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data));               \
+      }                                                         \
+      {                                                         \
+         uint64_t data = 0x0;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data));               \
+      }                                                         \
+      {                                                         \
+         uint32_t data = 0x1;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data));              \
+      }                                                         \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTELEMENTS1_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements2.h
new file mode 100644 (file)
index 0000000..d7823eb
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS2_H
+#define PVR_DRAW_INDIRECTELEMENTS2_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+/* NOTE(review): machine-generated PDS opcode blob — do not hand-edit the hex
+ * words below; regenerate from the .pds source named above instead. */
+
+/* PDS program code segment; each entry's trailing comment is the generator's
+ * disassembly of that instruction word. */
+static const uint32_t pvr_draw_indirect_elements2_code[21] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0x9000a160, /* ADD32           ptemp[0].32 = const[2].32 + temp[5].32  */
+   0x04201820, /* MAD             temp[0].64 = (temp[4].32 * const[3].32) +
+                                               const[4].64 */
+   0x53083006, /* SFTLP32         temp[6].32 = (temp[1].32 | const[6].32) << 0
+                */
+   0x50040007, /* SFTLP32         temp[7].32 = temp[0].32 << 0 */
+   0x04183840, /* MAD             temp[0].64 = (temp[3].32 * const[7].32) +
+                                               const[8].64 */
+   0x50040003, /* SFTLP32         temp[3].32 = temp[0].32 << 0 */
+   0x50141008, /* SFTLP32         temp[8].32 = temp[2].32 << 0 */
+   0x9120c289, /* ADD32           temp[9].32 = temp[3].32 - const[10].32  */
+   0x502c280a, /* SFTLP32         temp[10].32 = temp[5].32 << 0 */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x501c1800, /* SFTLP32         temp[0].32 = temp[3].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd9a00000, /* LIMM            ? temp[8].32 = 0000 */
+   0xd9a40000, /* LIMM            ? temp[9].32 = 0000 */
+   0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Program descriptor; per-field meaning is given by the trailing comments
+ * (all sizes are in dwords). */
+static const struct pvr_psc_program_output pvr_draw_indirect_elements2_program = {
+   pvr_draw_indirect_elements2_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   7, /* number of constant mappings */
+
+   16, /* size of data segment, in dwords, aligned to 4 */
+   24, /* size of code segment, in dwords, aligned to 4 */
+   20, /* size of temp segment, in dwords, aligned to 4 */
+   16, /* size of data segment, in dwords */
+   21, /* size of code segment, in dwords */
+   18, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* The pvr_write_* macros below patch individual fields into this program's
+ * data segment. "buffer + N" looks like a dword offset into the data segment
+ * (buffer presumably uint32_t *) — TODO(review): confirm against callers. */
+#define pvr_write_draw_indirect_elements2_di_data(buffer, addr, device) \
+   do {                                                                 \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                    \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                          \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_write_vdm(buffer, addr) \
+   do {                                                           \
+      uint64_t data = ((addr) | (0x1850000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 12, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_flush_vdm(buffer, addr) \
+   do {                                                           \
+      uint64_t data = ((addr) | (0x3160000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 14, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_idx_stride(buffer, value) \
+   do {                                                             \
+      uint32_t data = value;                                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);        \
+      memcpy(buffer + 3, &data, sizeof(data));                      \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_idx_base(buffer, value) \
+   do {                                                           \
+      uint64_t data = value;                                      \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 4, &data, sizeof(data));                    \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_idx_header(buffer, value) \
+   do {                                                             \
+      uint32_t data = value;                                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);        \
+      memcpy(buffer + 6, &data, sizeof(data));                      \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_num_views(buffer, value) \
+   do {                                                            \
+      uint32_t data = value;                                       \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);       \
+      memcpy(buffer + 7, &data, sizeof(data));                     \
+   } while (0)
+#define pvr_write_draw_indirect_elements2_immediates(buffer)    \
+   do {                                                         \
+      {                                                         \
+         uint32_t data = 0x0;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data));               \
+      }                                                         \
+      {                                                         \
+         uint64_t data = 0x0;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data));               \
+      }                                                         \
+      {                                                         \
+         uint32_t data = 0x1;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data));              \
+      }                                                         \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTELEMENTS2_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements3.h
new file mode 100644 (file)
index 0000000..27f59e3
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS3_H
+#define PVR_DRAW_INDIRECTELEMENTS3_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+/* NOTE(review): machine-generated PDS opcode blob — do not hand-edit the hex
+ * words below; regenerate from the .pds source named above instead. */
+
+/* PDS program code segment; each entry's trailing comment is the generator's
+ * disassembly of that instruction word. */
+static const uint32_t pvr_draw_indirect_elements3_code[21] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0x9000a1a0, /* ADD32           ptemp[0].32 = const[2].32 + temp[6].32  */
+   0x04281820, /* MAD             temp[0].64 = (temp[5].32 * const[3].32) +
+                                               const[4].64 */
+   0x53083007, /* SFTLP32         temp[7].32 = (temp[1].32 | const[6].32) << 0
+                */
+   0x50040008, /* SFTLP32         temp[8].32 = temp[0].32 << 0 */
+   0x04203840, /* MAD             temp[0].64 = (temp[4].32 * const[7].32) +
+                                               const[8].64 */
+   0x50040004, /* SFTLP32         temp[4].32 = temp[0].32 << 0 */
+   0x501c1809, /* SFTLP32         temp[9].32 = temp[3].32 << 0 */
+   0x9121028a, /* ADD32           temp[10].32 = temp[4].32 - const[10].32  */
+   0x5034300b, /* SFTLP32         temp[11].32 = temp[6].32 << 0 */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x50242000, /* SFTLP32         temp[0].32 = temp[4].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd9a40000, /* LIMM            ? temp[9].32 = 0000 */
+   0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+   0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Program descriptor; per-field meaning is given by the trailing comments
+ * (all sizes are in dwords). */
+static const struct pvr_psc_program_output pvr_draw_indirect_elements3_program = {
+   pvr_draw_indirect_elements3_code, /* code segment */
+   0, /* constant mappings, zeroed since we use the macros below */
+   7, /* number of constant mappings */
+
+   16, /* size of data segment, in dwords, aligned to 4 */
+   24, /* size of code segment, in dwords, aligned to 4 */
+   20, /* size of temp segment, in dwords, aligned to 4 */
+   16, /* size of data segment, in dwords */
+   21, /* size of code segment, in dwords */
+   18, /* size of temp segment, in dwords */
+   NULL /* function pointer to write data segment */
+};
+
+/* The pvr_write_* macros below patch individual fields into this program's
+ * data segment. "buffer + N" looks like a dword offset into the data segment
+ * (buffer presumably uint32_t *) — TODO(review): confirm against callers. */
+#define pvr_write_draw_indirect_elements3_di_data(buffer, addr, device) \
+   do {                                                                 \
+      uint64_t data = ((addr) | (0x80000000000ULL) |                    \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                          \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_write_vdm(buffer, addr) \
+   do {                                                           \
+      uint64_t data = ((addr) | (0x1c50000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 12, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_flush_vdm(buffer, addr) \
+   do {                                                           \
+      uint64_t data = ((addr) | (0x3160000000000ULL));            \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 14, &data, sizeof(data));                   \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_idx_stride(buffer, value) \
+   do {                                                             \
+      uint32_t data = value;                                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);        \
+      memcpy(buffer + 3, &data, sizeof(data));                      \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_idx_base(buffer, value) \
+   do {                                                           \
+      uint64_t data = value;                                      \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);      \
+      memcpy(buffer + 4, &data, sizeof(data));                    \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_idx_header(buffer, value) \
+   do {                                                             \
+      uint32_t data = value;                                        \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);        \
+      memcpy(buffer + 6, &data, sizeof(data));                      \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_num_views(buffer, value) \
+   do {                                                            \
+      uint32_t data = value;                                       \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);       \
+      memcpy(buffer + 7, &data, sizeof(data));                     \
+   } while (0)
+#define pvr_write_draw_indirect_elements3_immediates(buffer)    \
+   do {                                                         \
+      {                                                         \
+         uint32_t data = 0x0;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 2, &data, sizeof(data));               \
+      }                                                         \
+      {                                                         \
+         uint64_t data = 0x0;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 8, &data, sizeof(data));               \
+      }                                                         \
+      {                                                         \
+         uint32_t data = 0x1;                                   \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0); \
+         memcpy(buffer + 10, &data, sizeof(data));              \
+      }                                                         \
+   } while (0)
+#endif /* PVR_DRAW_INDIRECTELEMENTS3_H */
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance0.h
new file mode 100644 (file)
index 0000000..6ac6d8b
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE0_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE0_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Precompiled PDS program: one 32-bit opcode per array entry; the trailing
+ * comments are the generator's disassembly. NOTE(review): auto-generated
+ * data -- regenerate from the .pds source rather than hand-editing.
+ */
+static const uint32_t pvr_draw_indirect_elements_base_instance0_code[23] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0x9000a0e0, /* ADD32           ptemp[0].32 = const[2].32 + temp[3].32  */
+   0x9000a121, /* ADD32           ptemp[1].32 = const[2].32 + temp[4].32  */
+   0x9130f0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 - ptemp[3].32  */
+   0x04101823, /* MAD             temp[6].64 = (temp[2].32 * const[3].32) +
+                                               const[4].64 */
+   0x53383008, /* SFTLP32         temp[8].32 = (temp[7].32 | const[6].32) << 0
+                */
+   0x50343009, /* SFTLP32         temp[9].32 = temp[6].32 << 0 */
+   0x04083843, /* MAD             temp[6].64 = (temp[1].32 * const[7].32) +
+                                               const[8].64 */
+   0x50343001, /* SFTLP32         temp[1].32 = temp[6].32 << 0 */
+   0x5004000a, /* SFTLP32         temp[10].32 = temp[0].32 << 0 */
+   0x9120428b, /* ADD32           temp[11].32 = temp[1].32 - const[10].32  */
+   0x501c180c, /* SFTLP32         temp[12].32 = temp[3].32 << 0 */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM            temp[7].32 = 0000 */
+   0x500c0806, /* SFTLP32         temp[6].32 = temp[1].32 << 0 */
+   0xb18c0000, /* CMP             P0 = (temp[6].64 = 0000) */
+   0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+   0xd9ac0000, /* LIMM            ? temp[11].32 = 0000 */
+   0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Positional initializer for struct pvr_psc_program_output; each field's
+ * meaning is given by its trailing comment.
+ */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance0_program = {
+      pvr_draw_indirect_elements_base_instance0_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* The pvr_write_* macros below patch individual fields of this program's
+ * data segment. The "buffer + N" offsets index 32-bit words, i.e. buffer
+ * is presumably a uint32_t pointer -- TODO confirm against callers. The
+ * offsets must match the const[] slots used by the code array above, so
+ * do not hand-edit them.
+ */
+#define pvr_write_draw_indirect_elements_base_instance0_di_data(buffer, \
+                                                                addr,   \
+                                                                device) \
+   do {                                                                 \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                    \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                          \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_write_vdm(buffer, \
+                                                                  addr)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2050000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_flush_vdm(buffer, \
+                                                                  addr)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3960000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_idx_stride(buffer, \
+                                                                   value)  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 3, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_idx_base(buffer, \
+                                                                 value)  \
+   do {                                                                  \
+      uint64_t data = value;                                             \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);             \
+      memcpy(buffer + 4, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_idx_header(buffer, \
+                                                                   value)  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 6, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_num_views(buffer, \
+                                                                  value)  \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 7, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance0_immediates(buffer) \
+   do {                                                                    \
+      {                                                                    \
+         uint32_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 2, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint64_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 8, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint32_t data = 0x1;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 10, &data, sizeof(data));                         \
+      }                                                                    \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance1.h
new file mode 100644 (file)
index 0000000..ba91dc8
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE1_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE1_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Precompiled PDS program: one 32-bit opcode per array entry; the trailing
+ * comments are the generator's disassembly. NOTE(review): auto-generated
+ * data -- regenerate from the .pds source rather than hand-editing.
+ */
+static const uint32_t pvr_draw_indirect_elements_base_instance1_code[23] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0x9000a120, /* ADD32           ptemp[0].32 = const[2].32 + temp[4].32  */
+   0x9000a161, /* ADD32           ptemp[1].32 = const[2].32 + temp[5].32  */
+   0x9130f0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 - ptemp[3].32  */
+   0x04181823, /* MAD             temp[6].64 = (temp[3].32 * const[3].32) +
+                                               const[4].64 */
+   0x53383008, /* SFTLP32         temp[8].32 = (temp[7].32 | const[6].32) << 0
+                */
+   0x50343009, /* SFTLP32         temp[9].32 = temp[6].32 << 0 */
+   0x04103843, /* MAD             temp[6].64 = (temp[2].32 * const[7].32) +
+                                               const[8].64 */
+   0x50343002, /* SFTLP32         temp[2].32 = temp[6].32 << 0 */
+   0x500c080a, /* SFTLP32         temp[10].32 = temp[1].32 << 0 */
+   0x9120828b, /* ADD32           temp[11].32 = temp[2].32 - const[10].32  */
+   0x5024200c, /* SFTLP32         temp[12].32 = temp[4].32 << 0 */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd19c0000, /* LIMM            temp[7].32 = 0000 */
+   0x50141006, /* SFTLP32         temp[6].32 = temp[2].32 << 0 */
+   0xb18c0000, /* CMP             P0 = (temp[6].64 = 0000) */
+   0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+   0xd9ac0000, /* LIMM            ? temp[11].32 = 0000 */
+   0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Positional initializer for struct pvr_psc_program_output; each field's
+ * meaning is given by its trailing comment.
+ */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance1_program = {
+      pvr_draw_indirect_elements_base_instance1_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* The pvr_write_* macros below patch individual fields of this program's
+ * data segment. The "buffer + N" offsets index 32-bit words, i.e. buffer
+ * is presumably a uint32_t pointer -- TODO confirm against callers. The
+ * offsets must match the const[] slots used by the code array above, so
+ * do not hand-edit them.
+ */
+#define pvr_write_draw_indirect_elements_base_instance1_di_data(buffer, \
+                                                                addr,   \
+                                                                device) \
+   do {                                                                 \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                    \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                          \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_write_vdm(buffer, \
+                                                                  addr)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2050000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_flush_vdm(buffer, \
+                                                                  addr)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3960000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_idx_stride(buffer, \
+                                                                   value)  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 3, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_idx_base(buffer, \
+                                                                 value)  \
+   do {                                                                  \
+      uint64_t data = value;                                             \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);             \
+      memcpy(buffer + 4, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_idx_header(buffer, \
+                                                                   value)  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 6, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_num_views(buffer, \
+                                                                  value)  \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 7, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance1_immediates(buffer) \
+   do {                                                                    \
+      {                                                                    \
+         uint32_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 2, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint64_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 8, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint32_t data = 0x1;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 10, &data, sizeof(data));                         \
+      }                                                                    \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance2.h
new file mode 100644 (file)
index 0000000..a6515b3
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE2_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE2_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Precompiled PDS program: one 32-bit opcode per array entry; the trailing
+ * comments are the generator's disassembly. NOTE(review): auto-generated
+ * data -- regenerate from the .pds source rather than hand-editing.
+ */
+static const uint32_t pvr_draw_indirect_elements_base_instance2_code[23] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0x9000a160, /* ADD32           ptemp[0].32 = const[2].32 + temp[5].32  */
+   0x9000a1a1, /* ADD32           ptemp[1].32 = const[2].32 + temp[6].32  */
+   0x9130f0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 - ptemp[3].32  */
+   0x04201820, /* MAD             temp[0].64 = (temp[4].32 * const[3].32) +
+                                               const[4].64 */
+   0x53083007, /* SFTLP32         temp[7].32 = (temp[1].32 | const[6].32) << 0
+                */
+   0x50040008, /* SFTLP32         temp[8].32 = temp[0].32 << 0 */
+   0x04183840, /* MAD             temp[0].64 = (temp[3].32 * const[7].32) +
+                                               const[8].64 */
+   0x50040003, /* SFTLP32         temp[3].32 = temp[0].32 << 0 */
+   0x50141009, /* SFTLP32         temp[9].32 = temp[2].32 << 0 */
+   0x9120c28a, /* ADD32           temp[10].32 = temp[3].32 - const[10].32  */
+   0x502c280b, /* SFTLP32         temp[11].32 = temp[5].32 << 0 */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x501c1800, /* SFTLP32         temp[0].32 = temp[3].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd9a40000, /* LIMM            ? temp[9].32 = 0000 */
+   0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+   0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Positional initializer for struct pvr_psc_program_output; each field's
+ * meaning is given by its trailing comment. NOTE(review): the unaligned
+ * temp-segment size here is 18 (siblings use 20); 18 rounds up to the
+ * aligned value of 20, so this matches this variant's smaller temp usage.
+ */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance2_program = {
+      pvr_draw_indirect_elements_base_instance2_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      18, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* The pvr_write_* macros below patch individual fields of this program's
+ * data segment. The "buffer + N" offsets index 32-bit words, i.e. buffer
+ * is presumably a uint32_t pointer -- TODO confirm against callers. The
+ * offsets must match the const[] slots used by the code array above, so
+ * do not hand-edit them.
+ */
+#define pvr_write_draw_indirect_elements_base_instance2_di_data(buffer, \
+                                                                addr,   \
+                                                                device) \
+   do {                                                                 \
+      uint64_t data = ((addr) | (0x80000000000ULL) |                    \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                          \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_write_vdm(buffer, \
+                                                                  addr)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x1c50000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_flush_vdm(buffer, \
+                                                                  addr)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3160000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_idx_stride(buffer, \
+                                                                   value)  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 3, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_idx_base(buffer, \
+                                                                 value)  \
+   do {                                                                  \
+      uint64_t data = value;                                             \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);             \
+      memcpy(buffer + 4, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_idx_header(buffer, \
+                                                                   value)  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 6, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_num_views(buffer, \
+                                                                  value)  \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 7, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance2_immediates(buffer) \
+   do {                                                                    \
+      {                                                                    \
+         uint32_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 2, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint64_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 8, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint32_t data = 0x1;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 10, &data, sizeof(data));                         \
+      }                                                                    \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance3.h
new file mode 100644 (file)
index 0000000..7a18888
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE3_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE3_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* Precompiled PDS program: one 32-bit opcode per array entry; the trailing
+ * comments are the generator's disassembly. NOTE(review): auto-generated
+ * data -- regenerate from the .pds source rather than hand-editing.
+ */
+static const uint32_t pvr_draw_indirect_elements_base_instance3_code[23] = {
+   0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0x9000a1a0, /* ADD32           ptemp[0].32 = const[2].32 + temp[6].32  */
+   0x9000a1e1, /* ADD32           ptemp[1].32 = const[2].32 + temp[7].32  */
+   0x9130f0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 - ptemp[3].32  */
+   0x04281820, /* MAD             temp[0].64 = (temp[5].32 * const[3].32) +
+                                               const[4].64 */
+   0x53083008, /* SFTLP32         temp[8].32 = (temp[1].32 | const[6].32) << 0
+                */
+   0x50040009, /* SFTLP32         temp[9].32 = temp[0].32 << 0 */
+   0x04203840, /* MAD             temp[0].64 = (temp[4].32 * const[7].32) +
+                                               const[8].64 */
+   0x50040004, /* SFTLP32         temp[4].32 = temp[0].32 << 0 */
+   0x501c180a, /* SFTLP32         temp[10].32 = temp[3].32 << 0 */
+   0x9121028b, /* ADD32           temp[11].32 = temp[4].32 - const[10].32  */
+   0x5034300c, /* SFTLP32         temp[12].32 = temp[6].32 << 0 */
+   0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+   0xd1840000, /* LIMM            temp[1].32 = 0000 */
+   0x50242000, /* SFTLP32         temp[0].32 = temp[4].32 << 0 */
+   0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+   0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+   0xd9ac0000, /* LIMM            ? temp[11].32 = 0000 */
+   0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+   0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+   0xd1000000, /* WDF              */
+   0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+};
+
+/* Positional initializer for struct pvr_psc_program_output; each field's
+ * meaning is given by its trailing comment.
+ */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance3_program = {
+      pvr_draw_indirect_elements_base_instance3_code, /* code segment */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* The pvr_write_* macros below patch individual fields of this program's
+ * data segment. The "buffer + N" offsets index 32-bit words, i.e. buffer
+ * is presumably a uint32_t pointer -- TODO confirm against callers. The
+ * offsets must match the const[] slots used by the code array above, so
+ * do not hand-edit them.
+ */
+#define pvr_write_draw_indirect_elements_base_instance3_di_data(buffer, \
+                                                                addr,   \
+                                                                device) \
+   do {                                                                 \
+      uint64_t data = ((addr) | (0x80000000000ULL) |                    \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));          \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+      memcpy(buffer + 0, &data, sizeof(data));                          \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_write_vdm(buffer, \
+                                                                  addr)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2050000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_flush_vdm(buffer, \
+                                                                  addr)   \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3960000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_idx_stride(buffer, \
+                                                                   value)  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 3, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_idx_base(buffer, \
+                                                                 value)  \
+   do {                                                                  \
+      uint64_t data = value;                                             \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);             \
+      memcpy(buffer + 4, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_idx_header(buffer, \
+                                                                   value)  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 6, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_num_views(buffer, \
+                                                                  value)  \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 7, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance3_immediates(buffer) \
+   do {                                                                    \
+      {                                                                    \
+         uint32_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 2, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint64_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 8, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint32_t data = 0x1;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 10, &data, sizeof(data));                         \
+      }                                                                    \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid0.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid0.h
new file mode 100644 (file)
index 0000000..885c00c
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID0_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID0_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* PDS machine code for the indexed indirect-draw program (base_instance,
+ * drawid variant 0).  Each 32-bit word carries its disassembly in the
+ * trailing comment.  Auto-generated (see header comment above) — do not
+ * hand-edit the hex values.
+ */
+static const uint32_t
+   pvr_draw_indirect_elements_base_instance_drawid0_code[23] = {
+      0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+      0xd1000000, /* WDF              */
+      0x9000a0e0, /* ADD32           ptemp[0].32 = const[2].32 + temp[3].32  */
+      0x9000a121, /* ADD32           ptemp[1].32 = const[2].32 + temp[4].32  */
+      0x9030c0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 + const[3].32  */
+      0x04102033, /* MAD             temp[6].64 = (temp[2].32 * const[4].32) +
+                                                  const[6].64 */
+      0x53382808, /* SFTLP32         temp[8].32 = (temp[7].32 | const[5].32) <<
+                   * 0
+                   */
+      0x50343009, /* SFTLP32         temp[9].32 = temp[6].32 << 0 */
+      0x04084053, /* MAD             temp[6].64 = (temp[1].32 * const[8].32) +
+                                                  const[10].64 */
+      0x50343001, /* SFTLP32         temp[1].32 = temp[6].32 << 0 */
+      0x5004000a, /* SFTLP32         temp[10].32 = temp[0].32 << 0 */
+      0x912040cb, /* ADD32           temp[11].32 = temp[1].32 - const[3].32  */
+      0x501c180c, /* SFTLP32         temp[12].32 = temp[3].32 << 0 */
+      0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+      0xd19c0000, /* LIMM            temp[7].32 = 0000 */
+      0x500c0806, /* SFTLP32         temp[6].32 = temp[1].32 << 0 */
+      0xb18c0000, /* CMP             P0 = (temp[6].64 = 0000) */
+      0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+      0xd9ac0000, /* LIMM            ? temp[11].32 = 0000 */
+      0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+      0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+      0xd1000000, /* WDF              */
+      0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+   };
+
+/* Program descriptor wrapping the code array above.  The constant
+ * mappings pointer is zeroed; the data segment is instead populated by
+ * the pvr_write_draw_indirect_elements_base_instance_drawid0_*() macros
+ * that follow.  Sizes are in dwords — the first group padded to a
+ * multiple of 4 (code 23 -> 24), the second group unpadded.
+ */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance_drawid0_program = {
+      pvr_draw_indirect_elements_base_instance_drawid0_code, /* code segment
+                                                              */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* Data-segment writers for the drawid0 program.  Each macro copies one
+ * field into the data segment at a fixed element offset chosen by the
+ * PDS assembler: di_data at 0, immediates at 2/3 (plus a 64-bit zero at
+ * 10), idx_stride at 4, idx_header at 5, idx_base at 6, num_views at 8,
+ * the VDM write/flush words at 12 and 14.  The hard-coded 0x...ULL
+ * words come from the generator — do not hand-edit.  NOTE(review):
+ * offsets assume `buffer` indexes 32-bit words — confirm with callers.
+ */
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_di_data(buffer, \
+                                                                       addr,   \
+                                                                       device) \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                           \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));                 \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);                   \
+      memcpy(buffer + 0, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_write_vdm( \
+   buffer,                                                                \
+   addr)                                                                  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2050000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_flush_vdm( \
+   buffer,                                                                \
+   addr)                                                                  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3960000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_stride( \
+   buffer,                                                                 \
+   value)                                                                  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 4, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_base( \
+   buffer,                                                               \
+   value)                                                                \
+   do {                                                                  \
+      uint64_t data = value;                                             \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);             \
+      memcpy(buffer + 6, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_idx_header( \
+   buffer,                                                                 \
+   value)                                                                  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 5, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_num_views( \
+   buffer,                                                                \
+   value)                                                                 \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 8, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid0_immediates( \
+   buffer)                                                                 \
+   do {                                                                    \
+      {                                                                    \
+         uint32_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 2, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint32_t data = 0x1;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint64_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 10, &data, sizeof(data));                         \
+      }                                                                    \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid1.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid1.h
new file mode 100644 (file)
index 0000000..aa4dae7
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID1_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID1_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* PDS machine code for the indexed indirect-draw program (base_instance,
+ * drawid variant 1).  Identical in structure to the drawid0 variant but
+ * with shifted temp-register operands (see disassembly comments).
+ * Auto-generated — do not hand-edit the hex values.
+ */
+static const uint32_t
+   pvr_draw_indirect_elements_base_instance_drawid1_code[23] = {
+      0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+      0xd1000000, /* WDF              */
+      0x9000a120, /* ADD32           ptemp[0].32 = const[2].32 + temp[4].32  */
+      0x9000a161, /* ADD32           ptemp[1].32 = const[2].32 + temp[5].32  */
+      0x9030c0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 + const[3].32  */
+      0x04182033, /* MAD             temp[6].64 = (temp[3].32 * const[4].32) +
+                                                  const[6].64 */
+      0x53382808, /* SFTLP32         temp[8].32 = (temp[7].32 | const[5].32) <<
+                   * 0
+                   */
+      0x50343009, /* SFTLP32         temp[9].32 = temp[6].32 << 0 */
+      0x04104053, /* MAD             temp[6].64 = (temp[2].32 * const[8].32) +
+                                                  const[10].64 */
+      0x50343002, /* SFTLP32         temp[2].32 = temp[6].32 << 0 */
+      0x500c080a, /* SFTLP32         temp[10].32 = temp[1].32 << 0 */
+      0x912080cb, /* ADD32           temp[11].32 = temp[2].32 - const[3].32  */
+      0x5024200c, /* SFTLP32         temp[12].32 = temp[4].32 << 0 */
+      0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+      0xd19c0000, /* LIMM            temp[7].32 = 0000 */
+      0x50141006, /* SFTLP32         temp[6].32 = temp[2].32 << 0 */
+      0xb18c0000, /* CMP             P0 = (temp[6].64 = 0000) */
+      0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+      0xd9ac0000, /* LIMM            ? temp[11].32 = 0000 */
+      0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+      0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+      0xd1000000, /* WDF              */
+      0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+   };
+
+/* Program descriptor wrapping the code array above.  The constant
+ * mappings pointer is zeroed; the data segment is instead populated by
+ * the pvr_write_draw_indirect_elements_base_instance_drawid1_*() macros
+ * that follow.  Sizes are in dwords — the first group padded to a
+ * multiple of 4 (code 23 -> 24), the second group unpadded.
+ */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance_drawid1_program = {
+      pvr_draw_indirect_elements_base_instance_drawid1_code, /* code segment
+                                                              */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* Data-segment writers for the drawid1 program.  Each macro copies one
+ * field into the data segment at a fixed element offset chosen by the
+ * PDS assembler: di_data at 0, immediates at 2/3 (plus a 64-bit zero at
+ * 10), idx_stride at 4, idx_header at 5, idx_base at 6, num_views at 8,
+ * the VDM write/flush words at 12 and 14.  The hard-coded 0x...ULL
+ * words come from the generator — do not hand-edit.  NOTE(review):
+ * offsets assume `buffer` indexes 32-bit words — confirm with callers.
+ */
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_di_data(buffer, \
+                                                                       addr,   \
+                                                                       device) \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x60000000000ULL) |                           \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));                 \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);                   \
+      memcpy(buffer + 0, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_write_vdm( \
+   buffer,                                                                \
+   addr)                                                                  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2050000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_flush_vdm( \
+   buffer,                                                                \
+   addr)                                                                  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3960000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_stride( \
+   buffer,                                                                 \
+   value)                                                                  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 4, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_base( \
+   buffer,                                                               \
+   value)                                                                \
+   do {                                                                  \
+      uint64_t data = value;                                             \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);             \
+      memcpy(buffer + 6, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_idx_header( \
+   buffer,                                                                 \
+   value)                                                                  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 5, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_num_views( \
+   buffer,                                                                \
+   value)                                                                 \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 8, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid1_immediates( \
+   buffer)                                                                 \
+   do {                                                                    \
+      {                                                                    \
+         uint32_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 2, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint32_t data = 0x1;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint64_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 10, &data, sizeof(data));                         \
+      }                                                                    \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid2.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid2.h
new file mode 100644 (file)
index 0000000..a6449f7
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID2_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID2_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* PDS machine code for the indexed indirect-draw program (base_instance,
+ * drawid variant 2).  Same structure as the drawid0/1 variants with
+ * again-shifted temp-register operands (see disassembly comments).
+ * Auto-generated — do not hand-edit the hex values.
+ */
+static const uint32_t
+   pvr_draw_indirect_elements_base_instance_drawid2_code[23] = {
+      0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+      0xd1000000, /* WDF              */
+      0x9000a160, /* ADD32           ptemp[0].32 = const[2].32 + temp[5].32  */
+      0x9000a1a1, /* ADD32           ptemp[1].32 = const[2].32 + temp[6].32  */
+      0x9030c0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 + const[3].32  */
+      0x04202030, /* MAD             temp[0].64 = (temp[4].32 * const[4].32) +
+                                                  const[6].64 */
+      0x53082807, /* SFTLP32         temp[7].32 = (temp[1].32 | const[5].32) <<
+                   * 0
+                   */
+      0x50040008, /* SFTLP32         temp[8].32 = temp[0].32 << 0 */
+      0x04184050, /* MAD             temp[0].64 = (temp[3].32 * const[8].32) +
+                                                  const[10].64 */
+      0x50040003, /* SFTLP32         temp[3].32 = temp[0].32 << 0 */
+      0x50141009, /* SFTLP32         temp[9].32 = temp[2].32 << 0 */
+      0x9120c0ca, /* ADD32           temp[10].32 = temp[3].32 - const[3].32  */
+      0x502c280b, /* SFTLP32         temp[11].32 = temp[5].32 << 0 */
+      0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+      0xd1840000, /* LIMM            temp[1].32 = 0000 */
+      0x501c1800, /* SFTLP32         temp[0].32 = temp[3].32 << 0 */
+      0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+      0xd9a40000, /* LIMM            ? temp[9].32 = 0000 */
+      0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+      0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+      0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+      0xd1000000, /* WDF              */
+      0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+   };
+
+/* Program descriptor wrapping the code array above.  The constant
+ * mappings pointer is zeroed; the data segment is instead populated by
+ * the pvr_write_draw_indirect_elements_base_instance_drawid2_*() macros
+ * that follow.  Sizes are in dwords — the first group padded to a
+ * multiple of 4 (code 23 -> 24, temps 18 -> 20), the second unpadded.
+ */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance_drawid2_program = {
+      pvr_draw_indirect_elements_base_instance_drawid2_code, /* code segment
+                                                              */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      18, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* Data-segment writers for the drawid2 program.  Each macro copies one
+ * field into the data segment at a fixed element offset chosen by the
+ * PDS assembler: di_data at 0, immediates at 2/3 (plus a 64-bit zero at
+ * 10), idx_stride at 4, idx_header at 5, idx_base at 6, num_views at 8,
+ * the VDM write/flush words at 12 and 14.  The hard-coded 0x...ULL
+ * words differ from the drawid0/1 variants and come from the generator —
+ * do not hand-edit.  NOTE(review): offsets assume `buffer` indexes
+ * 32-bit words — confirm with callers.
+ */
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_di_data(buffer, \
+                                                                       addr,   \
+                                                                       device) \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x80000000000ULL) |                           \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));                 \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);                   \
+      memcpy(buffer + 0, &data, sizeof(data));                                 \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_write_vdm( \
+   buffer,                                                                \
+   addr)                                                                  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x1c50000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_flush_vdm( \
+   buffer,                                                                \
+   addr)                                                                  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3160000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_stride( \
+   buffer,                                                                 \
+   value)                                                                  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 4, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_base( \
+   buffer,                                                               \
+   value)                                                                \
+   do {                                                                  \
+      uint64_t data = value;                                             \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);             \
+      memcpy(buffer + 6, &data, sizeof(data));                           \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_idx_header( \
+   buffer,                                                                 \
+   value)                                                                  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 5, &data, sizeof(data));                             \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_num_views( \
+   buffer,                                                                \
+   value)                                                                 \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 8, &data, sizeof(data));                            \
+   } while (0)
+#define pvr_write_draw_indirect_elements_base_instance_drawid2_immediates( \
+   buffer)                                                                 \
+   do {                                                                    \
+      {                                                                    \
+         uint32_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 2, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint32_t data = 0x1;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint64_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 10, &data, sizeof(data));                         \
+      }                                                                    \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid3.h b/src/imagination/vulkan/pds/pvr_pds_programs/pvr_draw_indirect_elements_base_instance_drawid3.h
new file mode 100644 (file)
index 0000000..73ae0ee
--- /dev/null
@@ -0,0 +1,152 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID3_H
+#define PVR_DRAW_INDIRECTELEMENTS_BASE_INSTANCE_DRAWID3_H
+
+/* Initially generated from ARB_draw_indirect_elements.pds */
+
+/* PDS code segment (23 dwords) for an indexed indirect draw with base
+ * instance and draw id (variant 3). Each word is annotated with its
+ * disassembly. Generated code - do not hand-edit the encoded opcodes.
+ */
+static const uint32_t
+   pvr_draw_indirect_elements_base_instance_drawid3_code[23] = {
+      0xd0000000, /* LD              const[0].64: dst(?) <= mem(?) */
+      0xd1000000, /* WDF              */
+      0x9000a1a0, /* ADD32           ptemp[0].32 = const[2].32 + temp[6].32  */
+      0x9000a1e1, /* ADD32           ptemp[1].32 = const[2].32 + temp[7].32  */
+      0x9030c0e3, /* ADD32           ptemp[3].32 = ptemp[3].32 + const[3].32  */
+      0x04282030, /* MAD             temp[0].64 = (temp[5].32 * const[4].32) +
+                                                  const[6].64 */
+      0x53082808, /* SFTLP32         temp[8].32 = (temp[1].32 | const[5].32) <<
+                   * 0
+                   */
+      0x50040009, /* SFTLP32         temp[9].32 = temp[0].32 << 0 */
+      0x04204050, /* MAD             temp[0].64 = (temp[4].32 * const[8].32) +
+                                                  const[10].64 */
+      0x50040004, /* SFTLP32         temp[4].32 = temp[0].32 << 0 */
+      0x501c180a, /* SFTLP32         temp[10].32 = temp[3].32 << 0 */
+      0x912100cb, /* ADD32           temp[11].32 = temp[4].32 - const[3].32  */
+      0x5034300c, /* SFTLP32         temp[12].32 = temp[6].32 << 0 */
+      0xc8000001, /* BRA             if keep 1 ( setc = p0 ) */
+      0xd1840000, /* LIMM            temp[1].32 = 0000 */
+      0x50242000, /* SFTLP32         temp[0].32 = temp[4].32 << 0 */
+      0xb1800000, /* CMP             P0 = (temp[0].64 = 0000) */
+      0xd9a80000, /* LIMM            ? temp[10].32 = 0000 */
+      0xd9ac0000, /* LIMM            ? temp[11].32 = 0000 */
+      0xd0800006, /* ST              const[12].64: mem(?) <= src(?) */
+      0xd0000007, /* LD              const[14].64: dst(?) <= mem(?) */
+      0xd1000000, /* WDF              */
+      0xf4024003, /* DOUT            doutv = temp[0].64, const[2].32; HALT */
+   };
+
+/* Program descriptor binding the code segment above to its segment sizes.
+ * The constant-mapping table is left NULL here; the data segment is
+ * populated at draw time by the pvr_write_*() macros below instead.
+ */
+static const struct pvr_psc_program_output
+   pvr_draw_indirect_elements_base_instance_drawid3_program = {
+      pvr_draw_indirect_elements_base_instance_drawid3_code, /* code segment
+                                                              */
+      0, /* constant mappings, zeroed since we use the macros below */
+      7, /* number of constant mappings */
+
+      16, /* size of data segment, in dwords, aligned to 4 */
+      24, /* size of code segment, in dwords, aligned to 4 */
+      20, /* size of temp segment, in dwords, aligned to 4 */
+      16, /* size of data segment, in dwords */
+      23, /* size of code segment, in dwords */
+      20, /* size of temp segment, in dwords */
+      NULL /* function pointer to write data segment */
+   };
+
+/* Data-segment writers for the drawid3 program above. Offsets are element
+ * offsets into the 16-dword data segment.
+ * NOTE(review): the offsets assume `buffer` is a uint32_t * so that
+ * `buffer + N` is a dword offset - confirm at the call sites.
+ */
+
+/* Dwords 0-1: indirect-arguments address OR'd with a fixed control constant
+ * and the device's SLC/MCU cache-control bits.
+ */
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_di_data(buffer, \
+                                                                       addr,   \
+                                                                       device) \
+   do {                                                                        \
+      uint64_t data = ((addr) | (0x80000000000ULL) |                           \
+                       ENABLE_SLC_MCU_CACHE_CONTROLS(device));                 \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);                   \
+      memcpy(buffer + 0, &data, sizeof(data));                                 \
+   } while (0)
+/* Dwords 12-13: VDM write address tagged with the constant
+ * 0x2050000000000ULL (encoding not visible in this file - presumably a
+ * store command header; confirm against the PDS instruction definitions).
+ */
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_write_vdm( \
+   buffer,                                                                \
+   addr)                                                                  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x2050000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 12, &data, sizeof(data));                           \
+   } while (0)
+/* Dwords 14-15: VDM flush address tagged with the constant
+ * 0x3960000000000ULL (encoding not visible in this file - confirm).
+ */
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_flush_vdm( \
+   buffer,                                                                \
+   addr)                                                                  \
+   do {                                                                   \
+      uint64_t data = ((addr) | (0x3960000000000ULL));                    \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 14, &data, sizeof(data));                           \
+   } while (0)
+/* Dword 4: 32-bit idx_stride value. */
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_stride( \
+   buffer,                                                                 \
+   value)                                                                  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 4, &data, sizeof(data));                             \
+   } while (0)
+/* Dwords 6-7: 64-bit idx_base value. */
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_base( \
+   buffer,                                                               \
+   value)                                                                \
+   do {                                                                  \
+      uint64_t data = value;                                             \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);             \
+      memcpy(buffer + 6, &data, sizeof(data));                           \
+   } while (0)
+/* Dword 5: 32-bit idx_header value. */
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_idx_header( \
+   buffer,                                                                 \
+   value)                                                                  \
+   do {                                                                    \
+      uint32_t data = value;                                               \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);               \
+      memcpy(buffer + 5, &data, sizeof(data));                             \
+   } while (0)
+/* Dword 8: 32-bit num_views value. */
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_num_views( \
+   buffer,                                                                \
+   value)                                                                 \
+   do {                                                                   \
+      uint32_t data = value;                                              \
+      PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);              \
+      memcpy(buffer + 8, &data, sizeof(data));                            \
+   } while (0)
+/* Fixed immediates: dword 2 = 0x0, dword 3 = 0x1 and a 64-bit 0x0 starting
+ * at dword 10 (same layout as the drawid2 variant).
+ */
+#define pvr_write_draw_indirect_elements_base_instance_drawid3_immediates( \
+   buffer)                                                                 \
+   do {                                                                    \
+      {                                                                    \
+         uint32_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 2, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint32_t data = 0x1;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 3, &data, sizeof(data));                          \
+      }                                                                    \
+      {                                                                    \
+         uint64_t data = 0x0;                                              \
+         PVR_PDS_PRINT_DATA("DRAW_INDIRECT_ELEMENTS", data, 0);            \
+         memcpy(buffer + 10, &data, sizeof(data));                         \
+      }                                                                    \
+   } while (0)
+#endif
diff --git a/src/imagination/vulkan/pds/pvr_rogue_pds_defs.h b/src/imagination/vulkan/pds/pvr_rogue_pds_defs.h
new file mode 100644 (file)
index 0000000..249b688
--- /dev/null
@@ -0,0 +1,1661 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_PDS_DEFS_H
+#define PVR_ROGUE_PDS_DEFS_H
+
+#include <stdint.h>
+
+/* Instruction type C */
+#define PVR_ROGUE_PDSINST_OPCODEC_MASK (0x0000000FU)
+/* 64 bit add*/
+#define PVR_ROGUE_PDSINST_OPCODEC_ADD64 UINT32_C(0x00000008)
+/* 32 bit add*/
+#define PVR_ROGUE_PDSINST_OPCODEC_ADD32 UINT32_C(0x00000009)
+/* Shift and/or Logic Operation (64 bit)*/
+#define PVR_ROGUE_PDSINST_OPCODEC_SFTLP64 UINT32_C(0x0000000a)
+/* Compare and set predicate*/
+#define PVR_ROGUE_PDSINST_OPCODEC_CMP UINT32_C(0x0000000b)
+/* Branch and/or select predicate*/
+#define PVR_ROGUE_PDSINST_OPCODEC_BRA UINT32_C(0x0000000c)
+/* Umbrella OpcodeSP instructions*/
+#define PVR_ROGUE_PDSINST_OPCODEC_SP UINT32_C(0x0000000d)
+/* Multiply Accumulate with DOUD*/
+#define PVR_ROGUE_PDSINST_OPCODEC_DDMAD UINT32_C(0x0000000e)
+/* DOUT Command*/
+#define PVR_ROGUE_PDSINST_OPCODEC_DOUT UINT32_C(0x0000000f)
+
+/* Logical Operation */
+#define PVR_ROGUE_PDSINST_LOP_MASK (0x00000007U)
+#define PVR_ROGUE_PDSINST_LOP_NONE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LOP_NOT (0x00000001U)
+#define PVR_ROGUE_PDSINST_LOP_AND (0x00000002U)
+#define PVR_ROGUE_PDSINST_LOP_OR (0x00000003U)
+#define PVR_ROGUE_PDSINST_LOP_XOR (0x00000004U)
+#define PVR_ROGUE_PDSINST_LOP_XNOR (0x00000005U)
+#define PVR_ROGUE_PDSINST_LOP_NAND (0x00000006U)
+#define PVR_ROGUE_PDSINST_LOP_NOR (0x00000007U)
+
+/* 64-bit Source Temps and Persistent Temps. */
+#define PVR_ROGUE_PDSINST_REGS64TP_MASK (0x0000001FU)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64TP_TEMP64_UPPER (15U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64 (1U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER (16U)
+#define PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_UPPER (31U)
+
+/* 32-bit Registers - 32-bit aligned. */
+#define PVR_ROGUE_PDSINST_REGS32_MASK (0x000000FFU)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER (127U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32 (1U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER (128U)
+#define PVR_ROGUE_PDSINST_REGS32_TEMP32_UPPER (159U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32 (2U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER (192U)
+#define PVR_ROGUE_PDSINST_REGS32_PTEMP32_UPPER (223U)
+
+/* cc ? if im then
+ * cc ?     dst = (*src0 lop *src1) << src2
+ * cc ? else
+ * cc ?     dst = (*src0 lop *src1) << *src2
+ *
+ * Take the logical operation of the 2 sources, and shift to a 64 bit result.
+ * For unary operator NOT, *src0 is taken as the logical operand; for operator
+ * NONE, an unmodified *src0 is shifted. If IM is set use SFT as a direct shift
+ * value, otherwise use an address to obtain the shift value. The shift value
+ * (SRC2) is treated as a 2's complement encoded signed value. A negative value
+ * encodes a right shift. Values are clamped to the range [-63,63].
+ */
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP64_OPCODE_DEFAULT (0xA0000000U) /* SFTLP64 */
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP64_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_SFTLP64_IM_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_SFTLP64_IM_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT (0U)
+
+/* Instruction type B */
+#define PVR_ROGUE_PDSINST_OPCODEB_MASK (0x00000007U)
+/* Shift and/or Logic Operation (32 bit) */
+#define PVR_ROGUE_PDSINST_OPCODEB_SFTLP32 UINT32_C(0x00000002)
+/* Vertex Stream Out DMA Command */
+#define PVR_ROGUE_PDSINST_OPCODEB_STM UINT32_C(0x00000003)
+
+/* 32-bit Source Temps. */
+#define PVR_ROGUE_PDSINST_REGS32T_MASK (0x0000001FU)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32T_TEMP32_UPPER (31U)
+
+/* 32-bit Source Temps and Persistent Temps. */
+#define PVR_ROGUE_PDSINST_REGS32TP_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32 (0U)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS32TP_TEMP32_UPPER (31U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32 (1U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER (32U)
+#define PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_UPPER (63U)
+
+/* cc ? if im then
+ * cc ?     dst = (*src0 lop *src1) << src2
+ * cc ? else
+ * cc ?     dst = (*src0 lop *src1) << *src2
+ *
+ * Take the logical operation of the 2 sources, and shift to a 32 bit result.
+ * For unary operator NOT, *src0 is taken as the logical operand; for operator
+ * NONE, an unmodified *src0 is shifted. If IM is set, use the shift value SFT
+ * (SRC2) as a direct shift value, otherwise use an address to obtain the shift
+ * value. SFT (SRC2) is treated as a 2's complement encoded signed value. A
+ * negative value encodes a right shift. Values are clamped to the range
+ * [-31,31].
+ */
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_CLRMSK (0x1FFFFFFFU)
+#define PVR_ROGUE_PDSINST_SFTLP32_OPCODE_DEFAULT (0x40000000U) /* SFTLP32 */
+#define PVR_ROGUE_PDSINST_SFTLP32_IM_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_SFTLP32_IM_ENABLE (0x10000000U)
+#define PVR_ROGUE_PDSINST_SFTLP32_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_SFTLP32_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT (19U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT (11U)
+#define PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT (0U)
+
+/* The stream being processed within the vertex, selects 1 of 4 streams. */
+#define PVR_ROGUE_PDSINST_SO_MASK (0x00000003U)
+
+/* An instruction to enable the 'Streaming Out' of data to memory.
+ *
+ * This instruction can only be used when called from a Stream Output Program
+ * (see the stream output configuration words), as it reads its source data
+ * from the unified vertex store within the TA.
+ * NOTE(review): the cross-reference after "(see" was truncated in the
+ * original comment - fill in the intended section reference.
+ *
+ * Stream Out programs use the vertex data master, but are called from the TA.
+ * They do not execute on the USC. If synchronization is required with the
+ * control stream to the next draw call, a DOUTV command must be used when
+ * stream out finishes for the current draw call. The VDM must have a
+ * corresponding entry in the control stream indicating when it should wait for
+ * the PDS.
+ *
+ * As SRC0, SRC1 needs to be held from program to program it is assumed these
+ * are in persistent temps. There are 32 (dword) persistent temps, 8 of which
+ * are required to support 4 streams. The driver needs to manage the allocation
+ * of these. If the value needs to be carried from one geometry job to another,
+ * it will need to be loaded from memory at the start of the geometry job, and
+ * stored at the end of it (using a state program in the input control stream).
+ *
+ * When a new buffer is altered which was in use, the driver will need to fence
+ * in order to make sure that the preceding operation have completed before the
+ * persistent temps are updated.
+ *
+ * It is assumed that the USC compiler will optimize the stream order to keep
+ * data which is contiguous in the output vertex (going to memory)
+ * together. This will enable multiple words to be streamed out in a single
+ * DMA. This will reduce the processing load on the TA.
+ *
+ * The sources are read from within the constant, temporary stores of the PDS,
+ * and have the following meaning.
+ *
+ * If the buffer is being appended to then persistent constants need to be
+ * stored to memory at the end of the geometry job, and reloaded at the start
+ * of the next job (as another context may be run).
+ *
+ * ccs ? if (so_address + (so_vosize * so_primtype)) <= so_limit then
+ *
+ * dma the data from the vbg, and write it into memory. so_vioff is
+ * an offset into the current vertex.
+ * ccs ?      for (so_vertex=0 ; so_vertex < so_primtype; so_vertex++)
+ * ccs ?         for (i=0 ; i < so_dmasize; i++)
+ * ccs ?            *(so_address + so_vooff + i + (so_vertex * so_vosize)) =
+ *                                     readvertexvbg(so_vioff + i + (so_vertex * stream_size))
+ *
+ * ccs ?     if so_eop then
+ * ccs ?         so_address = so_address + (so_vosize * so_primtype)
+ * ccs ?         so_primwritten = so_primwritten + 1
+ * ccs ?
+ * end if
+ *
+ * ccs ? else
+ *
+ * ccs ?     setp(so_overflow_predicate[so])
+ * ccs ?     [so_overflow_predicate[global]]
+ *
+ * ccs ? end if
+ *
+ * if so_eop then
+ *     so_primneeded = so_primneeded + 1
+ * end if
+ *
+ * The VBG presents a stream when outputted from the shader. A bit is set in the
+ * input register indicating which stream is present. The PDS is called on a per
+ * primitive basis. In simple geometry this is per input triangle, strip etc.,
+ * in geometry shader land this is per output primitive from the geometry
+ * shader. Primitives are unraveled to remove vertex sharing. The PDS is called
+ * in submission order. The PDS program needs to be written for the primitive
+ * which is being emitted.
+ *
+ * Example
+ *
+ * Data is actually going into three buffers (this is defined elsewhere).
+ * SO_VERTEX0.Pos.XY -> buffer0
+ * SO_VERTEX0.Mult.XY -> buffer0
+ * SO_VERTEX1.Add.XY -> buffer1
+ *
+ * SO_VERTEX0.Pos.ZW -> buffer2
+ *
+ * Persistent temps:
+ * pt0 = Buffer0 start address;
+ * pt1 = Buffer1 start address;
+ * pt2 = Buffer2 start address;
+ * pt3 = 0 (buffer0 primwritten/needed)
+ * pt4 = 0 (buffer1 primwritten/needed)
+ * pt5 = 0 (buffer2 primwritten/needed)
+ *
+ * Constants:
+ * c0 = Buffer 0 top
+ * c1 = Buffer 1 top
+ * c2 = Buffer 2 top
+ * c3 = SRC2,3 for Pos.XY: VOOFF = 0, DMASIZE = 2, SO_VIOFF = 0, EOP = 0
+ * c4 = SRC2,3 for Mult:   VOSIZE = 4, VOOFF = 2, DMASIZE = 2, SO_VIOFF = 2,
+ *                         EOP = 1
+ * c5 = SRC2,3 for Pos.ZW: VOSIZE = 2, VOOFF = 0, DMASIZE = 2, SO_VIOFF = 0,
+ *                         EOP = 1
+ * c6 = SRC2,3 for Add:    VOSIZE = 2, VOOFF = 0, DMASIZE = 2, SO_VIOFF = 0,
+ *                         EOP = 1
+ *
+ * ifstream0 {
+ *
+ *  # Write Pos.XY
+ *  STM SO=0, SRC3=c0, SRC2=c3, SRC1=pt3, SRC0=pt0
+ *  STM SO=0, SRC3=c0, SRC2=c4, SRC1=pt3, SRC0=pt0
+ *  #Write Pos.ZW to buffer 1 and advance
+ *  STM SO=0, SRC3=c2, SRC2=c5, SRC1=pt5, SRC0=pt2
+ *
+ * }
+ *
+ * else if stream1 {
+ *
+ *  #Write Add to buffer 1 and advance
+ *  STM S0=1, SRC3=c1, SRC2=c6, SRC1=pt4, SRC0=pt1
+ *
+ * }
+ */
+#define PVR_ROGUE_PDSINST_STM_OPCODE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT (25U)
+#define PVR_ROGUE_PDSINST_STM_SO_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT (5U)
+#define PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT (0U)
+
+/* Multiply Accumulate */
+#define PVR_ROGUE_PDSINST_OPCODEA_MAD UINT32_C(0x00000000)
+
+/* ALU Mode */
+
+/* ALU will perform unsigned math.*/
+#define PVR_ROGUE_PDSINST_ALUM_UNSIGNED (0x00000000U)
+
+/* 64-bit Registers - 64-bit aligned */
+#define PVR_ROGUE_PDSINST_REGS64_MASK (0x0000007FU)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER (63U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64 (1U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER (64U)
+#define PVR_ROGUE_PDSINST_REGS64_TEMP64_UPPER (79U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64 (2U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER (96U)
+#define PVR_ROGUE_PDSINST_REGS64_PTEMP64_UPPER (111U)
+
+/* 64-bit Temps 0-15 Destination */
+#define PVR_ROGUE_PDSINST_REGS64T_MASK (0x0000000FU)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64T_TEMP64_UPPER (15U)
+
+/* cc ? dst = (src0 * src1) + (src2 * -1sna) + cin
+ *
+ * Multiply 2 source 32 bit numbers to generate a 64 bit result, then add or
+ * subtract a third source. Conditionally takes in a carry in. Always generates
+ * a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_MAD_OPCODE_SHIFT (30U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_ADD (0x00000000U)
+#define PVR_ROGUE_PDSINST_MAD_SNA_SUB (0x20000000U)
+#define PVR_ROGUE_PDSINST_MAD_ALUM_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_MAD_ALUM_SIGNED (0x10000000U)
+#define PVR_ROGUE_PDSINST_MAD_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_MAD_CC_ENABLE (0x08000000U)
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT (19U)
+/* 32-bit source to multiply - 32-bit range */
+#define PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT (11U)
+/* 64-bit source to add - 64-bit range */
+#define PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT (4U)
+#define PVR_ROGUE_PDSINST_MAD_DST_SHIFT (0U)
+
+/* cc ? dst = src0 + (src1 * -1sna) + cin
+ *
+ * Add or subtract 2 64 bit numbers. Conditionally takes in a carry in. Always
+ * generates a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_ADD64_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_ADD64_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_ADD64_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_ADD64_ALUM_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_ADD64_ALUM_SIGNED (0x04000000U)
+#define PVR_ROGUE_PDSINST_ADD64_SNA_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_ADD64_SNA_SUB (0x01000000U)
+
+/* 64-bit source to add. */
+#define PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT (12U)
+
+/* 64-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT (5U)
+
+/* 64-bit temp or persistent temp */
+#define PVR_ROGUE_PDSINST_ADD64_DST_SHIFT (0U)
+/* cc ? dst = src0 + (src1 * -1sna) + cin
+ *
+ * Add or subtract 2 32 bit numbers. Conditionally takes in a carry in. Always
+ * generates a carry out which is held in the status register.
+ */
+#define PVR_ROGUE_PDSINST_ADD32_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_ADD32_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_ADD32_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_ADD32_ALUM_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_ADD32_ALUM_SIGNED (0x04000000U)
+#define PVR_ROGUE_PDSINST_ADD32_SNA_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_ADD32_SNA_SUB (0x01000000U)
+/* 32-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT (14U)
+#define PVR_ROGUE_PDSINST_ADD32_SRC0_CLRMSK (0xFFC03FFFU)
+/* 32-bit source to add */
+#define PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT (6U)
+#define PVR_ROGUE_PDSINST_ADD32_SRC1_CLRMSK (0xFFFFC03FU)
+/* 32-bit temp or persistent temp */
+#define PVR_ROGUE_PDSINST_ADD32_DST_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_ADD32_DST_CLRMSK (0xFFFFFFC0U)
+
+/* Comparison Operation */
+#define PVR_ROGUE_PDSINST_COP_MASK (0x00000003U)
+
+/* = */
+#define PVR_ROGUE_PDSINST_COP_EQ (0x00000000U)
+
+/* > */
+#define PVR_ROGUE_PDSINST_COP_GT (0x00000001U)
+
+/* < */
+#define PVR_ROGUE_PDSINST_COP_LT (0x00000002U)
+
+/* != */
+#define PVR_ROGUE_PDSINST_COP_NE (0x00000003U)
+
+/* Compare Instruction with 2 sources (IM=0)
+ *
+ * im = 0;
+ * cc ? dst = src0 op src1
+ *
+ * Test source 0 against source 1. The result is written to the destination
+ * predicate (P0). All arguments are treated as unsigned.
+ */
+#define PVR_ROGUE_PDSINST_CMP_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_CMP_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_OPCODE_DEFAULT (0xB0000000U) /* CMP */
+#define PVR_ROGUE_PDSINST_CMP_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_CMP_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMP_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_CMP_COP_SHIFT (25U)
+#define PVR_ROGUE_PDSINST_CMP_COP_CLRMSK (0xF9FFFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_COP_EQ (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMP_COP_GT (0x02000000U)
+#define PVR_ROGUE_PDSINST_CMP_COP_LT (0x04000000U)
+#define PVR_ROGUE_PDSINST_CMP_COP_NE (0x06000000U)
+#define PVR_ROGUE_PDSINST_CMP_SETCP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_CMP_SETCP_CLRMSK (0xFEFFFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_SETCP_EN (0x01000000U)
+#define PVR_ROGUE_PDSINST_CMP_IM_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_CMP_IM_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_CMP_IM_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMP_IM_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_CMP_SRC0_CLRMSK (0xFF83FFFFU)
+#define PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_CMP_SRC1_CLRMSK (0xFFFFFE03U)
+
+/* 16-bit signed immediate. */
+#define PVR_ROGUE_PDSINST_IMM16_MASK (0x0000FFFFU)
+
+/* Compare Instruction with Immediate (IM=1)
+ *
+ * im = 1;
+ * cc ? dst = src0 op imm16
+ *
+ * Test source 0 against an immediate. The result is written to the destination
+ * predicate (P0). All arguments are treated as unsigned.
+ */
+#define PVR_ROGUE_PDSINST_CMPI_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_CMPI_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_OPCODE_DEFAULT (0xB0000000U) /* CMP */
+#define PVR_ROGUE_PDSINST_CMPI_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_CMPI_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMPI_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_SHIFT (25U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_CLRMSK (0xF9FFFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_COP_EQ (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_GT (0x02000000U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_LT (0x04000000U)
+#define PVR_ROGUE_PDSINST_CMPI_COP_NE (0x06000000U)
+#define PVR_ROGUE_PDSINST_CMPI_SETCP_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_CMPI_SETCP_CLRMSK (0xFEFFFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_SETCP_EN (0x01000000U)
+#define PVR_ROGUE_PDSINST_CMPI_IM_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_CMPI_IM_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_IM_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_CMPI_IM_ENABLE (0x00800000U)
+#define PVR_ROGUE_PDSINST_CMPI_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_CMPI_SRC0_CLRMSK (0xFF83FFFFU)
+#define PVR_ROGUE_PDSINST_CMPI_IM16_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_CMPI_IM16_CLRMSK (0xFFFC0003U)
+
+/* Condition codes */
+#define PVR_ROGUE_PDSINST_PREDICATE_MASK (0x0000000FU)
+
+/* Use programmable predicate 0 */
+#define PVR_ROGUE_PDSINST_PREDICATE_P0 (0x00000000U)
+/* Input Predicate 0 - When DM Pixel Start/End Program End of Tile, When DM
+ * Pixel State Program indicates load Uniforms, When DM Vertex Last Vertex In
+ * Task, When DM Compute indicates shared or kernel task (compute thread barrier
+ * mode) or Last In Task (normal mode), When DM Tessellator TBD.
+ */
+#define PVR_ROGUE_PDSINST_PREDICATE_IF0 (0x00000001U)
+/* Input Predicate 1 - When DM Pixel Start/End Program End Render, When DM Pixel
+ * State Program indicates load Texture, When DM vertex First In Task, When DM
+ * Compute indicates synchronization task (compute thread barrier mode) or First
+ * In Task (normal mode), When DM Tessellator TBD.
+ */
+#define PVR_ROGUE_PDSINST_PREDICATE_IF1 (0x00000002U)
+/* Stream 0 Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_0 (0x00000003U)
+/* Stream 1 Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_1 (0x00000004U)
+/* Stream 2 Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_2 (0x00000005U)
+/* Stream 3 Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_3 (0x00000006U)
+/* A Stream Out has overflowed. Note this is per stream not per buffer. */
+#define PVR_ROGUE_PDSINST_PREDICATE_SO_OVERFLOW_PREDICATE_GLOBAL (0x00000007U)
+/* For SETC Don't set a new predicate, KEEP the existing one. For BRA
+ * instruction where this is the source predicate, KEEP the instruction, don't
+ * predicate it out.
+ */
+#define PVR_ROGUE_PDSINST_PREDICATE_KEEP (0x00000008U)
+/* DMA Out of Bounds predicate - set by DDMAT instruction when DMA is out of
+ * bounds.
+ */
+#define PVR_ROGUE_PDSINST_PREDICATE_OOB (0x00000009U)
+
+/* Negate condition. */
+
+/* Do not negate condition. */
+#define PVR_ROGUE_PDSINST_NEG_DISABLE (0x00000000U)
+/* Negate condition. */
+#define PVR_ROGUE_PDSINST_NEG_ENABLE (0x00000001U)
+
+/* Branch Address. */
+#define PVR_ROGUE_PDSINST_BRAADDR_MASK (0x0007FFFFU)
+
+/* Branch and Set Selected Predicate Instruction
+ *
+ * im = 1;
+ * cc xor neg ? pc = dst;
+ *
+ * Conditionally branch to an address (ADDR), depending upon the predicate. The
+ * meaning of the predicate can be negated using NEG. This instruction also
+ * allows the current predicate referenced by other instructions to be set by
+ * the SETC field. The current predicate is available by all instructions. This
+ * is a signed offset from the current PC. BRA ADDR=0 would be an infinite loop
+ * of the instruction.
+ */
+
+/* Opcode field, bits [31:28]. */
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_OPCODE_DEFAULT (0xC0000000U) /* BRA */
+/* SRCC, bits [27:24]: source predicate tested by the branch. Encodings mirror
+ * the PVR_ROGUE_PDSINST_PREDICATE_* values.
+ */
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_CLRMSK (0xF0FFFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_P0 (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_IF0 (0x01000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_IF1 (0x02000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_0 (0x03000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_1 (0x04000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_2 (0x05000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_3 (0x06000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_SO_OVERFLOW_PREDICATE_GLOBAL (0x07000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_KEEP (0x08000000U)
+#define PVR_ROGUE_PDSINST_BRA_SRCC_OOB (0x09000000U)
+/* NEG, bit 23: negate the meaning of the source predicate. */
+#define PVR_ROGUE_PDSINST_BRA_NEG_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_BRA_NEG_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_BRA_NEG_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_NEG_ENABLE (0x00800000U)
+/* SETC, bits [22:19]: select the new "current" predicate referenced by other
+ * instructions (KEEP leaves the existing one unchanged).
+ */
+#define PVR_ROGUE_PDSINST_BRA_SETC_SHIFT (19U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_CLRMSK (0xFF87FFFFU)
+#define PVR_ROGUE_PDSINST_BRA_SETC_P0 (0x00000000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_IF0 (0x00080000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_IF1 (0x00100000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_0 (0x00180000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_1 (0x00200000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_2 (0x00280000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_3 (0x00300000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_SO_OVERFLOW_PREDICATE_GLOBAL (0x00380000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_KEEP (0x00400000U)
+#define PVR_ROGUE_PDSINST_BRA_SETC_OOB (0x00480000U)
+/* ADDR, bits [18:0]: signed branch offset from the current PC (range given by
+ * PVR_ROGUE_PDSINST_BRAADDR_MASK).
+ */
+#define PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_BRA_ADDR_CLRMSK (0xFFF80000U)
+
+/* SLC_MODE_LD   SLC Cache Policy for loads. */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_MASK (0x00000003U)
+/* Bypass Policy */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_BYPASS (0x00000000U)
+/* Standard Cached Read */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_CACHED (0x00000001U)
+/* Cached Read no allocate */
+#define PVR_ROGUE_PDSINST_SLC_MODE_LD_CACHED_RD_NA (0x00000003U)
+
+/* CMODE_LD   MCU (SLC) Cache Mode for Loads. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_MASK (0x00000003U)
+
+/* Normal cache operation. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_CACHED (0x00000000U)
+
+/* Bypass L0 and L1. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_BYPASS (0x00000001U)
+
+/* Force line fill of L0 and L1. */
+#define PVR_ROGUE_PDSINST_CMODE_LD_FORCE_LINE_FILL (0x00000002U)
+
+/* ld: Number of 64 bit words to load. */
+#define PVR_ROGUE_PDSINST_LD_COUNT8_MASK (0x00000007U)
+
+/* Source Base Address for memory fetch in DWORDS - MUST BE 128 BIT ALIGNED. */
+#define PVR_ROGUE_PDSINST_LD_SRCADD_MASK (UINT64_C(0x0000003FFFFFFFFF))
+
+/* Load Instruction DMA : Src0 */
+
+/* SLC cache policy. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CLRMSK \
+   (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_BYPASS \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED \
+   (UINT64_C(0x4000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SLCMODE_CACHED_RD_NA \
+   (UINT64_C(0xc000000000000000))
+
+/* The destination address in the temps (persistent or not) for the read data -
+ * MUST BE 128 BIT ALIGNED.
+ */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_SHIFT (47U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_DEST_CLRMSK (UINT64_C(0xFFF07FFFFFFFFFFF))
+
+/* Cache Mode */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_SHIFT (44U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CLRMSK (UINT64_C(0xFFFFCFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_CACHED (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_BYPASS (UINT64_C(0x0000100000000000))
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_CMODE_FORCE_LINE_FILL \
+   (UINT64_C(0x0000200000000000))
+
+/* ld: Number of 64 bit words to load. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_SHIFT (41U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_COUNT8_CLRMSK \
+   (UINT64_C(0xFFFFF1FFFFFFFFFF))
+
+/* Source Base Address for memory fetch - MUST BE 128 BIT ALIGNED. */
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_LD_LD_SRC0_SRCADD_CLRMSK \
+   (UINT64_C(0xFFFFFF0000000003))
+
+/* Special Instructions Op-code. */
+#define PVR_ROGUE_PDSINST_OPCODESP_MASK (0x0000000FU)
+
+/* Data Load from memory. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LD UINT32_C(0x00000000)
+
+/* Data Store to memory. */
+#define PVR_ROGUE_PDSINST_OPCODESP_ST UINT32_C(0x00000001)
+
+/* Wait read or write data operations to complete. */
+#define PVR_ROGUE_PDSINST_OPCODESP_WDF UINT32_C(0x00000002)
+
+/* Load 16 bit immediate. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LIMM UINT32_C(0x00000003)
+
+/* Lock the execute so only this instance can execute for this data master. */
+#define PVR_ROGUE_PDSINST_OPCODESP_LOCK UINT32_C(0x00000004)
+
+/* Release the lock taken by lock. */
+#define PVR_ROGUE_PDSINST_OPCODESP_RELEASE UINT32_C(0x00000005)
+
+/* Halt execution (program termination). */
+#define PVR_ROGUE_PDSINST_OPCODESP_HALT UINT32_C(0x00000006)
+
+/* Clear stream out predicate. */
+#define PVR_ROGUE_PDSINST_OPCODESP_STMC UINT32_C(0x00000007)
+
+/* Parallel Stream Out. */
+#define PVR_ROGUE_PDSINST_OPCODESP_STMP UINT32_C(0x00000008)
+
+/* Integer Divide. */
+#define PVR_ROGUE_PDSINST_OPCODESP_IDIV UINT32_C(0x00000009)
+
+/* Atomic Access. */
+#define PVR_ROGUE_PDSINST_OPCODESP_AA UINT32_C(0x0000000a)
+
+/* Issue Data Fence. */
+#define PVR_ROGUE_PDSINST_OPCODESP_IDF UINT32_C(0x0000000b)
+
+/* Poll. NOTE(review): this comment was a copy-paste of IDF's "Issue Data
+ * Fence"; POL presumably polls/waits on data -- confirm against PDS ISA docs.
+ */
+#define PVR_ROGUE_PDSINST_OPCODESP_POL (0x0000000cU)
+
+/* No Operation. */
+#define PVR_ROGUE_PDSINST_OPCODESP_NOP (0x0000000fU)
+
+/* Data Load Instruction (Opcode SP)
+ *
+ * for (i = 0; i < count; i++) {
+ *    cc ? *(src0 + i) = mem(src1 + i)
+ * }
+ *
+ * Load count 64 bit words (COUNT8, per the "Number of 64 bit words to load"
+ * field above) from memory to the temporaries, reading from the address in
+ * memory pointed to by SRCADD. If the final destination address
+ * (DEST + COUNT - 1) exceeds the amount of temps available the entire load is
+ * discarded.
+ */
+#define PVR_ROGUE_PDSINST_LD_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_LD_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_LD_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_LD_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_LD_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_LD_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LD_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_LD_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_LD_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_LD_OP_DEFAULT (0x00000000U) /* ld */
+#define PVR_ROGUE_PDSINST_LD_SRC0_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_LD_SRC0_CLRMSK (0xFFFFFF80U)
+
+/* CMODE_ST   MCU (SLC) Cache Mode for stores. */
+#define PVR_ROGUE_PDSINST_CMODE_ST_MASK (0x00000003U)
+
+/* Write-through Policy */
+#define PVR_ROGUE_PDSINST_CMODE_ST_WRITE_THROUGH (0x00000000U)
+
+/* Write-back Policy. */
+#define PVR_ROGUE_PDSINST_CMODE_ST_WRITE_BACK (0x00000001U)
+
+/* Lazy write-back policy. */
+#define PVR_ROGUE_PDSINST_CMODE_ST_LAZY_WRITE_BACK (0x00000002U)
+
+/* ST: Number of 32 bit Words to store. */
+#define PVR_ROGUE_PDSINST_ST_COUNT4_MASK (0x0000000FU)
+
+/* Source Base Address for memory fetch in DWORDS. */
+#define PVR_ROGUE_PDSINST_ST_SRCADD_MASK (UINT64_C(0x0000003FFFFFFFFF))
+
+/* Store Instruction DMA : Src0 */
+
+/* SLC cache policy. */
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_CLRMSK \
+   (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_THROUGH \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SLCMODE_WRITE_BACK \
+   (UINT64_C(0x4000000000000000))
+
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_SHIFT (46U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_SRC_CLRMSK (UINT64_C(0xFFF03FFFFFFFFFFF))
+
+/* Cache Mode. */
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_SHIFT (44U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_CLRMSK (UINT64_C(0xFFFFCFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_THROUGH \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_WRITE_BACK \
+   (UINT64_C(0x0000100000000000))
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_CMODE_LAZY_WRITE_BACK \
+   (UINT64_C(0x0000200000000000))
+
+/* ST: Number of 32 bit Words to store. */
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_SHIFT (40U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_COUNT4_CLRMSK \
+   (UINT64_C(0xFFFFF0FFFFFFFFFF))
+
+/* Destination Base Address for memory write. */
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_ST_ST_SRC0_DSTADD_CLRMSK \
+   (UINT64_C(0xFFFFFF0000000003))
+
+/* Data Store Instruction (Opcode SP)
+ *
+ * for (i = 0; i < count; i++) {
+ *    cc ? mem(src1 + i) = *(src0 + i)
+ * }
+ *
+ * Store count 32 bit words (COUNT4, per the "Number of 32 bit Words to store"
+ * field above) from temporaries to memory (memory address starts at src1). If
+ * the instruction attempts to read data (in temps) outside of its allocated
+ * region the entire store is discarded.
+ */
+#define PVR_ROGUE_PDSINST_ST_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_ST_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_ST_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_ST_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_ST_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_ST_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_ST_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_ST_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_ST_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_ST_OP_DEFAULT (0x00800000U) /* ST */
+#define PVR_ROGUE_PDSINST_ST_SRC0_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_ST_SRC0_CLRMSK (0xFFFFFF80U)
+
+/* Data Fence Instruction (Opcode SP)
+ *
+ * cc ? wdf
+ *
+ * The data fence instruction gives the ability to track the return of dependent
+ * read data and to determine when data written from the core has made it to the
+ * MCU. This is required on reads as there is no implicit synchronization
+ * between read accesses to the primary attribute bank and data returned by
+ * dependent reads. For writes it is required where the program is enforcing
+ * synchronization with another program (which could be on the PDS or any other
+ * processor in the system). Note, this only guarantees order within the
+ * PDS. For order elsewhere reads need to be issued, and flush commands may have
+ * to be issued to the MCU
+ *
+ * The fence mechanism takes the form of a counter that is incremented whenever
+ * a read (ld) or write (ST) instruction is encountered by the instruction fetch
+ * decoder. When the read or write instruction returns, or writes all its data
+ * the counter is decremented. There is 1 counter per thread. Prior to accessing
+ * return data a WDF instruction must be issued, when this is seen by the
+ * instruction decoder it will check the current count value and will suspend
+ * execution if it is currently non zero, execution being resumed as soon as the
+ * counter reaches zero, and a slot is available.
+ *
+ * Example
+ *  Do a dependent read for data
+ *
+ * ldr0,#2,r3        Issue read
+ * ...               Try and do some other stuff
+ * wdf               Make sure read data has come back
+ * add32 r2,r1,r0    And use the returned result
+ *
+ */
+#define PVR_ROGUE_PDSINST_WDF_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_WDF_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_WDF_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_WDF_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_WDF_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_WDF_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_WDF_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_WDF_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_WDF_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_WDF_OP_DEFAULT (0x01000000U) /* WDF */
+
+/* PDS Global Register access control */
+
+/* Disable global register access */
+#define PVR_ROGUE_PDSINST_GR_DISABLE (0x00000000U)
+
+/* Enable global register access, global register specified by IMM16.*/
+#define PVR_ROGUE_PDSINST_GR_ENABLE (0x00000001U)
+
+/* Load Immediate (Opcode SP)
+ *
+ * cc ? GR = DISABLE : *src1 = src0
+ * cc ? GR = ENABLE  : *src1 = greg[IMM16]
+ *
+ * Load an immediate value (src0) into the temporary registers. If the GR flag
+ * is set, the PDS global register specified by IMM16 will be loaded instead.
+ * greg[0] = cluster number greg[1] = instance number
+ *
+ */
+/* Opcode field, bits [31:28]. */
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_OPCODE_DEFAULT (0xD0000000U) /* SP */
+/* CC, bit 27: predicate the instruction on the current condition. */
+#define PVR_ROGUE_PDSINST_LIMM_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_LIMM_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LIMM_CC_ENABLE (0x08000000U)
+/* OP, bits [27:23]: SP sub-opcode (LIMM). */
+#define PVR_ROGUE_PDSINST_LIMM_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_LIMM_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_LIMM_OP_DEFAULT (0x01800000U) /* LIMM */
+/* SRC1, bits [22:18]: destination temporary register (*src1 = src0). */
+#define PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_LIMM_SRC1_CLRMSK (0xFF83FFFFU)
+/* SRC0, bits [17:2]: the 16-bit immediate, or the global register index
+ * (IMM16) when GR is enabled.
+ */
+#define PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_LIMM_SRC0_CLRMSK (0xFFFC0003U)
+/* GR, bit 1: when enabled, load the PDS global register selected by IMM16
+ * instead of the immediate value.
+ */
+#define PVR_ROGUE_PDSINST_LIMM_GR_SHIFT (1U)
+#define PVR_ROGUE_PDSINST_LIMM_GR_CLRMSK (0xFFFFFFFDU)
+#define PVR_ROGUE_PDSINST_LIMM_GR_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LIMM_GR_ENABLE (0x00000002U)
+
+/* Lock Instruction (Opcode SP)
+ *
+ * cc ? lock
+ *
+ * The hardware contains an internal mutex per data master. When the lock
+ * instruction is issued, the thread will attempt to take control of the mutex
+ * (for the current data master). If it is already taken by another thread, then
+ * the thread is descheduled until it is available.
+ *
+ * The purpose of the lock (and release) instructions is to allow critical
+ * sections of code to execute serially to other code for the same data
+ * master. This is particularly useful when accessing the persistent (cross
+ * thread) temporaries. Note that there is no communication possible across data
+ * masters.
+ *
+ * It is illegal to place a DOUT instruction inside a LOCK, RELEASE section of
+ * code.
+ */
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_LOCK_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_LOCK_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_LOCK_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_LOCK_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_LOCK_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_LOCK_OP_DEFAULT (0x02000000U) /* LOCK */
+
+/* Release Lock (Opcode SP)
+ *
+ * cc ? release
+ *
+ * The hardware contains an internal mutex per data master. If a thread has
+ * issued a lock instruction, then a release instruction must be issued to
+ * release the lock. See the corresponding lock instruction for more details
+ *
+ * It is illegal to place a DOUT instruction inside a LOCK, RELEASE section of
+ * code.
+ */
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_RELEASE_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_RELEASE_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_RELEASE_OP_DEFAULT (0x02800000U) /* RELEASE */
+
+/* Special instruction - Halt
+ * Halt Execution (Opcode SP)
+ *
+ * cc ? halt
+ *
+ * The last instruction in a program must always be a halt instruction, or a
+ * DOUT/DDMAD instruction with the END flag set. This is required in order to
+ * indicate the end of the program.
+ */
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_HALT_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_HALT_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_HALT_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_HALT_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_HALT_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_HALT_OP_DEFAULT (0x03000000U) /* HALT */
+
+/* Special instruction - Nop
+ * No Operation (Opcode SP)
+ *
+ * cc ? NOP
+ *
+ * This instruction does no operation, and introduces a wait cycle into the
+ * pipeline.
+ *
+ */
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_NOP_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_NOP_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_NOP_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_NOP_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_NOP_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_NOP_OP_DEFAULT (0x07800000U) /* NOP */
+
+/* The SO bits to clear 0-3 streams 0-3, bit 4-global */
+#define PVR_ROGUE_PDSINST_SOMASK_MASK (0x0000001FU)
+
+/* Special instruction - Stream out predicate clear
+ * (Opcode SP)
+ *
+ * cc ? stmc
+ *
+ * This instruction clears the stream out predicates to 0, according to the
+ * clear bits in SOMASK.
+ */
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_OPCODE_DEFAULT (0xD0000000U) /* SP */
+#define PVR_ROGUE_PDSINST_STMC_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_STMC_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_STMC_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_STMC_OP_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_STMC_OP_CLRMSK (0xF87FFFFFU)
+#define PVR_ROGUE_PDSINST_STMC_OP_DEFAULT (0x03800000U) /* STMC */
+#define PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_STMC_SOMASK_CLRMSK (0xFFFFFFE0U)
+
+/* A 1 TB address, with byte granularity. Address must be dword aligned when
+ * repeat is 0.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_ADDRESS_MASK \
+   (UINT64_C(0x000000FFFFFFFFFF))
+
+/* SLC cache policy */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CLRMSK \
+   (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_BYPASS \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CACHED \
+   (UINT64_C(0x4000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_SLCMODE_CACHED_RD_NA \
+   (UINT64_C(0xc000000000000000))
+
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_ADDRESS_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRCADD_ADDRESS_CLRMSK \
+   (UINT64_C(0xFFFFFF0000000000))
+
+/* Size of external memory buffer in bytes (0 is 0 bytes) */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_MSIZE_MASK (0x7FFFFFFFU)
+
+/* When repeat is enabled the size of the DMA in bytes */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_MASK (0x00000003U)
+/* DMA of 1 byte */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_ONE (0x00000000U)
+/* DMA of 2 byte */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_TWO (0x00000001U)
+/* DMA of 3 byte */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_THREE (0x00000002U)
+/* DMA of 4 byte */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_WORDSIZE_FOUR (0x00000003U)
+
+/* DMA to unified store */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_DEST_UNIFIED_STORE (0x00000000U)
+
+/* DMA to common store */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_AO_MASK (0x00001FFFU)
+
+/* Only applies to unified store DMAs, must be clear for common store.
+ *
+ * DMA is issued natively, in its entirety.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_REPEAT_NOREPEAT (0x00000000U)
+/* BSIZE is the number of times the DMA is repeated. Word size is the size of
+ * the DMA. The DMA is expanded into BSIZE DMAs.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_REPEAT_REPEAT (0x00000001U)
+
+/* Size of fetch in dwords (0 is 0 dwords). */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_MASK (0x00000FFFU)
+/* NOTE(review): RANGE/LOWER/UPPER advertise a legal range of 0..255 although
+ * the field mask above allows 12 bits (0..4095) -- confirm the intended legal
+ * range against the PDS ISA documentation.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_RANGE (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_LOWER (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_BSIZE_UPPER (255U)
+
+/* Size of external buffer in bytes. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_SHIFT (33U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_MSIZE_CLRMSK \
+   (UINT64_C(0x00000001FFFFFFFF))
+
+/* Perform OOB checking. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_SHIFT (32U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_CLRMSK \
+   (UINT64_C(0xFFFFFFFEFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_TEST_EN \
+   (UINT64_C(0x0000000100000000))
+
+/* Last DMA in program. */
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_CLRMSK \
+   (UINT64_C(0xFFFFFFFF7FFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN \
+   (UINT64_C(0x0000000080000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_CLRMSK \
+   (UINT64_C(0xFFFFFFFF9FFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_ONE \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_TWO \
+   (UINT64_C(0x0000000020000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_THREE \
+   (UINT64_C(0x0000000040000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_WORDSIZE_FOUR \
+   (UINT64_C(0x0000000060000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_CLRMSK \
+   (UINT64_C(0xFFFFFFFFEFFFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_COMMON_STORE \
+   (UINT64_C(0x0000000010000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CLRMSK \
+   (UINT64_C(0xFFFFFFFFF3FFFFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_BYPASS \
+   (UINT64_C(0x0000000004000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_FORCE_LINE_FILL \
+   (UINT64_C(0x0000000008000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_CLRMSK \
+   (UINT64_C(0xFFFFFFFFFC001FFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_SHIFT (12U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_CLRMSK \
+   (UINT64_C(0xFFFFFFFFFFFFEFFF))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_NOREPEAT \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_REPEAT_REPEAT \
+   (UINT64_C(0x0000000000001000))
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_CLRMSK \
+   (UINT64_C(0xFFFFFFFFFFFFF000))
+
+/* Stop execution flag
+ *
+ * Continue execution after this instruction.
+ */
+#define PVR_ROGUE_PDSINST_END_DISABLE (0x00000000U)
+
+/* Halt execution after this instruction. */
+#define PVR_ROGUE_PDSINST_END_ENABLE (0x00000001U)
+
+/* 64-bit Consts 0-63 Destination. */
+#define PVR_ROGUE_PDSINST_REGS64C_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64 (0U)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64_LOWER (0U)
+#define PVR_ROGUE_PDSINST_REGS64C_CONST64_UPPER (63U)
+
+/* Multiply-add then send to DOUTD (Opcode SP). Optionally perform
+ * out-of-bounds checking (DDMAD(T)).
+ *
+ * cc ?  if ( test == 1 ) then
+ * cc ?     if ( ((src0 * src1) + src2)[39:0] + (src3[11:0]<<2) <=
+ *               src2[39:0] + src3[63:33] ) then
+ * cc ?        OOB = 0
+ * cc ?        doutd = (src0 * src1) + src2, src3
+ * cc ?     else
+ * cc ?        OOB = 1
+ * cc ?     endif
+ * cc ?  else
+ * cc ?     doutd = (src0 * src1) + src2, src3
+ * cc ?  endif
+ *
+ * cc ?  doutd = (src0 * src1) + src2, src3
+ *
+ * This instruction performs a 32 bit multiply, followed by a 64 bit add. This
+ * result is combined with a 4th source and used to create the data for an DOUTD
+ * emit. A DOUTD is a command to a DMA engine, which reads data from memory and
+ * writes it into the USC Unified or Common Store.
+ *
+ * Additionally the DDMAD performs an out-of-bounds check on the DMA when the
+ * test flag is set. If a buffer overflow is predicted, the DMA is skipped and
+ * the OOB (DMA out of bounds) predicate is set.
+ */
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_OPCODE_DEFAULT (0xE0000000U) /* DDMAD */
+#define PVR_ROGUE_PDSINST_DDMAD_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DDMAD_END_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DDMAD_END_ENABLE (0x04000000U)
+
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC0_CLRMSK (0xFC03FFFFU)
+
+/* 32-bit source to multiply - 32-bit range. */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC1_CLRMSK (0xFFFC1FFFU)
+
+/* 64-bit source to add - 64-bit range */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT (6U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC2_CLRMSK (0xFFFFE03FU)
+
+/* 64-bit constant register destination */
+#define PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DDMAD_SRC3_CLRMSK (0xFFFFFFC0U)
+
+/* When DOUTU_SAMPLE_RATE is INSTANCE or SELECTIVE - 32 bit temps per instance
+ * at 4 word granularity. When DOUTU_SAMPLE_RATE is FULL - 32 bit temps per
+ * sample at 4 word granularity.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_MASK (0x0000003FU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_TEMPS_ALIGNSIZE (4U)
+
+/* Sample rate */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_MASK (0x00000003U)
+
+/* Instance rate */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_INSTANCE (0x00000000U)
+
+/* Selective sample rate */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_SELECTIVE (0x00000001U)
+
+/* Full sample rate */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SAMPLE_RATE_FULL (0x00000002U)
+
+/* Code base address (4 byte alignment). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_MASK (0x3FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_EXE_OFF_ALIGNSIZE (4U)
+
+/* Use Interface doutu : Src0 */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_SHIFT (41U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_CLRMSK \
+   (UINT64_C(0xFFFFFDFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_DUAL_PHASE_EN \
+   (UINT64_C(0x0000020000000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_SHIFT (35U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_CLRMSK \
+   (UINT64_C(0xFFFFFE07FFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_TEMPS_ALIGNSIZE (4U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SHIFT (33U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_CLRMSK \
+   (UINT64_C(0xFFFFFFF9FFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_INSTANCE \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_SELECTIVE \
+   (UINT64_C(0x0000000200000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_SAMPLE_RATE_FULL \
+   (UINT64_C(0x0000000400000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_SHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_CLRMSK \
+   (UINT64_C(0xFFFFFFFF00000003))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSHIFT (2U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTU_SRC0_EXE_OFF_ALIGNSIZE (4U)
+
+/* Use Interface doutu : Src1. */
+
+/* Secondary instance data offset in 32 bit words (offset of the instance). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DOFFSET_MASK (0x00001FFFU)
+
+/* Source Base Address for memory fetch. Address must be dword aligned when
+ * repeat is 0.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SBASE_MASK \
+   (UINT64_C(0x000000FFFFFFFFFF))
+
+/* DMA Interface DOutD : Src0 */
+
+/* SLC cache policy */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_SHIFT (62U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CLRMSK \
+   (UINT64_C(0x3FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_BYPASS \
+   (UINT64_C(0x0000000000000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED \
+   (UINT64_C(0x4000000000000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SLCMODE_CACHED_RD_NA \
+   (UINT64_C(0xc000000000000000))
+
+/* Secondary instance data offset in 32 bit words (offset of the instance). */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_DOFFSET_SHIFT (40U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_DOFFSET_CLRMSK \
+   (UINT64_C(0xFFE000FFFFFFFFFF))
+
+/* Source Base Address for memory fetch. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SBASE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC0_SBASE_CLRMSK \
+   (UINT64_C(0xFFFFFF0000000000))
+
+/* When repeat is enabled the size of the DMA in bytes. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_MASK (0x00000003U)
+
+/* DMA of 1 byte */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_ONE (0x00000000U)
+
+/* DMA of 2 byte */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_TWO (0x00000001U)
+
+/* DMA of 3 byte */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_THREE (0x00000002U)
+
+/* DMA of 4 byte */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_WORDSIZE_FOUR (0x00000003U)
+
+/* Unified Store */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DEST_UNIFIED_STORE (0x00000000U)
+
+/* Common Store */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_AO_MASK (0x00001FFFU)
+
+/* Only applies to unified store DMAs, ignore for common store. */
+
+/* DMA is issued natively, in its entirety. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_REPEAT_NOREPEAT (0x00000000U)
+
+/* BSIZE is the number of times the DMA is repeated. Word size is the size of
+ * the DMA. The DMA is expanded into BSIZE DMAs.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_REPEAT_REPEAT (0x00000001U)
+
+/* Size of fetch in dwords (0 means don't DMA, 1=1 etc.) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_BSIZE_MASK (0x00000FFFU)
+
+/* DMA Interface DOutD : Src1 */
+
+/* Last Write or DMA in program (this needs to be set only once, on the last
+ * DMA or last direct write, whichever is last).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_CLRMSK (0x7FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN (0x80000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_SHIFT (29U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_CLRMSK (0x9FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_ONE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_TWO (0x20000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_THREE (0x40000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_WORDSIZE_FOUR (0x60000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_CLRMSK (0xEFFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_UNIFIED_STORE \
+   (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_DEST_COMMON_STORE (0x10000000U)
+
+/* CMODE   Cache Mode */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CLRMSK (0xF3FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_BYPASS (0x04000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_FORCE_LINE_FILL \
+   (0x08000000U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_CLRMSK (0xFC001FFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_SHIFT (12U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_CLRMSK (0xFFFFEFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_NOREPEAT (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_REPEAT_REPEAT (0x00001000U)
+
+/* Size of fetch in dwords (0 means don't DMA, 1=1 etc.) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK (0xFFFFF000U)
+
+/* Lower 64-bit (63:0) data to be written. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SBASE0_MASK \
+   (UINT64_C(0xFFFFFFFFFFFFFFFF))
+
+/* Direct Write Interface doutw : Src0. */
+
+/* Lower 64-bit (63:0) data to be written */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC0_DATA_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC0_DATA_CLRMSK \
+   (UINT64_C(0x0000000000000000))
+
+/* Unified Store */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_DEST_UNIFIED_STORE (0x00000000U)
+
+/* Common Store */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_DEST_COMMON_STORE (0x00000001U)
+
+/* Primary instance data offset in 128 bit words (offset into the current
+ * instance).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_AO_MASK (0x00001FFFU)
+
+/* Direct Write Interface DOutW : Src1. */
+
+/* Last Write or DMA in program (this needs to be set only once, on the last
+ * DMA or last direct write, whichever is last).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_SHIFT (31U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_CLRMSK (0x7FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN (0x80000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_CLRMSK (0xEFFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE \
+   (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE (0x10000000U)
+
+/* CMODE   Cache Mode */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CLRMSK (0xF3FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_BYPASS (0x04000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_FORCE_LINE_FILL \
+   (0x08000000U)
+
+/* Primary instance data offset in 32 bit words (offset into the current
+ * instance). For 64 bit writes the address needs to be 64 bit aligned.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT (13U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_CLRMSK (0xFC001FFFU)
+
+/* 2-bit dword write mask. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_CLRMSK (0xFFFFFFFCU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER (0x00000001U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64 (0x00000002U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_NONE (0x00000003U)
+
+/* VDM Writeback Interface Doutv : Src0 */
+
+/* Number of Indices to use in Draw Indirect (0 = 0) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SBASE_MASK (0xFFFFFFFFU)
+
+/* VDM Writeback Interface Doutv : Src1 */
+
+/* Number of Indices to use in Draw Indirect (0 = 0) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SRC1_SBASE_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTV_SRC1_SBASE_CLRMSK (0x00000000U)
+
+/* Shade Model Control */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_MASK (0x00000003U)
+
+/* Vertex 0 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX0 \
+   (0x00000000U)
+
+/* Vertex 1 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX1 \
+   (0x00000001U)
+
+/* Vertex 2 is the flat shaded color source. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_FLAT_VERTEX2 \
+   (0x00000002U)
+
+/* Gouraud shaded. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SHADEMODEL_GOURAUD (0x00000003U)
+
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_MASK (0x00000003U)
+
+/* 1 Dimension (U) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_1D (0x00000000U)
+
+/* 2 Dimension (UV) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_2D (0x00000001U)
+
+/* 3 Dimension (UVS) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_3D (0x00000002U)
+
+/* 4 Dimension (UVST) */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SIZE_4D (0x00000003U)
+
+/* This issue is perspective correct. */
+
+/* No W */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_PERSPECTIVE_DISABLE (0x00000000U)
+
+/* Use W */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_PERSPECTIVE_ENABLE (0x00000001U)
+
+/* The offset within the vertex if all data is treated as F32 (even if submitted
+ * as F16).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_F32_OFFSET_MASK (0x000000FFU)
+
+/* The offset within vertex taking into account the F16s and F32s present. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_F16_OFFSET_MASK (0x000000FFU)
+
+/* TSP Parameter Fetch Interface DOutI, This command is only legal in a
+ * coefficient loading program.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_MASK (0x1FFFFFFFU)
+
+/* Apply depth bias to this layer. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_DEPTHBIAS_EN (0x08000000U)
+
+/* Ignore the F16 and F32 offsets, and the WMODE and send the primitive id
+ * instead.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PRIMITIVEID_EN (0x04000000U)
+
+/* Shade Model for Layer. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_CLRMSK (0xFCFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX0 \
+   (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX1 \
+   (0x01000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_FLAT_VERTEX2 \
+   (0x02000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SHADEMODEL_GOURAUD (0x03000000U)
+
+/* Point sprite Forced. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_CLRMSK (0xFF7FFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_POINTSPRITE_EN (0x00800000U)
+
+/* Wrap S Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_SHIFT (22U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_CLRMSK (0xFFBFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPS_EN (0x00400000U)
+
+/* Wrap V Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_SHIFT (21U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_CLRMSK (0xFFDFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPV_EN (0x00200000U)
+
+/* Wrap U Coordinate. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_SHIFT (20U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_CLRMSK (0xFFEFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_WRAPU_EN (0x00100000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_CLRMSK (0xFFF3FFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_1D (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_2D (0x00040000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_3D (0x00080000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_SIZE_4D (0x000C0000U)
+
+/* Issue is for F16 precision values. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_SHIFT (17U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_CLRMSK (0xFFFDFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_EN (0x00020000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_SHIFT (16U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_CLRMSK (0xFFFEFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_DISABLE \
+   (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_PERSPECTIVE_ENABLE (0x00010000U)
+/* The offset within the vertex if all data is treated as F32 (even if submitted
+ * as F16).
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F32_OFFSET_SHIFT (8U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F32_OFFSET_CLRMSK (0xFFFF00FFU)
+
+/* The offset within vertex taking into account the F16s and F32s present. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_OFFSET_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC_F16_OFFSET_CLRMSK (0xFFFFFF00U)
+
+/* The starting address to write the data into the common store allocation, in
+ * 128 bit words. Each 32 bit value consumes 128 bit words in the common store.
+ * The issues are packed: Issue 0, followed by Issue 1.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_DEST_MASK (0x000000FFU)
+
+/* TSP Parameter Fetch Interface DOutI : Src0 */
+
+/* This is the last issue for the triangle. */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_SHIFT (63U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_CLRMSK \
+   (UINT64_C(0x7FFFFFFFFFFFFFFF))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE_EN \
+   (UINT64_C(0x8000000000000000))
+
+/* The starting address to write the data into the common store allocation, in
+ * 128 bit words. Each 32 bit value consumes 128 bit words in the common store.
+ * The issues are packed: Issue 0, followed by Issue 1.
+ */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_SHIFT (54U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_DEST_CLRMSK \
+   (UINT64_C(0xC03FFFFFFFFFFFFF))
+
+/* Issue 0 */
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_CLRMSK \
+   (UINT64_C(0xFFFFFFFFE0000000))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_DEPTHBIAS_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_DEPTHBIAS_CLRMSK \
+   (UINT64_C(0xfffffffff7ffffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PRIMITIVEID_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PRIMITIVEID_CLRMSK \
+   (UINT64_C(0xfffffffffbffffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHADEMODEL_SHIFT (24U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SHADEMODEL_CLRMSK \
+   (UINT64_C(0xfffffffffcffffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_POINTSPRITE_SHIFT (23U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_POINTSPRITE_CLRMSK \
+   (UINT64_C(0xffffffffff7fffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPS_SHIFT (22U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPS_CLRMSK \
+   (UINT64_C(0xffffffffffbfffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPV_SHIFT (21U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPV_CLRMSK \
+   (UINT64_C(0xffffffffffdfffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPU_SHIFT (20U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_WRAPU_CLRMSK \
+   (UINT64_C(0xffffffffffefffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SIZE_SHIFT (18U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_SIZE_CLRMSK \
+   (UINT64_C(0xfffffffffff3ffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_SHIFT (17U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_CLRMSK \
+   (UINT64_C(0xfffffffffffdffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PERSPECTIVE_SHIFT (16U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_PERSPECTIVE_CLRMSK \
+   (UINT64_C(0xfffffffffffeffff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F32_OFFSET_SHIFT (8U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F32_OFFSET_CLRMSK \
+   (UINT64_C(0xffffffffffff00ff))
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_OFFSET_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTI_SRC0_ISSUE0_F16_OFFSET_CLRMSK \
+   (UINT64_C(0xffffffffffffff00))
+
+/* TSP Parameter Fetch Interface DOutI : Src1 */
+
+/* 32-bit Temp or DOUT. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_MASK (0x00000007U)
+
+/* DMA data from memory to the USC. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTD (0x00000000U)
+
+/* Write a value directly to the USC. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTW (0x00000001U)
+
+/* Start a USC program. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTU (0x00000002U)
+
+/* Issue a fence back to the VDM (with value). */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTV (0x00000003U)
+
+/* Issue a command to the TSP Parameter Fetch and FPU to calculate and load
+ * coefficients to USC.
+ */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTI (0x00000004U)
+
+/* Issue a fence back to the CDM. Used if compute is enabled. */
+#define PVR_ROGUE_PDSINST_DSTDOUT_DOUTC (0x00000005U)
+
+/* Issue DOUT to external devices (Opcode SP)
+ *
+ * cc ? dst = src0, src1
+ *
+ * PDS programs have to send data somewhere. This is the primary function of
+ * the PDS. All programs must therefore execute at least one of the DOUT,
+ * DDMAD or STM commands. There are the following program types:
+ *
+ * Vertex Shader, Geometry Shader, Hull Shader Programs These programs load data
+ * into memory. These will use the DOUTD or DDMAD commands. Ideally the DDMAD
+ * command is used as the most typical operation Src Address = Index * Stride +
+ * Base, and then DMA from this address. They also schedule the execution of the
+ * USSE and will issue a DOUTU command. This would normally be the last
+ * instruction in the program.
+ *
+ * Obviously the shader programs must not overflow their allocated memory.
+ * However, the USC will do cache look-aheads and so could attempt to fetch
+ * shader code from beyond the end of the program. This could cause a page fault
+ * if the last program instructions are very close to the end of the last valid
+ * memory page.
+ *
+ * To avoid this happening always ensure that the start address of the last
+ * instruction of a shader program does not occur in the last 26 bytes of a
+ * page.
+ *
+ * State/Uniform Loading Programs
+ * These programs load data into memory. These will typically use the
+ * DOUTD command.
+ *
+ * Coefficient Loading Programs
+ * These programs run once per triangle. They load the A,B,C Coefficient for the
+ * iteration of the varyings into the USC. These programs issue DOUTI
+ * commands. These programs must not do any other sort of DOUT command
+ * (DOUTW/DOUTD/DOUTU).
+ *
+ * Pixel Shader Programs
+ * These programs run once per group of pixels and schedule the execution of a
+ * pixel shader on the USC for that group of pixels. This program issues a
+ * DOUTU (and that is all).
+ */
+
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_SHIFT (28U)
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_CLRMSK (0x0FFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_OPCODE_DEFAULT (0xF0000000U) /* DOUT */
+#define PVR_ROGUE_PDSINST_DOUT_CC_SHIFT (27U)
+#define PVR_ROGUE_PDSINST_DOUT_CC_CLRMSK (0xF7FFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_CC_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_CC_ENABLE (0x08000000U)
+#define PVR_ROGUE_PDSINST_DOUT_END_SHIFT (26U)
+#define PVR_ROGUE_PDSINST_DOUT_END_CLRMSK (0xFBFFFFFFU)
+#define PVR_ROGUE_PDSINST_DOUT_END_DISABLE (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_END_ENABLE (0x04000000U)
+
+/* 32-bit source */
+#define PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT (16U)
+#define PVR_ROGUE_PDSINST_DOUT_SRC1_CLRMSK (0xFF00FFFFU)
+
+/* 64-bit source */
+#define PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT (8U)
+#define PVR_ROGUE_PDSINST_DOUT_SRC0_CLRMSK (0xFFFF80FFU)
+
+/* DOUT Destination */
+#define PVR_ROGUE_PDSINST_DOUT_DST_SHIFT (0U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_CLRMSK (0xFFFFFFF8U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTD (0x00000000U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTW (0x00000001U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTU (0x00000002U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTV (0x00000003U)
+#define PVR_ROGUE_PDSINST_DOUT_DST_DOUTI (0x00000004U)
+#if defined(ROGUE_FEATURE_COMPUTE)
+#   define PVR_ROGUE_PDSINST_DOUT_DST_DOUTC (0x00000005U)
+#endif /* ROGUE_FEATURE_COMPUTE */
+
+/* Shift */
+
+#endif /* PVR_ROGUE_PDS_DEFS_H */
diff --git a/src/imagination/vulkan/pds/pvr_rogue_pds_disasm.h b/src/imagination/vulkan/pds/pvr_rogue_pds_disasm.h
new file mode 100644 (file)
index 0000000..719f3ff
--- /dev/null
@@ -0,0 +1,287 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_PDS_DISASM_H
+#define PVR_ROGUE_PDS_DISASM_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "util/log.h"
+
+/* Type of operand for an instruction. */
+#define PVR_PDS_OPERAND_TYPES   \
+   X(TEMP32, temp, 32)          \
+   X(PTEMP32, ptemp, 32)        \
+   X(CONST32, const, 32)        \
+   X(TEMP64, temp, 64)          \
+   X(PTEMP64, ptemp, 64)        \
+   X(CONST64, const, 64)        \
+   X(UNRESOLVED, UNRESOLVED, 0) \
+   X(LITERAL_NUM, literal, 0)
+
+#define X(enum, str, size) enum,
+enum pvr_operand_type { PVR_PDS_OPERAND_TYPES };
+#undef X
+
+#if defined(DUMP_PDS)
+#   define PVR_PDS_PRINT_INST(X) pvr_pds_print_instruction(X)
+#   define PVR_PDS_PRINT_DATA(X, Y, Z) \
+      mesa_logd("\t%s   : DATA = 0x%lX ADDRESS = 0x%X\n", X, (uint64_t)(Y), Z)
+#else
+#   define PVR_PDS_PRINT_INST(X)
+#   define PVR_PDS_PRINT_DATA(X, Y, Z)
+#endif
+
+#define PVR_INSTRUCTION_STMP
+#define PVR_INSTRUCTION_IDIV
+#define PVR_INSTRUCTION_AA
+#define PVR_INSTRUCTION_POL
+#define PVR_INSTRUCTION_IDF
+
+#define PVR_INSTRUCTIONS \
+   X(STM)                \
+   PVR_INSTRUCTION_STMP  \
+   PVR_INSTRUCTION_IDIV  \
+   PVR_INSTRUCTION_AA    \
+   PVR_INSTRUCTION_IDF   \
+   PVR_INSTRUCTION_POL   \
+   X(STMC)               \
+   X(LD)                 \
+   X(ST)                 \
+   X(ADD32)              \
+   X(ADD64)              \
+   X(MAD)                \
+   X(DDMAD)              \
+   X(DOUT)               \
+   X(CMP)                \
+   X(BRA)                \
+   X(LIMM)               \
+   X(SFTLP32)            \
+   X(SFTLP64)            \
+   X(WDF)                \
+   X(LOCK)               \
+   X(RELEASE)            \
+   X(HALT)               \
+   X(NOP)
+
+#define X(a) INS_##a,
+enum pvr_instruction_type { PVR_INSTRUCTIONS };
+#undef X
+
+struct pvr_predicate {
+   uint32_t predicate;
+   bool negate;
+};
+
+struct pvr_instruction;
+
+/* Operands are either sources or dst of an instruction. */
+struct pvr_operand {
+   enum pvr_operand_type type; /* Register bank / literal kind of the operand. */
+
+   struct pvr_instruction *instruction; /* Instruction using this operand. */
+   uint64_t literal; /* Literal value if type == LITERAL_NUM */
+   int address; /* Address in word-sizes. */
+   unsigned absolute_address; /* Absolute address within the segment. */
+   unsigned index; /* Index within instruction, 0 = dst, 1 = src0 .. */
+   bool negate; /* True if the literal is negative. */
+};
+
+#define PVR_PDS_LOP  \
+   X(LOP_NONE, none) \
+   X(LOP_NOT, ~)     \
+   X(LOP_AND, &)     \
+   X(LOP_OR, |)      \
+   X(LOP_XOR, xor)   \
+   X(LOP_XNOR, xnor) \
+   X(LOP_NAND, nand) \
+   X(LOP_NOR, nor)
+
+#define X(lop, str) lop,
+enum pvr_pds_lop { PVR_PDS_LOP };
+#undef X
+
+#define PVR_PDS_DOUT_DSTS \
+   X(DOUT_D, doutd)       \
+   X(DOUT_W, doutw)       \
+   X(DOUT_U, doutu)       \
+   X(DOUT_V, doutv)       \
+   X(DOUT_I, douti)       \
+   X(DOUT_C, doutc)       \
+   X(DOUT_R, doutr)       \
+   X(DOUT_INVALID0, invalid)
+
+#define X(dout_dst, str) dout_dst,
+enum pvr_dout_type { PVR_PDS_DOUT_DSTS };
+#undef X
+
+#define PVR_PDS_MAX_INST_STR_LEN 256
+
+enum pvr_cop { COP_EQ, COP_GT, COP_LT, COP_NE };
+
+struct pvr_instruction {
+   enum pvr_instruction_type type;
+   struct pvr_instruction *next;
+};
+
+struct pvr_add {
+   struct pvr_instruction instruction;
+   struct pvr_operand *dst;
+   struct pvr_operand *src1;
+   struct pvr_operand *src0;
+   bool cc;
+   bool sna;
+   bool alum;
+};
+
+struct pvr_simple {
+   struct pvr_instruction instruction;
+   bool cc;
+};
+
+struct pvr_ldst {
+   struct pvr_instruction instruction;
+   bool cc;
+   struct pvr_operand *src0;
+   bool st;
+};
+
+struct pvr_mad {
+   struct pvr_instruction instruction;
+   struct pvr_operand *dst;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   struct pvr_operand *src2;
+   bool cc;
+   bool sna;
+   bool alum;
+};
+
+struct pvr_stm {
+   struct pvr_instruction instruction;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   struct pvr_operand *src2;
+   struct pvr_operand *src3;
+   unsigned stream_out;
+   bool tst;
+   bool cc;
+   bool ccs_global;
+   bool ccs_so;
+};
+
+struct pvr_stmc {
+   struct pvr_instruction instruction;
+   struct pvr_operand *src0;
+   bool cc;
+};
+
+struct pvr_bra {
+   struct pvr_instruction instruction;
+   struct pvr_predicate *srcc;
+   struct pvr_predicate *setc; /* negate ignored */
+   char *target;
+   signed address; /* signed relative address */
+};
+
+struct pvr_dout {
+   struct pvr_instruction instruction;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   enum pvr_dout_type dst;
+   bool cc;
+   bool END;
+};
+
+struct pvr_ddmad {
+   struct pvr_instruction instruction;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   struct pvr_operand *src2;
+   struct pvr_operand *src3;
+   bool cc;
+   bool END;
+};
+
+struct pvr_sftlp {
+   struct pvr_instruction instruction;
+   enum pvr_pds_lop lop;
+   struct pvr_operand *dst;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+   struct pvr_operand *src2;
+   bool cc;
+   bool IM;
+};
+
+struct pvr_limm {
+   struct pvr_instruction instruction;
+   bool cc;
+   bool GR;
+   struct pvr_operand *dst;
+   struct pvr_operand *src0;
+};
+
+struct pvr_cmp {
+   struct pvr_instruction instruction;
+   enum pvr_cop cop;
+   bool IM;
+   bool cc;
+   struct pvr_operand *src0;
+   struct pvr_operand *src1;
+};
+
+#define PVR_PDS_ERR_PARAM_RANGE 0 /* Error when register is out of range. */
+#define PVR_PDS_ERR_SP_UNKNOWN \
+   1 /* Error when opcode for sp instruction is unknown. */
+
+struct pvr_dissassembler_error {
+   uint32_t type; /* One of the PVR_PDS_ERR_* values. */
+   enum pvr_instruction_type instruction; /* The type of instruction where
+                                             the error occurred. */
+   char *text; /* A string representation of the error. */
+   uint32_t parameter; /* The parameter of the instruction, 0 = dst,
+                          1 = src0.. */
+   uint32_t raw; /* The raw value that caused the error. */
+
+   void *context; /* The passed in context. */
+};
+
+/* Callback when an error happens. */
+typedef void (*PVR_ERR_CALLBACK)(struct pvr_dissassembler_error);
+
+void pvr_pds_free_instruction(struct pvr_instruction *inst);
+struct pvr_instruction *
+pvr_pds_disassemble_instruction2(void *context,
+                                 PVR_ERR_CALLBACK error_call_back,
+                                 uint32_t instruction);
+void pvr_pds_disassemble_instruction(char *buffer,
+                                     size_t instr_len,
+                                     struct pvr_instruction *instruction);
+
+#if defined(DUMP_PDS)
+void pvr_pds_print_instruction(uint32_t instr);
+#endif
+
+#endif /* PVR_ROGUE_PDS_DISASM_H */
diff --git a/src/imagination/vulkan/pds/pvr_rogue_pds_encode.h b/src/imagination/vulkan/pds/pvr_rogue_pds_encode.h
new file mode 100644 (file)
index 0000000..a7ba617
--- /dev/null
@@ -0,0 +1,538 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_PDS_ENCODE_H
+#define PVR_ROGUE_PDS_ENCODE_H
+
+#include <stdint.h>
+
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "util/macros.h"
+
+/* Classify a REGS64TP register index into its bank (temp64 / ptemp64).
+ * Returns 2 when the index lies outside both ranges.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs64tp(uint32_t value)
+{
+   if (value > PVR_ROGUE_PDSINST_REGS64TP_TEMP64_UPPER) {
+      if (value >= PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER &&
+          value <= PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_UPPER)
+         return PVR_ROGUE_PDSINST_REGS64TP_PTEMP64;
+
+      return 2;
+   }
+
+   return PVR_ROGUE_PDSINST_REGS64TP_TEMP64;
+}
+
+/* Classify a REGS32 register index into its bank
+ * (const32 / temp32 / ptemp32). Returns 3 when out of every range.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32(uint32_t value)
+{
+   if (value <= PVR_ROGUE_PDSINST_REGS32_CONST32_UPPER)
+      return PVR_ROGUE_PDSINST_REGS32_CONST32;
+
+   if ((value >= PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER) &&
+       (value <= PVR_ROGUE_PDSINST_REGS32_TEMP32_UPPER)) {
+      return PVR_ROGUE_PDSINST_REGS32_TEMP32;
+   }
+   if ((value >= PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER) &&
+       (value <= PVR_ROGUE_PDSINST_REGS32_PTEMP32_UPPER)) {
+      return PVR_ROGUE_PDSINST_REGS32_PTEMP32;
+   }
+   return 3;
+}
+
+/* Pack a SFTLP64 (64-bit shift / logical operation) instruction.
+ *
+ * cc  - execute only when the condition flag is set.
+ * lop - logical operation (PVR_ROGUE_PDSINST_LOP_*).
+ * im  - immediate-mode flag (IM bit).
+ * dst - destination, a temp/ptemp 64-bit register index (REGS64TP).
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stflp64(uint32_t cc,
+                                                          uint32_t lop,
+                                                          uint32_t im,
+                                                          uint32_t src0,
+                                                          uint32_t src1,
+                                                          uint32_t src2,
+                                                          uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SFTLP64
+              << PVR_ROGUE_PDSINST_SFTLP64_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_DST_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_SRC0_SHIFT);
+   encoded |= ((im & 1U) << PVR_ROGUE_PDSINST_SFTLP64_IM_SHIFT);
+   encoded |= ((lop & PVR_ROGUE_PDSINST_LOP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP64_LOP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_SFTLP64_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Classify a REGS32T register index: temp32 bank, or 1 when the index is
+ * beyond the temp range.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32t(uint32_t value)
+{
+   return (value <= PVR_ROGUE_PDSINST_REGS32T_TEMP32_UPPER)
+             ? PVR_ROGUE_PDSINST_REGS32T_TEMP32
+             : 1;
+}
+
+/* Classify a REGS32TP register index into its bank (temp32 / ptemp32).
+ * Returns 2 when the index lies outside both ranges.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs32tp(uint32_t value)
+{
+   if (value > PVR_ROGUE_PDSINST_REGS32TP_TEMP32_UPPER) {
+      if (value >= PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER &&
+          value <= PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_UPPER)
+         return PVR_ROGUE_PDSINST_REGS32TP_PTEMP32;
+
+      return 2;
+   }
+
+   return PVR_ROGUE_PDSINST_REGS32TP_TEMP32;
+}
+
+/* Pack a SFTLP32 (32-bit shift / logical operation) instruction.
+ * Same field meanings as pvr_pds_inst_encode_stflp64(), but the operands
+ * are 32-bit register indices (REGS32T / REGS32 / REGS32TP banks).
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stflp32(uint32_t im,
+                                                          uint32_t cc,
+                                                          uint32_t lop,
+                                                          uint32_t src0,
+                                                          uint32_t src1,
+                                                          uint32_t src2,
+                                                          uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEB_SFTLP32
+              << PVR_ROGUE_PDSINST_SFTLP32_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_DST_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_SRC0_SHIFT);
+   encoded |= ((lop & PVR_ROGUE_PDSINST_LOP_MASK)
+               << PVR_ROGUE_PDSINST_SFTLP32_LOP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_SFTLP32_CC_SHIFT);
+   encoded |= ((im & 1U) << PVR_ROGUE_PDSINST_SFTLP32_IM_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack an STM instruction.
+ * NOTE(review): field meanings below are taken from the field names --
+ * SO_* appear to describe the stream-output sources and CCS_* the
+ * conditional-check selects; confirm against the PDS ISA documentation.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stm(uint32_t CCS_CCS_GLOBAL,
+                                                      uint32_t CCS_CCS_SO,
+                                                      uint32_t CCS_CCS_CC,
+                                                      uint32_t SO_TST,
+                                                      uint32_t SO,
+                                                      uint32_t SO_SRC0,
+                                                      uint32_t SO_SRC1,
+                                                      uint32_t SO_SRC2,
+                                                      uint32_t SO_SRC3)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEB_STM
+              << PVR_ROGUE_PDSINST_STM_OPCODE_SHIFT;
+   encoded |= ((SO_SRC3 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_STM_SO_SRC3_SHIFT);
+   encoded |= ((SO_SRC2 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_STM_SO_SRC2_SHIFT);
+   encoded |= ((SO_SRC1 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_STM_SO_SRC1_SHIFT);
+   encoded |= ((SO_SRC0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_STM_SO_SRC0_SHIFT);
+   encoded |=
+      ((SO & PVR_ROGUE_PDSINST_SO_MASK) << PVR_ROGUE_PDSINST_STM_SO_SHIFT);
+   encoded |= ((SO_TST & 1U) << PVR_ROGUE_PDSINST_STM_SO_TST_SHIFT);
+   encoded |= ((CCS_CCS_CC & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_CC_SHIFT);
+   encoded |= ((CCS_CCS_SO & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_SO_SHIFT);
+   encoded |=
+      ((CCS_CCS_GLOBAL & 1U) << PVR_ROGUE_PDSINST_STM_CCS_CCS_GLOBAL_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Classify a REGS64 register index into its bank
+ * (const64 / temp64 / ptemp64). Returns 3 when out of every range.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_pds_inst_decode_field_range_regs64(uint32_t value)
+{
+   if (value > PVR_ROGUE_PDSINST_REGS64_CONST64_UPPER) {
+      if (value >= PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER &&
+          value <= PVR_ROGUE_PDSINST_REGS64_TEMP64_UPPER)
+         return PVR_ROGUE_PDSINST_REGS64_TEMP64;
+
+      if (value >= PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER &&
+          value <= PVR_ROGUE_PDSINST_REGS64_PTEMP64_UPPER)
+         return PVR_ROGUE_PDSINST_REGS64_PTEMP64;
+
+      return 3;
+   }
+
+   return PVR_ROGUE_PDSINST_REGS64_CONST64;
+}
+
+/* Pack a MAD (multiply-add) instruction: dst <- src0 * src1 (+/- src2).
+ * NOTE(review): `sna` looks like the add/subtract select for src2 and
+ * `alum` the ALU mode bit -- taken from the field names, confirm.
+ */
+static ALWAYS_INLINE uint32_t pvr_rogue_inst_encode_mad(uint32_t sna,
+                                                        uint32_t alum,
+                                                        uint32_t cc,
+                                                        uint32_t src0,
+                                                        uint32_t src1,
+                                                        uint32_t src2,
+                                                        uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEA_MAD
+              << PVR_ROGUE_PDSINST_MAD_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64T_MASK)
+               << PVR_ROGUE_PDSINST_MAD_DST_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_MAD_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_MAD_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_MAD_SRC0_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_MAD_CC_SHIFT);
+   encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_MAD_ALUM_SHIFT);
+   encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_MAD_SNA_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack an ADD64 instruction: dst <- src0 (+/-) src1 on 64-bit operands.
+ * `sna` selects add vs subtract and `alum` the ALU mode (per field names).
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_add64(uint32_t cc,
+                                                        uint32_t alum,
+                                                        uint32_t sna,
+                                                        uint32_t src0,
+                                                        uint32_t src1,
+                                                        uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_ADD64
+              << PVR_ROGUE_PDSINST_ADD64_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_ADD64_DST_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_ADD64_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_ADD64_SRC0_SHIFT);
+   encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_ADD64_SNA_SHIFT);
+   encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_ADD64_ALUM_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_ADD64_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack an ADD32 instruction; 32-bit counterpart of
+ * pvr_pds_inst_encode_add64() with the same flag semantics.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_add32(uint32_t cc,
+                                                        uint32_t alum,
+                                                        uint32_t sna,
+                                                        uint32_t src0,
+                                                        uint32_t src1,
+                                                        uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_ADD32
+              << PVR_ROGUE_PDSINST_ADD32_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_REGS32TP_MASK)
+               << PVR_ROGUE_PDSINST_ADD32_DST_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_ADD32_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_ADD32_SRC0_SHIFT);
+   encoded |= ((sna & 1U) << PVR_ROGUE_PDSINST_ADD32_SNA_SHIFT);
+   encoded |= ((alum & 1U) << PVR_ROGUE_PDSINST_ADD32_ALUM_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_ADD32_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a CMP (register-register compare) instruction.
+ * The IM bit is hardwired to 0 (register form; see the CMPI encoder for the
+ * immediate form) and SETCP to 1, so the result updates the predicate.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_cmp(uint32_t cc,
+                                                      uint32_t cop,
+                                                      uint32_t src0,
+                                                      uint32_t src1)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_CMP
+              << PVR_ROGUE_PDSINST_CMP_OPCODE_SHIFT;
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_CMP_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_CMP_SRC0_SHIFT);
+   encoded |= UINT32_C(0x0) << PVR_ROGUE_PDSINST_CMP_IM_SHIFT; /* Register. */
+   encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMP_SETCP_SHIFT;
+   encoded |=
+      ((cop & PVR_ROGUE_PDSINST_COP_MASK) << PVR_ROGUE_PDSINST_CMP_COP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_CMP_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a CMPI (compare with 16-bit immediate) instruction.
+ * CMPI shares the CMP opcode (OPCODEC_CMP); the forms are distinguished by
+ * the IM bit, hardwired to 1 here. SETCP is 1 so the predicate is updated.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_cmpi(uint32_t cc,
+                                                       uint32_t cop,
+                                                       uint32_t src0,
+                                                       uint32_t im16)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_CMP
+              << PVR_ROGUE_PDSINST_CMPI_OPCODE_SHIFT;
+   encoded |= ((im16 & PVR_ROGUE_PDSINST_IMM16_MASK)
+               << PVR_ROGUE_PDSINST_CMPI_IM16_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64TP_MASK)
+               << PVR_ROGUE_PDSINST_CMPI_SRC0_SHIFT);
+   encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMPI_IM_SHIFT; /* Immediate. */
+   encoded |= UINT32_C(0x1) << PVR_ROGUE_PDSINST_CMPI_SETCP_SHIFT;
+   encoded |=
+      ((cop & PVR_ROGUE_PDSINST_COP_MASK) << PVR_ROGUE_PDSINST_CMPI_COP_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_CMPI_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a BRA (branch) instruction.
+ *
+ * srcc - predicate source to test (PVR_ROGUE_PDSINST_PREDICATE_*).
+ * neg  - negate the tested predicate.
+ * setc - predicate to set on taking the branch.
+ * addr - branch target (masked to the BRAADDR field width).
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_bra(uint32_t srcc,
+                                                      uint32_t neg,
+                                                      uint32_t setc,
+                                                      uint32_t addr)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_BRA
+              << PVR_ROGUE_PDSINST_BRA_OPCODE_SHIFT;
+   encoded |= ((addr & PVR_ROGUE_PDSINST_BRAADDR_MASK)
+               << PVR_ROGUE_PDSINST_BRA_ADDR_SHIFT);
+   encoded |= ((setc & PVR_ROGUE_PDSINST_PREDICATE_MASK)
+               << PVR_ROGUE_PDSINST_BRA_SETC_SHIFT);
+   encoded |= ((neg & 1U) << PVR_ROGUE_PDSINST_BRA_NEG_SHIFT);
+   encoded |= ((srcc & PVR_ROGUE_PDSINST_PREDICATE_MASK)
+               << PVR_ROGUE_PDSINST_BRA_SRCC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack an LD instruction (special-group opcode, OP = LD).
+ * src0 is a 64-bit register operand; cc makes execution conditional.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_ld(uint32_t cc, uint32_t src0)
+{
+   const uint32_t encoded =
+      (PVR_ROGUE_PDSINST_OPCODEC_SP << PVR_ROGUE_PDSINST_LD_OPCODE_SHIFT) |
+      ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK)
+       << PVR_ROGUE_PDSINST_LD_SRC0_SHIFT) |
+      (PVR_ROGUE_PDSINST_OPCODESP_LD << PVR_ROGUE_PDSINST_LD_OP_SHIFT) |
+      ((cc & 1U) << PVR_ROGUE_PDSINST_LD_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack an ST instruction (special-group opcode, OP = ST).
+ * src0 is a 64-bit register operand; cc makes execution conditional.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_st(uint32_t cc, uint32_t src0)
+{
+   const uint32_t encoded =
+      (PVR_ROGUE_PDSINST_OPCODEC_SP << PVR_ROGUE_PDSINST_ST_OPCODE_SHIFT) |
+      ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK)
+       << PVR_ROGUE_PDSINST_ST_SRC0_SHIFT) |
+      (PVR_ROGUE_PDSINST_OPCODESP_ST << PVR_ROGUE_PDSINST_ST_OP_SHIFT) |
+      ((cc & 1U) << PVR_ROGUE_PDSINST_ST_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a WDF instruction (special-group opcode, OP = WDF -- wait for data
+ * fence). Only the condition flag is configurable.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_wdf(uint32_t cc)
+{
+   const uint32_t encoded =
+      (PVR_ROGUE_PDSINST_OPCODEC_SP << PVR_ROGUE_PDSINST_WDF_OPCODE_SHIFT) |
+      (PVR_ROGUE_PDSINST_OPCODESP_WDF << PVR_ROGUE_PDSINST_WDF_OP_SHIFT) |
+      ((cc & 1U) << PVR_ROGUE_PDSINST_WDF_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a LIMM (load immediate) instruction.
+ * Note the field naming: `src0` carries the 16-bit immediate value and
+ * `src1` the destination temp register (REGS32T bank); `gr` is the GR bit.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_limm(uint32_t cc,
+                                                       uint32_t src1,
+                                                       uint32_t src0,
+                                                       uint32_t gr)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP
+              << PVR_ROGUE_PDSINST_LIMM_OPCODE_SHIFT;
+   encoded |= ((gr & 1U) << PVR_ROGUE_PDSINST_LIMM_GR_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_IMM16_MASK)
+               << PVR_ROGUE_PDSINST_LIMM_SRC0_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_LIMM_SRC1_SHIFT);
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_LIMM
+              << PVR_ROGUE_PDSINST_LIMM_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_LIMM_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a LOCK instruction (special-group opcode, OP = LOCK). */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_lock(uint32_t cc)
+{
+   const uint32_t encoded =
+      (PVR_ROGUE_PDSINST_OPCODEC_SP << PVR_ROGUE_PDSINST_LOCK_OPCODE_SHIFT) |
+      (PVR_ROGUE_PDSINST_OPCODESP_LOCK << PVR_ROGUE_PDSINST_LOCK_OP_SHIFT) |
+      ((cc & 1U) << PVR_ROGUE_PDSINST_LOCK_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a RELEASE instruction (special-group opcode, OP = RELEASE); pairs
+ * with LOCK.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_release(uint32_t cc)
+{
+   const uint32_t encoded =
+      (PVR_ROGUE_PDSINST_OPCODEC_SP
+       << PVR_ROGUE_PDSINST_RELEASE_OPCODE_SHIFT) |
+      (PVR_ROGUE_PDSINST_OPCODESP_RELEASE
+       << PVR_ROGUE_PDSINST_RELEASE_OP_SHIFT) |
+      ((cc & 1U) << PVR_ROGUE_PDSINST_RELEASE_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a HALT instruction (special-group opcode, OP = HALT). */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_halt(uint32_t cc)
+{
+   const uint32_t encoded =
+      (PVR_ROGUE_PDSINST_OPCODEC_SP << PVR_ROGUE_PDSINST_HALT_OPCODE_SHIFT) |
+      (PVR_ROGUE_PDSINST_OPCODESP_HALT << PVR_ROGUE_PDSINST_HALT_OP_SHIFT) |
+      ((cc & 1U) << PVR_ROGUE_PDSINST_HALT_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack an STMC instruction (special-group opcode, OP = STMC) carrying a
+ * stream-output mask. NOTE(review): SOMASK semantics inferred from the
+ * field name -- confirm against the PDS ISA documentation.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_stmc(uint32_t cc,
+                                                       uint32_t so_mask)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_SP
+              << PVR_ROGUE_PDSINST_STMC_OPCODE_SHIFT;
+   encoded |= ((so_mask & PVR_ROGUE_PDSINST_SOMASK_MASK)
+               << PVR_ROGUE_PDSINST_STMC_SOMASK_SHIFT);
+   encoded |= PVR_ROGUE_PDSINST_OPCODESP_STMC
+              << PVR_ROGUE_PDSINST_STMC_OP_SHIFT;
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_STMC_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Classify a REGS64C register index: const64 bank, or 1 when the index is
+ * beyond the constant range.
+ */
+static ALWAYS_INLINE uint32_t
+pvr_rogue_pds_inst_decode_field_range_regs64c(uint32_t value)
+{
+   return (value <= PVR_ROGUE_PDSINST_REGS64C_CONST64_UPPER)
+             ? PVR_ROGUE_PDSINST_REGS64C_CONST64
+             : 1;
+}
+
+/* Pack a DDMAD instruction (multiply-add whose result drives a DMA;
+ * src3 is a 64-bit constant operand per the REGS64C mask).
+ * `end` marks this as the terminating instruction of the program.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_ddmad(uint32_t cc,
+                                                        uint32_t end,
+                                                        uint32_t src0,
+                                                        uint32_t src1,
+                                                        uint32_t src2,
+                                                        uint32_t src3)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_DDMAD
+              << PVR_ROGUE_PDSINST_DDMAD_OPCODE_SHIFT;
+   encoded |= ((src3 & PVR_ROGUE_PDSINST_REGS64C_MASK)
+               << PVR_ROGUE_PDSINST_DDMAD_SRC3_SHIFT);
+   encoded |= ((src2 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_DDMAD_SRC2_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32T_MASK)
+               << PVR_ROGUE_PDSINST_DDMAD_SRC1_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_DDMAD_SRC0_SHIFT);
+   encoded |= ((end & 1U) << PVR_ROGUE_PDSINST_DDMAD_END_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_DDMAD_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+/* Pack a DOUT (data output) instruction.
+ *
+ * dst  - output destination select (PVR_ROGUE_PDSINST_DSTDOUT_*, e.g.
+ *        DOUTD / DOUTW / DOUTU).
+ * end  - marks this as the terminating instruction of the program.
+ * src0 - 64-bit register operand; src1 - 32-bit register operand.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_inst_encode_dout(uint32_t cc,
+                                                       uint32_t end,
+                                                       uint32_t src1,
+                                                       uint32_t src0,
+                                                       uint32_t dst)
+{
+   uint32_t encoded = 0;
+
+   encoded |= PVR_ROGUE_PDSINST_OPCODEC_DOUT
+              << PVR_ROGUE_PDSINST_DOUT_OPCODE_SHIFT;
+   encoded |= ((dst & PVR_ROGUE_PDSINST_DSTDOUT_MASK)
+               << PVR_ROGUE_PDSINST_DOUT_DST_SHIFT);
+   encoded |= ((src0 & PVR_ROGUE_PDSINST_REGS64_MASK)
+               << PVR_ROGUE_PDSINST_DOUT_SRC0_SHIFT);
+   encoded |= ((src1 & PVR_ROGUE_PDSINST_REGS32_MASK)
+               << PVR_ROGUE_PDSINST_DOUT_SRC1_SHIFT);
+   encoded |= ((end & 1U) << PVR_ROGUE_PDSINST_DOUT_END_SHIFT);
+   encoded |= ((cc & 1U) << PVR_ROGUE_PDSINST_DOUT_CC_SHIFT);
+
+   PVR_PDS_PRINT_INST(encoded);
+
+   return encoded;
+}
+
+#endif /* PVR_ROGUE_PDS_ENCODE_H */
diff --git a/src/imagination/vulkan/pds/pvr_xgl_pds.c b/src/imagination/vulkan/pds/pvr_xgl_pds.c
new file mode 100644 (file)
index 0000000..e153712
--- /dev/null
@@ -0,0 +1,1725 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "pvr_device_info.h"
+#include "pvr_pds.h"
+#include "pvr_rogue_pds_defs.h"
+#include "pvr_rogue_pds_disasm.h"
+#include "pvr_rogue_pds_encode.h"
+#include "util/log.h"
+#include "util/macros.h"
+
+/* Convert a bank-relative register index (constant / temp / persistent temp)
+ * into the flat register index expected by the instruction encoders, by
+ * adding the bank's LOWER offset.
+ */
+#define R32_C(x) ((x) + PVR_ROGUE_PDSINST_REGS32_CONST32_LOWER)
+#define R32_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32_TEMP32_LOWER)
+#define R32_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32_PTEMP32_LOWER)
+
+#define R32TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_TEMP32_LOWER)
+#define R32TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS32TP_PTEMP32_LOWER)
+
+#define R64_C(x) ((x) + PVR_ROGUE_PDSINST_REGS64_CONST64_LOWER)
+#define R64_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64_TEMP64_LOWER)
+#define R64_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64_PTEMP64_LOWER)
+
+#define R64TP_T(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_TEMP64_LOWER)
+#define R64TP_P(x) ((x) + PVR_ROGUE_PDSINST_REGS64TP_PTEMP64_LOWER)
+
+/* 32-bit PTemp index for draw indirect base instance. */
+#define PVR_INDIRECT_BASE_INSTANCE_PTEMP 1U
+
+/* Number of constants to reserve per DDMAD instruction in the PDS Vertex. */
+#define PVR_PDS_DDMAD_NUM_CONSTS 8
+
+#if defined(TRACE_PDS)
+/* Some macros for a pretty printing. */
+
+#   define pvr_debug_pds_const(reg, size, annotation) \
+      mesa_logd("const[%d]   @  (%dbits)  %s", reg, size, annotation)
+#   define pvr_debug_pds_temp(reg, size, annotation) \
+      mesa_logd("temp[%d]    @  (%dbits)  %s", reg, size, annotation)
+#   define pvr_debug_pds_note(...) mesa_logd("              // " __VA_ARGS__)
+#   define pvr_debug_pds_flag(flags, flag) \
+      {                                    \
+         if ((flags & flag) == flag)       \
+            mesa_logd(" > " #flag);        \
+      }
+#   define pvr_debug(annotation) mesa_logd(annotation)
+
+#else
+/* Tracing disabled: all debug helpers compile away to nothing. */
+#   define pvr_debug_pds_const(reg, size, annotation)
+#   define pvr_debug_pds_temp(reg, size, annotation)
+#   define pvr_debug_pds_note(...)
+#   define pvr_debug_pds_flag(flags, flag)
+#   define pvr_debug(annotation)
+#endif
+
+/* Tracks progressive writes of variable-size const-map entries into the
+ * entry buffer owned by a struct pvr_pds_info.
+ */
+struct pvr_pds_const_map_entry_write_state {
+   const struct pvr_pds_info *PDS_info;
+   struct pvr_const_map_entry *entry; /* Entry most recently prepared. */
+   size_t size_of_last_entry_in_bytes;
+   uint32_t entry_count; /* Number of entries prepared so far. */
+   size_t entries_size_in_bytes; /* Total bytes consumed in the buffer. */
+};
+
+/* Reset the write state so the next prepared entry lands at the start of
+ * PDS_info's entry buffer; all counters start at zero.
+ */
+static void pvr_init_pds_const_map_entry_write_state(
+   struct pvr_pds_info *PDS_info,
+   struct pvr_pds_const_map_entry_write_state *entry_write_state)
+{
+   *entry_write_state = (struct pvr_pds_const_map_entry_write_state){
+      .PDS_info = PDS_info,
+      .entry = PDS_info->entries,
+   };
+}
+
+/* Returns a pointer to storage for the next struct pvr_const_map_entry.
+ * Entries are variable-size, so the write pointer advances by the size of
+ * the previously prepared entry. The caller fills in the returned entry;
+ * overflow of the info buffer is only caught by the assert (debug builds).
+ */
+static void *pvr_prepare_next_pds_const_map_entry(
+   struct pvr_pds_const_map_entry_write_state *entry_write_state,
+   size_t size_of_next_entry_in_bytes)
+{
+   /* Move on to the next entry. */
+   uint8_t *next_entry = ((uint8_t *)entry_write_state->entry +
+                          entry_write_state->size_of_last_entry_in_bytes);
+   entry_write_state->entry = (struct pvr_const_map_entry *)next_entry;
+
+   entry_write_state->size_of_last_entry_in_bytes = size_of_next_entry_in_bytes;
+   entry_write_state->entry_count++;
+   entry_write_state->entries_size_in_bytes += size_of_next_entry_in_bytes;
+
+   /* Check if we can write into the next entry. */
+   assert(entry_write_state->entries_size_in_bytes <=
+          entry_write_state->PDS_info->entries_size_in_bytes);
+
+   return entry_write_state->entry;
+}
+
+/* Emit a const-map entry describing the vertex-attribute address DMA at
+ * const offset `const_val`. With use_robust_vertex_fetch the larger robust
+ * entry variant is written, which additionally records the component size
+ * and a robustness-buffer offset (NOTE(review): presumably used for
+ * robustBufferAccess-style clamping by the consumer -- confirm).
+ */
+static void pvr_write_pds_const_map_entry_vertex_attribute_address(
+   struct pvr_pds_const_map_entry_write_state *entry_write_state,
+   const struct pvr_pds_vertex_dma *DMA,
+   uint32_t const_val,
+   bool use_robust_vertex_fetch)
+{
+   pvr_debug_pds_note("DMA %d dwords, stride %d, offset %d, bindingIdx %d",
+                      DMA->size_in_dwords,
+                      DMA->stride,
+                      DMA->offset,
+                      DMA->binding_index);
+
+   if (use_robust_vertex_fetch) {
+      struct pvr_const_map_entry_robust_vertex_attribute_address
+         *robust_attribute_entry;
+
+      robust_attribute_entry =
+         pvr_prepare_next_pds_const_map_entry(entry_write_state,
+                                              sizeof(*robust_attribute_entry));
+      robust_attribute_entry->type =
+         PVR_PDS_CONST_MAP_ENTRY_TYPE_ROBUST_VERTEX_ATTRIBUTE_ADDRESS;
+      robust_attribute_entry->const_offset = const_val;
+      robust_attribute_entry->binding_index = DMA->binding_index;
+      robust_attribute_entry->component_size_in_bytes =
+         DMA->component_size_in_bytes;
+      robust_attribute_entry->offset = DMA->offset;
+      robust_attribute_entry->stride = DMA->stride;
+      robust_attribute_entry->size_in_dwords = DMA->size_in_dwords;
+      robust_attribute_entry->robustness_buffer_offset =
+         DMA->robustness_buffer_offset;
+   } else {
+      struct pvr_const_map_entry_vertex_attribute_address *attribute_entry;
+
+      attribute_entry =
+         pvr_prepare_next_pds_const_map_entry(entry_write_state,
+                                              sizeof(*attribute_entry));
+      attribute_entry->type =
+         PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS;
+      attribute_entry->const_offset = const_val;
+      attribute_entry->binding_index = DMA->binding_index;
+      attribute_entry->offset = DMA->offset;
+      attribute_entry->stride = DMA->stride;
+      attribute_entry->size_in_dwords = DMA->size_in_dwords;
+   }
+}
+
+/* Convenience wrapper: encode a DOUT to the DOUTU destination with src1
+ * fixed to 0.
+ */
+static ALWAYS_INLINE uint32_t pvr_pds_encode_doutu(uint32_t cc,
+                                                   uint32_t end,
+                                                   uint32_t src0)
+{
+   return pvr_pds_inst_encode_dout(cc,
+                                   end,
+                                   0,
+                                   src0,
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTU);
+}
+
+/* Encode a DOUTD (DMA burst) instruction and emit the LITERAL32 const-map
+ * entry that supplies its SRC1 control word (burst size, address offset,
+ * cache mode, destination store select and optional last-DMA flag).
+ *
+ * const32 - 32-bit constant register that will hold the control word.
+ * const64 - 64-bit constant register holding the source address.
+ * store   - destination store select, OR'd directly into the control word.
+ */
+static uint32_t
+pvr_encode_burst(struct pvr_pds_const_map_entry_write_state *entry_write_state,
+                 bool last_DMA,
+                 bool halt,
+                 unsigned int const32,
+                 unsigned int const64,
+                 unsigned int dma_size_in_dwords,
+                 unsigned int destination,
+                 unsigned int store)
+{
+   uint32_t literal_value;
+
+   /* Encode literal value. */
+   literal_value = dma_size_in_dwords
+                   << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_SHIFT;
+   literal_value |= destination
+                    << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_AO_SHIFT;
+   literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_CMODE_CACHED |
+                    store;
+
+   if (last_DMA)
+      literal_value |= PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_LAST_EN;
+
+   /* Create const map entry. */
+   struct pvr_const_map_entry_literal32 *literal_entry;
+
+   literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
+                                                        sizeof(*literal_entry));
+   literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+   literal_entry->const_offset = const32;
+   literal_entry->literal_value = literal_value;
+
+   /* Encode DOUTD */
+   return pvr_pds_inst_encode_dout(0,
+                                   halt,
+                                   R32_C(const32),
+                                   R64_C(const64),
+                                   PVR_ROGUE_PDSINST_DSTDOUT_DOUTD);
+}
+
+/* As pvr_encode_burst(), with the destination store forced to the common
+ * store.
+ */
+#define pvr_encode_burst_cs(psDataEntry,        \
+                            last_DMA,           \
+                            halt,               \
+                            const32,            \
+                            const64,            \
+                            dma_size_in_dwords, \
+                            destination)        \
+   pvr_encode_burst(                            \
+      psDataEntry,                              \
+      last_DMA,                                 \
+      halt,                                     \
+      const32,                                  \
+      const64,                                  \
+      dma_size_in_dwords,                       \
+      destination,                              \
+      PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_COMMON_STORE)
+
+/* Encode a DOUTW (direct register write) instruction and emit the LITERAL32
+ * const-map entry holding its SRC1 control word.
+ *
+ * data_mask selects which half of the 64-bit payload is written:
+ * 0x1 = lower 32 bits, 0x2 = upper 32 bits, anything else = all 64 bits.
+ * Cached mode is only set when the device has slc_mcu_cache_controls.
+ */
+static uint32_t pvr_encode_direct_write(
+   struct pvr_pds_const_map_entry_write_state *entry_write_state,
+   bool last_DMA,
+   bool halt,
+   unsigned int const32,
+   unsigned int const64,
+   uint32_t data_mask,
+   unsigned int destination,
+   uint32_t destination_store,
+   const struct pvr_device_info *dev_info)
+{
+   struct pvr_const_map_entry_literal32 *literal_entry;
+
+   uint32_t instruction =
+      pvr_pds_inst_encode_dout(0,
+                               halt,
+                               const32,
+                               const64,
+                               PVR_ROGUE_PDSINST_DSTDOUT_DOUTW);
+
+   literal_entry = pvr_prepare_next_pds_const_map_entry(entry_write_state,
+                                                        sizeof(*literal_entry));
+   literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+   literal_entry->const_offset = const32;
+   literal_entry->literal_value = destination_store;
+
+   if (PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls)) {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_CMODE_CACHED;
+   }
+
+   literal_entry->literal_value |=
+      destination << PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_AO_SHIFT;
+
+   if (data_mask == 0x1) {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_LOWER;
+   } else if (data_mask == 0x2) {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_UPPER;
+   } else {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_BSIZE_ALL64;
+   }
+
+   if (last_DMA) {
+      literal_entry->literal_value |=
+         PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_LAST_EN;
+   }
+
+   return instruction;
+}
+
+/* Constant and Temporary register allocation
+ * - reserve space for a 32-bit register or a 64-bit register
+ * - returned indices are offsets to 32-bit register locations
+ * - 64-bit registers need to be aligned to even indices.
+ */
+#define RESERVE_32BIT 1U
+#define RESERVE_64BIT 2U
+
+/* In debug builds the allocation helpers receive a human-readable register
+ * name for tracing; in release builds the name is dropped.  Note that the
+ * expansions have no trailing semicolon, so the macros stay usable in
+ * expression position (e.g. as the right-hand side of an assignment or as a
+ * function argument).
+ */
+#if defined(DEBUG)
+#   define pvr_find_constant(usage, words, name) \
+      pvr_find_constant2(usage, words, name)
+#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, name)
+#else
+#   define pvr_find_constant(usage, words, name) \
+      pvr_find_constant2(usage, words, NULL)
+#   define pvr_get_temps(usage, words, name) pvr_get_temps2(usage, words, NULL)
+#endif
+
+/* Allocate 'words' consecutive 32-bit constant registers (RESERVE_32BIT or
+ * RESERVE_64BIT) from the per-DDMAD usage bitmasks.
+ *
+ * Each const_usage entry tracks one block of PVR_PDS_DDMAD_NUM_CONSTS dword
+ * registers, one bit per register.  The inner search steps at 'words'
+ * alignment so that 64-bit reservations land on even indices.
+ *
+ * \param const_usage Array of PVR_MAX_VERTEX_ATTRIB_DMAS usage bitmasks;
+ *                    updated in place with the newly reserved bits.
+ * \param words       Number of 32-bit registers to reserve (1 or 2).
+ * \param const_name  Debug-only register name (may be NULL).
+ *
+ * Returns the allocated register index as an offset in 32-bit units; aborts
+ * via unreachable() if no space is left.
+ */
+static uint32_t
+pvr_find_constant2(uint8_t *const_usage, uint8_t words, const char *const_name)
+{
+   uint32_t const_index = ~0U;
+   uint32_t step = words;
+   uint8_t mask = (1 << words) - 1;
+
+   assert(words == 1 || words == 2);
+
+   /* Find a register at 'step' alignment that satisfies the mask. */
+   for (uint32_t i = 0; i < PVR_MAX_VERTEX_ATTRIB_DMAS; i++) {
+      for (uint32_t b = 0; b < PVR_PDS_DDMAD_NUM_CONSTS; b += step) {
+         if ((const_usage[i] & (mask << b)) != 0)
+            continue;
+         const_usage[i] |= (mask << b);
+         /* Block stride is the per-block register count, matching the
+          * callers' const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS layout.
+          */
+         const_index = i * PVR_PDS_DDMAD_NUM_CONSTS + b;
+         pvr_debug_pds_const(const_index, words * 32, const_name);
+         return const_index;
+      }
+   }
+
+   unreachable("Unexpected: Space cannot be found for constant");
+   return ~0U;
+}
+
+/* Total number of 32-bit PDS temporaries available to the program. */
+#define PVR_MAX_PDS_TEMPS 32
+/* Tracks PDS temporary register allocation. */
+struct pvr_temp_usage {
+   /* Bitmask of allocated temps, one bit per 32-bit register. */
+   uint32_t temp_usage;
+   /* Number of temps handed out so far. */
+   uint8_t temp_used;
+   /* High-water mark: one past the highest allocated temp index. */
+   uint8_t temps_needed;
+};
+
+/* Sentinel for "no temp allocated".  NOTE(review): this expands to ~0 (an
+ * int with all bits set); callers store it in uint32_t variables, while
+ * pvr_get_temps2() returns it truncated to uint8_t (0xFF) — confirm the two
+ * representations are never compared against each other.
+ */
+#define PVR_INVALID_TEMP UINT8_C(~0)
+
+/* Allocate 'temps_needed' consecutive 32-bit temps (1 or 2) from the usage
+ * bitmask.  Two-dword (64-bit) reservations search at even alignment.
+ *
+ * Updates the running allocation count and the high-water mark
+ * (temps->temps_needed) that reports the program's total temp footprint.
+ *
+ * \param temps        Allocation state; updated in place.
+ * \param temps_needed Number of 32-bit temps to reserve (1 or 2).
+ * \param temp_name    Debug-only register name (may be NULL).
+ *
+ * Returns the first allocated temp index; aborts via unreachable() when no
+ * space can be found.
+ */
+static uint8_t pvr_get_temps2(struct pvr_temp_usage *temps,
+                              uint8_t temps_needed,
+                              const char *temp_name)
+{
+   uint8_t step = temps_needed;
+   /* Unsigned 32-bit mask: shifting a promoted (signed) 8-bit mask by up to
+    * 31 bits would shift into the sign bit, which is undefined behavior.
+    */
+   uint32_t mask = (1U << temps_needed) - 1;
+
+   assert(temps_needed == 1 || temps_needed == 2);
+   assert(temps->temp_used + temps_needed <= PVR_MAX_PDS_TEMPS);
+
+   for (uint8_t i = 0; i < PVR_MAX_PDS_TEMPS; i += step) {
+      if ((temps->temp_usage & (mask << i)) != 0)
+         continue;
+
+      const size_t clz_bits = 8 * sizeof(unsigned int);
+
+      temps->temp_usage |= (mask << i);
+      temps->temp_used += temps_needed;
+      /* High-water mark: one past the highest bit currently in use. */
+      temps->temps_needed =
+         clz_bits - __builtin_clz((unsigned int)temps->temp_usage);
+
+      pvr_debug_pds_temp(i, temps_needed * 32, temp_name);
+
+      return i;
+   }
+
+   unreachable("Unexpected: Space cannot be found for temps");
+   return PVR_INVALID_TEMP;
+}
+
+/**
+ * Wrapper macro to add a toggle for "data mode", allowing us to calculate the
+ * size of a PDS program without actually attempting to store it.
+ *
+ * \param dest The array/memory pointer where the PDS program should be stored.
+ *             If the given code is NULL, automatically switch to count mode
+ *             instead of attempting to fill in unallocated memory.
+ * \param counter The local counter that holds the total instruction count.
+ * \param statement What function call/value should be stored at dest[counter]
+ *                  when condition is false.
+ */
+
+#define PVR_PDS_MODE_TOGGLE(dest, counter, statement) \
+   if (!dest) {                                       \
+      counter++;                                      \
+   } else {                                           \
+      dest[counter++] = statement;                    \
+      PVR_PDS_PRINT_INST(statement);                  \
+   }
+
+/**
+ * Generates the PDS vertex primary program for the dma's listed in the input
+ * structure. Produces the constant map for the Vulkan driver based upon the
+ * requirements of the instructions added to the program.
+ *
+ * PDS Data Layout
+ * ---------------
+ *
+ * The PDS data is optimized for the DDMAD layout, with the data for those
+ * instructions laid out first. The data required for other instructions is laid
+ * out in the entries unused by the DDMADs.
+ *
+ * DDMAD layout
+ * \verbatim
+ *     bank | index | usage
+ *     0    |  0:1  | temps (current index)[-]
+ *     2    |  2:3  | stride[32]
+ *     1    |  4:5  | base address[64]
+ *     3    |  6:7  | ctrl[64]
+ * \endverbatim
+ *
+ *  Each DMA whose stride > 0 requires one entry, laid out as above. We stride
+ *     over the banks to ensure that each ddmad reads each of its operands from a
+ *     different bank (i.e. remove bank clashes)
+ *
+ *     Note: This is "wasting" const[0:1] and const[2], however these free
+ *     registers will be used by other, non-ddmad instructions.
+ *
+ *     The const register usage is maintained in the const_usage array; the
+ * DDMAD instructions, for example, will utilize the top 5 registers in each
+ * block of 8, hence a 'usage mask' of 0xF8 (0b11111000).
+ *
+ *     Constant Map
+ *     ------------
+ *
+ *     The constant map is built up as we add PDS instructions and passed back
+ * for the driver to fill in the PDS data section with the correct parameters
+ * for each draw call.
+ *
+ * \param input_program PDS Program description.
+ * \param code Buffer to be filled in with the PDS program. If NULL is provided,
+ *             automatically switch to count-mode, preventing writes to
+ *             unallocated memory.
+ * \param info PDS info structure filled in for the driver, contains the
+ *             constant map.
+ * \param use_robust_vertex_fetch Do vertex fetches apply range checking.
+ * \param dev_info pvr device information struct.
+ */
+void pvr_pds_generate_vertex_primary_program(
+   struct pvr_pds_vertex_primary_program_input *input_program,
+   uint32_t *code,
+   struct pvr_pds_info *info,
+   bool use_robust_vertex_fetch,
+   const struct pvr_device_info *dev_info)
+{
+   struct pvr_pds_const_map_entry_write_state entry_write_state;
+   struct pvr_const_map_entry_doutu_address *doutu_address_entry;
+
+   uint32_t instruction = 0; /* index into code */
+   uint32_t index; /* index used for current attribute, either vertex or
+                    * instance.
+                    */
+
+   uint32_t total_dma_count = 0;
+   uint32_t running_dma_count = 0;
+
+   uint32_t write_instance_control = ~0;
+   uint32_t write_vertex_control = ~0;
+   uint32_t write_base_instance_control = ~0;
+   uint32_t write_base_vertex_control = ~0;
+   uint32_t pvr_write_draw_index_control = ~0;
+
+   uint32_t ddmad_count = 0;
+   uint32_t doutw_count = 0;
+
+   uint32_t base_instance = 0;
+   uint32_t base_vertex = 0;
+   uint32_t draw_index = 0;
+
+   uint8_t const_usage[PVR_MAX_VERTEX_ATTRIB_DMAS] = { 0 };
+
+   struct pvr_temp_usage temp_usage = { 0 };
+
+   uint32_t zero_temp = PVR_INVALID_TEMP;
+
+   uint32_t max_index_temp = PVR_INVALID_TEMP;
+   uint32_t current_index_temp = PVR_INVALID_TEMP;
+
+   uint32_t index_id_temp = PVR_INVALID_TEMP;
+   uint32_t base_instance_ID_temp = PVR_INVALID_TEMP;
+   uint32_t instance_ID_temp = PVR_INVALID_TEMP;
+
+   /* Debug tracing of program flags. */
+   pvr_debug("pvr_pds_generate_vertex_primary_program");
+   pvr_debug("=================================================");
+   pvr_debug_pds_flag(input_program->flags,
+                      PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED);
+   pvr_debug_pds_flag(input_program->flags,
+                      PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED);
+   pvr_debug_pds_flag(input_program->flags,
+                      PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT);
+   pvr_debug_pds_flag(input_program->flags,
+                      PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT);
+   pvr_debug_pds_flag(input_program->flags,
+                      PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED);
+   pvr_debug_pds_flag(input_program->flags,
+                      PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED);
+   pvr_debug_pds_flag(input_program->flags,
+                      PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED);
+   pvr_debug(" ");
+
+   pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
+
+   /* At a minimum we need 2 dwords for the DOUTU, but since we allocate in
+    * blocks of 4 we can reserve dwords for the instance/vertex DOUTW.
+    */
+   info->data_size_in_dwords = 4;
+
+   /* Reserve 2 temps - these are automatically filled in by the VDM
+    *
+    * For instanced draw calls we manually increment the instance id by the
+    * base-instance offset which is either provided as a constant, or in a
+    * ptemp (for draw indirect)
+    *
+    * temp - contents
+    * ---------------
+    * 0    - index id (pre-filled)
+    * 1    - base instance + instance id
+    */
+   index_id_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Index id");
+   instance_ID_temp =
+      pvr_get_temps(&temp_usage, RESERVE_32BIT, "VDM Instance id");
+
+   /* Reserve the lowest 2 dwords for DOUTU.
+    * [------XX]
+    */
+   const_usage[0] = 0x03;
+
+   /* Reserve consts for all the DDMAD's. */
+   for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
+      /* Mark the consts required by this ddmad "in-use".
+       * [XXXXX---]
+       */
+      const_usage[ddmad_count++] |= 0xf8;
+   }
+
+   /* Start off by assuming we can fit everything in the 8 dwords/ddmad
+    * footprint, if any DOUTD/DOUTW falls outside we will increase this
+    * counter.
+    */
+   if (ddmad_count)
+      info->data_size_in_dwords = PVR_PDS_DDMAD_NUM_CONSTS * ddmad_count;
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
+      doutw_count++;
+      write_vertex_control =
+         pvr_find_constant(const_usage, RESERVE_32BIT, "Vertex id DOUTW Ctrl");
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
+      doutw_count++;
+      write_instance_control = pvr_find_constant(const_usage,
+                                                 RESERVE_32BIT,
+                                                 "Instance id DOUTW Ctrl");
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
+      doutw_count++;
+      write_base_instance_control =
+         pvr_find_constant(const_usage,
+                           RESERVE_32BIT,
+                           "Base Instance DOUTW Ctrl");
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
+      doutw_count++;
+      write_base_vertex_control = pvr_find_constant(const_usage,
+                                                    RESERVE_32BIT,
+                                                    "Base Vertex DOUTW Ctrl");
+
+      /* Load base vertex from constant for non-indirect variants. */
+      if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
+          0) {
+         struct pvr_const_map_entry_base_vertex *psBaseVertexEntry =
+            (struct pvr_const_map_entry_base_vertex *)entry_write_state.entry;
+
+         base_vertex =
+            pvr_find_constant(const_usage, RESERVE_32BIT, "base_vertex");
+
+         psBaseVertexEntry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*psBaseVertexEntry));
+         psBaseVertexEntry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_VERTEX;
+         psBaseVertexEntry->const_offset = base_vertex;
+      }
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
+      doutw_count++;
+      pvr_write_draw_index_control =
+         pvr_find_constant(const_usage, RESERVE_32BIT, "Draw Index DOUTW Ctrl");
+
+      /* Set draw index to 0 for non-indirect variants. */
+      if ((input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) ==
+          0) {
+         struct pvr_const_map_entry_literal32 *literal_entry;
+
+         draw_index =
+            pvr_find_constant(const_usage, RESERVE_32BIT, "draw_index");
+
+         literal_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*literal_entry));
+         literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+         literal_entry->const_offset = draw_index;
+         literal_entry->literal_value = 0;
+      }
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+      /* Load absolute instance id into instance_ID_temp. */
+      PVR_PDS_MODE_TOGGLE(
+         code,
+         instruction,
+         pvr_pds_inst_encode_add32(
+            /* cc    */ 0,
+            /* alum  */ 0,
+            /* sna   */ 0,
+            /* src0  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
+            /* src1  */ R32_T(instance_ID_temp),
+            /* dst   */ R32TP_T(instance_ID_temp)));
+   } else if (input_program->flags &
+              PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
+      struct pvr_const_map_entry_base_instance *base_instance_entry =
+         (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
+
+      base_instance =
+         pvr_find_constant(const_usage, RESERVE_32BIT, "base_instance");
+
+      PVR_PDS_MODE_TOGGLE(code,
+                          instruction,
+                          pvr_pds_inst_encode_add32(
+                             /* cc    */ 0,
+                             /* alum  */ 0,
+                             /* sna   */ 0,
+                             /* src0  */ R32_C(base_instance),
+                             /* src1  */ R32_T(instance_ID_temp),
+                             /* dst   */ R32TP_T(instance_ID_temp)));
+
+      base_instance_entry =
+         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                              sizeof(*base_instance_entry));
+      base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
+      base_instance_entry->const_offset = base_instance;
+   } else if (input_program->flags &
+              PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
+      struct pvr_const_map_entry_base_instance *base_instance_entry =
+         (struct pvr_const_map_entry_base_instance *)entry_write_state.entry;
+
+      base_instance = pvr_find_constant(const_usage,
+                                        RESERVE_32BIT,
+                                        "base_instance (Driver Const)");
+
+      /* Base instance provided by the driver. */
+      base_instance_entry =
+         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                              sizeof(*base_instance_entry));
+      base_instance_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE;
+      base_instance_entry->const_offset = base_instance;
+   }
+
+   total_dma_count = ddmad_count;
+
+   total_dma_count += doutw_count;
+
+   if (use_robust_vertex_fetch) {
+      pvr_debug_pds_note("RobustBufferVertexFetch Initialization");
+
+      if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+         zero_temp = pvr_get_temps(&temp_usage, RESERVE_32BIT, "zero_temp");
+
+         /* Load 0 into instance_ID_temp. */
+         PVR_PDS_MODE_TOGGLE(code,
+                             instruction,
+                             pvr_pds_inst_encode_limm(0, /* cc */
+                                                      zero_temp, /* SRC1 */
+                                                      0, /* SRC0 */
+                                                      0 /* GR */
+                                                      ));
+      } else {
+         zero_temp = pvr_get_temps(&temp_usage, RESERVE_64BIT, "zero_temp");
+
+         max_index_temp =
+            pvr_get_temps(&temp_usage, RESERVE_64BIT, "uMaxIndex");
+         current_index_temp =
+            pvr_get_temps(&temp_usage, RESERVE_64BIT, "uCurrentIndex");
+
+         PVR_PDS_MODE_TOGGLE(code,
+                             instruction,
+                             pvr_pds_inst_encode_stflp64(
+                                0, /* cc */
+                                PVR_ROGUE_PDSINST_LOP_XOR, /* LOP */
+                                1, /* IM */
+                                R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
+                                                          */
+                                R64TP_T(zero_temp >> 1), /* SRC1 (REGS64TP)
+                                                          */
+                                0, /* SRC2 (REGS32) */
+                                R64TP_T(zero_temp >> 1) /* DST (REG64TP) */
+                                ));
+         PVR_PDS_MODE_TOGGLE(code,
+                             instruction,
+                             pvr_pds_inst_encode_stflp64(
+                                0, /* cc */
+                                PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
+                                1, /* IM */
+                                R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
+                                                          */
+                                0, /* SRC1 (REGS64TP) */
+                                0, /* SRC2 (REGS32) */
+                                R64TP_T(current_index_temp >> 1) /* DST */
+                                /* (REG64TP) */
+                                ));
+         PVR_PDS_MODE_TOGGLE(code,
+                             instruction,
+                             pvr_pds_inst_encode_stflp64(
+                                0, /* cc */
+                                PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
+                                1, /* IM */
+                                R64TP_T(zero_temp >> 1), /* SRC0 (REGS64TP)
+                                                          */
+                                0, /* SRC1 (REGS64TP) */
+                                0, /* SRC2 (REGS32) */
+                                R64TP_T(max_index_temp >> 1) /* DST */
+                                /* (REG64TP) */
+                                ));
+      }
+   }
+
+   if (input_program->dma_count && use_robust_vertex_fetch) {
+      PVR_PDS_MODE_TOGGLE(
+         code,
+         instruction,
+         pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCC */
+                                 0, /* Neg */
+                                 PVR_HAS_FEATURE(dev_info, pds_ddmadt)
+                                    ? PVR_ROGUE_PDSINST_PREDICATE_OOB
+                                    : PVR_ROGUE_PDSINST_PREDICATE_P0, /* SETC */
+                                 1 /* Addr */
+                                 ));
+   }
+
+   for (uint32_t dma = 0; dma < input_program->dma_count; dma++) {
+      uint32_t const_base = dma * PVR_PDS_DDMAD_NUM_CONSTS;
+      uint32_t control_word;
+      struct pvr_const_map_entry_literal32 *literal_entry;
+
+      struct pvr_pds_vertex_dma *vertex_dma = &input_program->dma_list[dma];
+      bool last_DMA = (++running_dma_count == total_dma_count);
+
+      pvr_debug_pds_note("Vertex Attribute DMA %d (last=%d)", dma, last_DMA);
+
+      /* The id we use to index into this dma. */
+      if (vertex_dma->flags & PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE) {
+         pvr_debug_pds_note("Instance Rate (divisor = %d)",
+                            vertex_dma->divisor);
+
+         /* 4    - madd 0 - needs to be 64-bit aligned
+          * 5    - madd 1
+          */
+         if (vertex_dma->divisor > 1) {
+            const uint32_t adjusted_instance_ID_temp =
+               pvr_get_temps(&temp_usage,
+                             RESERVE_64BIT,
+                             "adjusted_instance_ID_temp");
+            const uint32_t MADD_temp =
+               pvr_get_temps(&temp_usage, RESERVE_64BIT, "MADD_temp");
+
+            /* 1. Remove base instance value from temp 1 to get instance id
+             * 2. Divide the instance id by the divisor - Iout = (Iin *
+             *    Multiplier) >> (shift+31)
+             * 3. Add the base instance back on.
+             *
+             * Need two zero temps for the add part of the later MAD.
+             */
+
+            PVR_PDS_MODE_TOGGLE(code,
+                                instruction,
+                                pvr_pds_inst_encode_add64(
+                                   /* cc    */ 0,
+                                   /* alum  */ 0,
+                                   /* sna   */ 1,
+                                   /* src0  */ R64_T(MADD_temp >> 1),
+                                   /* src1  */ R64_T(MADD_temp >> 1),
+                                   /* dst   */ R64TP_T(MADD_temp >> 1)));
+
+            if (input_program->flags &
+                PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+               /* Subtract base instance from temp 1, put into
+                * adjusted_instance_ID_temp.
+                */
+               PVR_PDS_MODE_TOGGLE(
+                  code,
+                  instruction,
+                  pvr_pds_inst_encode_add32(
+                     /* cc    */ 0,
+                     /* alum  */ 0,
+                     /* sna   */ 1,
+                     /* src0  */ R32_T(instance_ID_temp),
+                     /* src1  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
+                     /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
+            } else if (input_program->flags &
+                       PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
+               /* Subtract base instance from temp 1, put into
+                * adjusted_instance_ID_temp.
+                */
+               PVR_PDS_MODE_TOGGLE(
+                  code,
+                  instruction,
+                  pvr_pds_inst_encode_add32(
+                     /* cc    */ 0,
+                     /* alum  */ 0,
+                     /* sna   */ 1,
+                     /* src0  */ R32_T(instance_ID_temp),
+                     /* src1  */ R32_C(base_instance),
+                     /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
+            } else {
+               /* Copy instance from temp 1 to adjusted_instance_ID_temp.
+                */
+               PVR_PDS_MODE_TOGGLE(
+                  code,
+                  instruction,
+                  pvr_pds_inst_encode_add32(
+                     /* cc    */ 0,
+                     /* alum  */ 0,
+                     /* sna   */ 0,
+                     /* src0  */ R32_T(instance_ID_temp),
+                     /* src1  */ R32_T(MADD_temp), /* MADD_temp is set
+                                                    * to 0 at this point.
+                                                    */
+                     /* dst   */ R32TP_T(adjusted_instance_ID_temp)));
+            }
+
+            /* shift = the bit of the next highest power of two. */
+            uint32_t shift_unsigned =
+               (31 - __builtin_clz(vertex_dma->divisor - 1)) + 1;
+            int32_t shift = (int32_t)shift_unsigned;
+            uint32_t shift_2s_comp;
+
+            pvr_debug_pds_note(
+               "Perform instance rate divide (as integer multiply and rshift)");
+
+            const uint32_t multipier_constant =
+               pvr_find_constant(const_usage,
+                                 RESERVE_32BIT,
+                                 "MultiplierConstant (for InstanceDivisor)");
+
+            /* multiplier = ( 2^(shift + 31) + (divisor - 1) ) / divisor,
+               note: the division above is integer division. */
+            uint64_t multipier64 =
+               (uint64_t)((((uint64_t)1 << ((uint64_t)shift_unsigned + 31)) +
+                           ((uint64_t)vertex_dma->divisor - (uint64_t)1)) /
+                          (uint64_t)vertex_dma->divisor);
+            uint32_t multiplier = (uint32_t)multipier64;
+
+            pvr_debug_pds_note(" - Value of MultiplierConstant = %u",
+                               multiplier);
+            pvr_debug_pds_note(" - Value of Shift = %d", shift);
+
+            literal_entry =
+               pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                    sizeof(*literal_entry));
+            literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+            literal_entry->const_offset = multipier_constant;
+            literal_entry->literal_value = multiplier;
+
+            /* (Iin * Multiplier) */
+            PVR_PDS_MODE_TOGGLE(
+               code,
+               instruction,
+               pvr_rogue_inst_encode_mad(0, /* Sign of add is positive */
+                                         0, /* Unsigned ALU mode */
+                                         0, /* Unconditional */
+                                         R32_C(multipier_constant),
+                                         R32_T(adjusted_instance_ID_temp),
+                                         R64_T(MADD_temp / 2),
+                                         R64TP_T(MADD_temp / 2)));
+
+            /*  >> (shift + 31) */
+            shift += 31;
+            shift *= -1;
+
+            if (shift < -31) {
+               /* >> (31) */
+               shift_2s_comp = 0xFFFE1;
+               PVR_PDS_MODE_TOGGLE(code,
+                                   instruction,
+                                   pvr_pds_inst_encode_stflp64(
+                                      /* cc */ 0,
+                                      /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+                                      /* IM */ 1, /*  enable immediate */
+                                      /* SRC0 */ R64_T(MADD_temp / 2),
+                                      /* SRC1 */ 0, /* This won't be used
+                                                       in a shift
+                                                       operation. */
+                                      /* SRC2 (Shift) */ shift_2s_comp,
+                                      /* DST */ R64TP_T(MADD_temp / 2)));
+               shift += 31;
+            }
+
+            /* >> (shift + 31) */
+            shift_2s_comp = *((uint32_t *)&shift);
+            PVR_PDS_MODE_TOGGLE(code,
+                                instruction,
+                                pvr_pds_inst_encode_stflp64(
+                                   /* cc */ 0,
+                                   /* LOP */ PVR_ROGUE_PDSINST_LOP_NONE,
+                                   /* IM */ 1, /*  enable immediate */
+                                   /* SRC0 */ R64_T(MADD_temp / 2),
+                                   /* SRC1 */ 0, /* This won't be used
+                                                  * in a shift
+                                                  * operation. */
+                                   /* SRC2 (Shift) */ shift_2s_comp,
+                                   /* DST */ R64TP_T(MADD_temp / 2)));
+
+            if (input_program->flags &
+                PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+               /* Add base instance. */
+               PVR_PDS_MODE_TOGGLE(
+                  code,
+                  instruction,
+                  pvr_pds_inst_encode_add32(
+                     /* cc    */ 0,
+                     /* alum  */ 0,
+                     /* sna   */ 0,
+                     /* src0  */ R32_T(MADD_temp),
+                     /* src1  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
+                     /* dst   */ R32TP_T(MADD_temp)));
+            } else if (input_program->flags &
+                       PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
+               /* Add base instance. */
+               PVR_PDS_MODE_TOGGLE(code,
+                                   instruction,
+                                   pvr_pds_inst_encode_add32(
+                                      /* cc    */ 0,
+                                      /* alum  */ 0,
+                                      /* sna   */ 0,
+                                      /* src0  */ R32_T(MADD_temp),
+                                      /* src1  */ R32_C(base_instance),
+                                      /* dst   */ R32TP_T(MADD_temp)));
+            }
+
+            pvr_debug_pds_note(
+               "DMA Vertex Index will be sourced from 'MADD_temp'");
+            index = MADD_temp;
+         } else if (vertex_dma->divisor == 0) {
+            if (base_instance_ID_temp == PVR_INVALID_TEMP) {
+               base_instance_ID_temp = pvr_get_temps(&temp_usage,
+                                                     RESERVE_32BIT,
+                                                     "uBaseInstanceIDTemp");
+            }
+
+            /* Load 0 into instance_ID_temp. */
+            PVR_PDS_MODE_TOGGLE(code,
+                                instruction,
+                                pvr_pds_inst_encode_limm(
+                                   /* cc       */ 0,
+                                   /* src1     */ base_instance_ID_temp,
+                                   /* src0     */ 0,
+                                   /* gr       */ 0));
+
+            if (input_program->flags &
+                PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+               /* Add base instance. */
+               PVR_PDS_MODE_TOGGLE(
+                  code,
+                  instruction,
+                  pvr_pds_inst_encode_add32(
+                     /* cc    */ 0,
+                     /* alum  */ 0,
+                     /* sna   */ 0,
+                     /* src0  */ R32_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP),
+                     /* src1  */ R32_T(base_instance_ID_temp),
+                     /* dst   */ R32TP_T(base_instance_ID_temp)));
+
+            } else if (input_program->flags &
+                       PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT) {
+               /* Add base instance. */
+               PVR_PDS_MODE_TOGGLE(
+                  code,
+                  instruction,
+                  pvr_pds_inst_encode_add32(
+                     /* cc    */ 0,
+                     /* alum  */ 0,
+                     /* sna   */ 0,
+                     /* src0  */ R32_C(base_instance),
+                     /* src1  */ R32_T(base_instance_ID_temp),
+                     /* dst   */ R32TP_T(base_instance_ID_temp)));
+            }
+
+            pvr_debug_pds_note(
+               "DMA Vertex Index will be sourced from 'uBaseInstanceIdTemp'");
+            index = base_instance_ID_temp;
+         } else {
+            pvr_debug_pds_note(
+               "DMA Vertex Index will be sourced from 'uInstanceIdTemp'");
+            index = instance_ID_temp;
+         }
+      } else {
+         pvr_debug_pds_note(
+            "DMA Vertex Index will be sourced from 'uIndexIdTemp'");
+         index = index_id_temp;
+      }
+
+      /* DDMAD Const Usage [__XX_---] */
+      pvr_write_pds_const_map_entry_vertex_attribute_address(
+         &entry_write_state,
+         vertex_dma,
+         const_base + 4,
+         use_robust_vertex_fetch);
+
+      /* DDMAD Const Usage [__XXX---] */
+      literal_entry =
+         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                              sizeof(*literal_entry));
+      literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+      literal_entry->const_offset = const_base + 3;
+      literal_entry->literal_value = vertex_dma->stride;
+
+      control_word = vertex_dma->size_in_dwords
+                     << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
+      control_word |= vertex_dma->destination
+                      << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_AO_SHIFT;
+      control_word |= (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
+                       PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
+
+      /* DDMADT instructions will do a dummy doutd when OOB if
+       * PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN is set but as the driver
+       * would need to do another doutd after an OOB DDMADT to provide the 'in
+       * bounds' data the DDMADT can't be set as LAST.
+       *
+       * This requires us to include a final dummy DDMAD.LAST instruction.
+       *
+       * Pseudocode taken from SeriesXE2017.PDS Instruction Controller
+       * Specification.doc
+       *
+       *       DDMAD src0,src1,src2,src3
+       *
+       *       calculated_source_address := src0*src1+src2
+       *       base_address              := src2
+       *       dma_parameters            := src3[31:0]
+       *       buffer_size               := src3[63:33]
+       *       test                      := src3[32]
+       *
+       *       if (test == 1) {
+       *          // DDMAD(T)
+       *          if (calculated_source_address[39:0] + (burst_size<<2) <=
+       *              base_address[39:0] + buffer_size) {
+       *             OOB := 0
+       *             DOUTD calculated_source_address,dma_parameters
+       *          } else {
+       *             OOB := 1
+       *             if (last_instance == 1) {
+       *                dma_parameters[BURST_SIZE] := 0
+       *                DOUTD calculated_source_address,dma_parameters
+       *             }
+       *          }
+       *       } else {
+       *          // DDMAD
+       *          DOUTD calculated_source_address,dma_parameters
+       *       }
+       */
+
+      if (last_DMA && (!PVR_HAS_FEATURE(dev_info, pds_ddmadt) ||
+                       !use_robust_vertex_fetch)) {
+         pvr_debug_pds_note("LAST DDMAD");
+         control_word |= PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
+      }
+
+      /* DDMAD Const Usage [_XXXX---] */
+      literal_entry =
+         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                              sizeof(*literal_entry));
+      literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+      literal_entry->const_offset = (const_base + 6);
+      literal_entry->literal_value = control_word;
+
+      if (PVR_HAS_FEATURE(dev_info, pds_ddmadt)) {
+         /* DDMAD Const Usage [XXXXX---]
+          * With DDMADT an extra 32bits of SRC3 contains the information for
+          * performing out-of-bounds tests on the DMA.
+          */
+
+         if (use_robust_vertex_fetch) {
+            struct pvr_pds_const_map_entry_vertex_attr_ddmadt_oob_buffer_size
+               *obb_buffer_size;
+            obb_buffer_size =
+               pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                    sizeof(*obb_buffer_size));
+
+            obb_buffer_size->type =
+               PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTR_DDMADT_OOB_BUFFER_SIZE;
+            obb_buffer_size->const_offset = const_base + 7;
+            obb_buffer_size->binding_index = vertex_dma->binding_index;
+         } else {
+            literal_entry =
+               pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                    sizeof(*literal_entry));
+            literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+            literal_entry->const_offset = const_base + 7;
+            literal_entry->literal_value = 0;
+         }
+
+         PVR_PDS_MODE_TOGGLE(
+            code,
+            instruction,
+            pvr_pds_inst_encode_ddmad(0, /* cc */
+                                      0, /* END */
+                                      R32_C(const_base + 3), /* SRC0 (REGS32) */
+                                      index, /* SRC1 (REGS32T) */
+                                      R64_C((const_base + 4) >> 1), /* SRC2
+                                                                     * (REGS64)
+                                                                     */
+                                      R64_C((const_base + 6) >> 1) /* SRC3
+                                                                    * (REGS64C)
+                                                                    */
+                                      ));
+
+         if (use_robust_vertex_fetch) {
+            /* If not out of bounds, skip next DDMAD instructions. */
+            PVR_PDS_MODE_TOGGLE(code,
+                                instruction,
+                                pvr_pds_inst_encode_ddmad(
+                                   1, /* cc */
+                                   0, /* END */
+                                   R32_C(const_base + 3), /* SRC0 (REGS32) */
+                                   R32_T(zero_temp), /* SRC1 (REGS32T) */
+                                   R64_C((const_base + 4) >> 1), /* SRC2
+                                                                  * (REGS64)
+                                                                  */
+                                   R64_C((const_base + 6) >> 1) /* SRC3
+                                                                 * (REGS64C)
+                                                                 */
+                                   ));
+
+            /* Now the driver must have a dummy DDMAD marked as last. */
+            if (last_DMA) {
+               uint32_t dummy_dma_const = pvr_find_constant(const_usage,
+                                                            RESERVE_64BIT,
+                                                            "uDummyDMAConst");
+               uint32_t zero_const =
+                  pvr_find_constant(const_usage, RESERVE_64BIT, "uZeroConst");
+
+               literal_entry =
+                  pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                       sizeof(*literal_entry));
+               literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+               literal_entry->const_offset = zero_const;
+               literal_entry->literal_value = 0;
+
+               literal_entry =
+                  pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                       sizeof(*literal_entry));
+               literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+               literal_entry->const_offset = zero_const + 1;
+               literal_entry->literal_value = 0;
+
+               literal_entry =
+                  pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                       sizeof(*literal_entry));
+               literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+               literal_entry->const_offset = dummy_dma_const;
+               literal_entry->literal_value = 0;
+
+               literal_entry->literal_value |=
+                  0 << PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_BSIZE_SHIFT;
+               literal_entry->literal_value |=
+                  (PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_DEST_UNIFIED_STORE |
+                   PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_CMODE_CACHED);
+               literal_entry->literal_value |=
+                  PVR_ROGUE_PDSINST_DDMAD_FIELDS_SRC3_LAST_EN;
+
+               literal_entry =
+                  pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                       sizeof(*literal_entry));
+               literal_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32;
+               literal_entry->const_offset = dummy_dma_const + 1;
+               literal_entry->literal_value = 0;
+
+               PVR_PDS_MODE_TOGGLE(code,
+                                   instruction,
+                                   pvr_pds_inst_encode_ddmad(
+                                      0, /* cc */
+                                      0, /* END */
+                                      R32_C(zero_const), /* SRC0 (REGS32)
+                                                          */
+                                      R32_T(zero_temp), /* SRC1 (REGS32T)
+                                                         */
+                                      R64_C((dummy_dma_const) >> 1), /* SRC2
+                                                                        (REGS64)
+                                                                     */
+                                      R64_C((dummy_dma_const) >> 1) /* SRC3
+                                                                       (REGS64C)
+                                                                    */
+                                      ));
+            }
+         }
+      } else {
+         if (use_robust_vertex_fetch) {
+            struct pvr_const_map_entry_vertex_attribute_max_index
+               *max_index_entry;
+
+            pvr_debug("RobustVertexFetch DDMAD");
+
+            const uint32_t max_index_const =
+               pvr_find_constant(const_usage, RESERVE_32BIT, "max_index_const");
+
+            max_index_entry =
+               pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                    sizeof(*max_index_entry));
+            max_index_entry->const_offset = max_index_const;
+            max_index_entry->type =
+               PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_MAX_INDEX;
+            max_index_entry->binding_index = vertex_dma->binding_index;
+            max_index_entry->offset = vertex_dma->offset;
+            max_index_entry->stride = vertex_dma->stride;
+            max_index_entry->size_in_dwords = vertex_dma->size_in_dwords;
+            max_index_entry->component_size_in_bytes =
+               vertex_dma->component_size_in_bytes;
+
+            PVR_PDS_MODE_TOGGLE(
+               code,
+               instruction,
+               pvr_pds_inst_encode_add32(0, /* cc */
+                                         0, /* ALUM */
+                                         PVR_ROGUE_PDSINST_LOP_NONE, /* SNA */
+                                         R32_C(max_index_const), /* SRC0
+                                                                  * (REGS32)
+                                                                  */
+                                         R32_T(zero_temp), /* SRC1 (REGS32) */
+                                         R32TP_T(max_index_temp) /* DST
+                                                                  * (REG32TP)
+                                                                  */
+                                         ));
+
+            PVR_PDS_MODE_TOGGLE(code,
+                                instruction,
+                                pvr_pds_inst_encode_stflp32(
+                                   1, /* IM */
+                                   0, /* cc */
+                                   PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
+                                   index, /* SRC0 (REGS32T) */
+                                   0, /* SRC1 (REGS32) */
+                                   0, /* SRC2 (REG32TP) */
+                                   R32TP_T(current_index_temp) /* DST
+                                                                * (REG32TP)
+                                                                */
+                                   ));
+
+            PVR_PDS_MODE_TOGGLE(
+               code,
+               instruction,
+               pvr_pds_inst_encode_cmp(
+                  0, /* cc enable */
+                  PVR_ROGUE_PDSINST_COP_GT, /* Operation */
+                  R64TP_T(current_index_temp >> 1), /* SRC
+                                                     * (REGS64TP)
+                                                     */
+                  R64_T(max_index_temp >> 1) /* SRC1 (REGS64) */
+                  ));
+
+            PVR_PDS_MODE_TOGGLE(code,
+                                instruction,
+                                pvr_pds_inst_encode_stflp32(
+                                   1, /* IM */
+                                   1, /* cc */
+                                   PVR_ROGUE_PDSINST_LOP_NONE, /* LOP */
+                                   zero_temp, /* SRC0 (REGS32T) */
+                                   0, /* SRC1 (REGS32) */
+                                   0, /* SRC2 (REG32TP) */
+                                   R32TP_T(current_index_temp) /* DST
+                                                                * (REG32TP)
+                                                                */
+                                   ));
+
+            PVR_PDS_MODE_TOGGLE(code,
+                                instruction,
+                                pvr_pds_inst_encode_ddmad(
+                                   0, /* cc  */
+                                   0, /* END */
+                                   R32_C(const_base + 3), /* SRC0 (REGS32) */
+                                   current_index_temp, /* SRC1 (REGS32T) */
+                                   R64_C((const_base + 4) >> 1), /* SRC2
+                                                                  * (REGS64)
+                                                                  */
+                                   (const_base + 6) >> 1 /* SRC3 (REGS64C) */
+                                   ));
+         } else {
+            PVR_PDS_MODE_TOGGLE(code,
+                                instruction,
+                                pvr_pds_inst_encode_ddmad(
+                                   /* cc    */ 0,
+                                   /* end   */ 0,
+                                   /* src0  */ R32_C(const_base + 3),
+                                   /* src2  */ (index),
+                                   /* src1  */ R64_C((const_base + 4) >> 1),
+                                   /* src3  */ (const_base + 6) >> 1));
+         }
+      }
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_VERTEX_ID_REQUIRED) {
+      bool last_DMA = (++running_dma_count == total_dma_count);
+
+      PVR_PDS_MODE_TOGGLE(
+         code,
+         instruction,
+         pvr_encode_direct_write(
+            &entry_write_state,
+            last_DMA,
+            false,
+            R64_C(write_vertex_control),
+            R64_T(0),
+            0x1,
+            input_program->vertex_id_register,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+            dev_info));
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_INSTANCE_ID_REQUIRED) {
+      bool last_DMA = (++running_dma_count == total_dma_count);
+
+      PVR_PDS_MODE_TOGGLE(
+         code,
+         instruction,
+         pvr_encode_direct_write(
+            &entry_write_state,
+            last_DMA,
+            false,
+            R64_C(write_instance_control),
+            R64_T(0),
+            0x2,
+            input_program->instance_id_register,
+            PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+            dev_info));
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_REQUIRED) {
+      bool last_DMA = (++running_dma_count == total_dma_count);
+
+      if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+         /* Base instance comes from ptemp 1. */
+         PVR_PDS_MODE_TOGGLE(
+            code,
+            instruction,
+            pvr_encode_direct_write(
+               &entry_write_state,
+               last_DMA,
+               false,
+               R64_C(write_base_instance_control),
+               R64_P(PVR_INDIRECT_BASE_INSTANCE_PTEMP >> 1),
+               0x2,
+               input_program->base_instance_register,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               dev_info));
+      } else {
+         uint32_t data_mask = (base_instance & 1) ? 0x2 : 0x1;
+
+         /* Base instance comes from driver constant. */
+         PVR_PDS_MODE_TOGGLE(
+            code,
+            instruction,
+            pvr_encode_direct_write(
+               &entry_write_state,
+               last_DMA,
+               false,
+               R64_C(write_base_instance_control),
+               R64_C(base_instance >> 1),
+               data_mask,
+               input_program->base_instance_register,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               dev_info));
+      }
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_BASE_VERTEX_REQUIRED) {
+      bool last_DMA = (++running_dma_count == total_dma_count);
+
+      if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+         /* Base vertex comes from ptemp 0 (initialized by PDS hardware). */
+         PVR_PDS_MODE_TOGGLE(
+            code,
+            instruction,
+            pvr_encode_direct_write(
+               &entry_write_state,
+               last_DMA,
+               false,
+               R64_C(write_base_vertex_control),
+               R64_P(0),
+               0x1,
+               input_program->base_vertex_register,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               dev_info));
+      } else {
+         uint32_t data_mask = (base_vertex & 1) ? 0x2 : 0x1;
+
+         /* Base vertex comes from driver constant (literal 0). */
+         PVR_PDS_MODE_TOGGLE(
+            code,
+            instruction,
+            pvr_encode_direct_write(
+               &entry_write_state,
+               last_DMA,
+               false,
+               R64_C(write_base_vertex_control),
+               R64_C(base_vertex >> 1),
+               data_mask,
+               input_program->base_vertex_register,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               dev_info));
+      }
+   }
+
+   if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDEX_REQUIRED) {
+      bool last_DMA = (++running_dma_count == total_dma_count);
+
+      if (input_program->flags & PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT) {
+         /* Draw index comes from ptemp 3. */
+         PVR_PDS_MODE_TOGGLE(
+            code,
+            instruction,
+            pvr_encode_direct_write(
+               &entry_write_state,
+               last_DMA,
+               false,
+               R64_C(pvr_write_draw_index_control),
+               R64_P(1),
+               0x2,
+               input_program->draw_index_register,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               dev_info));
+      } else {
+         uint32_t data_mask = (draw_index & 1) ? 0x2 : 0x1;
+
+         /* Draw index comes from constant (literal 0). */
+         PVR_PDS_MODE_TOGGLE(
+            code,
+            instruction,
+            pvr_encode_direct_write(
+               &entry_write_state,
+               last_DMA,
+               false,
+               R64_C(pvr_write_draw_index_control),
+               R64_C(draw_index >> 1),
+               data_mask,
+               input_program->draw_index_register,
+               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTW_SRC1_DEST_UNIFIED_STORE,
+               dev_info));
+      }
+   }
+
+   doutu_address_entry =
+      pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                           sizeof(*doutu_address_entry));
+   doutu_address_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
+   doutu_address_entry->const_offset = 0;
+   doutu_address_entry->doutu_control = input_program->usc_task_control.src0;
+
+   if (use_robust_vertex_fetch) {
+      /* Restore IF0 */
+      PVR_PDS_MODE_TOGGLE(
+         code,
+         instruction,
+         pvr_pds_inst_encode_bra(PVR_ROGUE_PDSINST_PREDICATE_KEEP, /* SRCCC */
+                                 0, /* Neg */
+                                 PVR_ROGUE_PDSINST_PREDICATE_IF0, /* SETCC */
+                                 1 /* Addr */
+                                 ));
+   }
+
+   PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_encode_doutu(1, 1, 0));
+   PVR_PDS_MODE_TOGGLE(code, instruction, pvr_pds_inst_encode_halt(0));
+
+   assert(running_dma_count == total_dma_count);
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(const_usage); i++) {
+      if (const_usage[i] == 0)
+         break;
+
+      info->data_size_in_dwords =
+         8 * i + (32 - __builtin_clz((uint32_t)const_usage[i]));
+   }
+
+   info->temps_required = temp_usage.temps_needed;
+   info->entry_count = entry_write_state.entry_count;
+   info->entries_written_size_in_bytes =
+      entry_write_state.entries_size_in_bytes;
+   info->code_size_in_dwords = instruction;
+
+   pvr_debug("=================================================\n");
+}
+
+/* Generates a PDS program that uploads descriptor sets and driver-managed
+ * buffers into the USC store using burst DMAs (DOUTD), optionally finishing
+ * with a DOUTU that launches the secondary USC update task.
+ *
+ * Data-segment layout: all 64-bit constants come first (two dwords each),
+ * followed by all 32-bit constants; each DMA consumes one constant of each
+ * size. For every constant a const-map entry is recorded through
+ * entry_write_state so the driver can patch in the real device addresses
+ * and values when the program is uploaded.
+ *
+ * input_program: describes the descriptor sets and buffers to upload, and
+ *                whether a secondary program follows.
+ * code_section:  destination for the encoded PDS instructions. Whether the
+ *                instructions are actually written or only counted depends
+ *                on the mode selected by PVR_PDS_MODE_TOGGLE.
+ * info:          receives the data/code segment sizes and the const-map
+ *                entries written.
+ */
+void pvr_pds_generate_descriptor_upload_program(
+   struct pvr_descriptor_program_input *input_program,
+   uint32_t *code_section,
+   struct pvr_pds_info *info)
+{
+   unsigned int num_consts64;
+   unsigned int num_consts32;
+   unsigned int next_const64;
+   unsigned int next_const32;
+   unsigned int instruction = 0;
+   uint32_t compile_time_buffer_index = 0;
+
+   unsigned int total_dma_count = 0;
+   unsigned int running_dma_count = 0;
+
+   struct pvr_pds_const_map_entry_write_state entry_write_state;
+
+   /* Calculate the total register usage so we can stick 32-bit consts
+    * after 64. Each DOUTD/DDMAD requires 1 32-bit constant and 1 64-bit
+    * constant.
+    */
+   num_consts32 = input_program->descriptor_set_count;
+   num_consts64 = input_program->descriptor_set_count;
+   total_dma_count = input_program->descriptor_set_count;
+
+   pvr_init_pds_const_map_entry_write_state(info, &entry_write_state);
+
+   /* Count the additional constants/DMAs needed for the input buffers. */
+   for (unsigned int index = 0; index < input_program->buffer_count; index++) {
+      struct pvr_pds_buffer *buffer = &input_program->buffers[index];
+
+      /* This switch statement looks pointless but we want to optimize DMAs
+       * that can be done as a DOUTW.
+       */
+      switch (buffer->type) {
+      default: {
+         /* 1 DOUTD per compile time buffer: */
+         num_consts32++;
+         num_consts64++;
+         total_dma_count++;
+         break;
+      }
+      }
+   }
+
+   /* DOUTU for the secondary update program requires a 64-bit constant. */
+   if (input_program->secondary_program_present)
+      num_consts64++;
+
+   /* Each 64-bit constant occupies two dwords of the data segment. */
+   info->data_size_in_dwords = (num_consts64 * 2) + (num_consts32);
+
+   /* Start counting constants. */
+   next_const64 = 0;
+   /* 32-bit constants start immediately after the 64-bit block. */
+   next_const32 = num_consts64 * 2;
+
+   /* For each descriptor set perform a DOUTD. */
+   for (unsigned int descriptor_index = 0;
+        descriptor_index < input_program->descriptor_set_count;
+        descriptor_index++) {
+      struct pvr_const_map_entry_descriptor_set *descriptor_set_entry;
+      struct pvr_pds_descriptor_set *descriptor_set =
+         &input_program->descriptor_sets[descriptor_index];
+
+      /* Only halt on the final DMA, and only when no secondary program (and
+       * hence no trailing DOUTU) follows.
+       */
+      bool last_DMA = (++running_dma_count == total_dma_count);
+      bool halt = last_DMA && !input_program->secondary_program_present;
+
+      descriptor_set_entry =
+         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                              sizeof(*descriptor_set_entry));
+      descriptor_set_entry->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET;
+      descriptor_set_entry->const_offset = next_const64 * 2;
+      descriptor_set_entry->descriptor_set = descriptor_set->descriptor_set;
+      descriptor_set_entry->primary = descriptor_set->primary;
+      descriptor_set_entry->offset_in_dwords = descriptor_set->offset_in_dwords;
+
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_encode_burst_cs(&entry_write_state,
+                                              last_DMA,
+                                              halt,
+                                              next_const32,
+                                              next_const64,
+                                              descriptor_set->size_in_dwords,
+                                              descriptor_set->destination));
+
+      next_const64++;
+      next_const32++;
+   }
+
+   /* Emit a const-map entry and a DOUTD for each input buffer. The switch
+    * only decides which entry variant describes the buffer; the DMA encoding
+    * afterwards is identical for all of them.
+    */
+   for (unsigned int index = 0; index < input_program->buffer_count; index++) {
+      struct pvr_pds_buffer *buffer = &input_program->buffers[index];
+
+      bool last_DMA = (++running_dma_count == total_dma_count);
+      bool halt = last_DMA && !input_program->secondary_program_present;
+
+      switch (buffer->type) {
+      case PVR_BUFFER_TYPE_PUSH_CONSTS: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_PUSH_CONSTS;
+         special_buffer_entry->buffer_index = buffer->source_offset;
+         break;
+      }
+      case PVR_BUFFER_TYPE_DYNAMIC: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_DYNAMIC;
+         special_buffer_entry->buffer_index = buffer->source_offset;
+         break;
+      }
+      case PVR_BUFFER_TYPES_COMPILE_TIME: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_COMPILE_TIME;
+         /* Compile-time buffers are indexed in order of appearance. */
+         special_buffer_entry->buffer_index = compile_time_buffer_index++;
+         break;
+      }
+      case PVR_BUFFER_TYPES_BUFFER_LENGTHS: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPES_BUFFER_LENGTHS;
+         /* NOTE(review): buffer_index is left unset for this buffer type —
+          * presumably ignored by the consumer; confirm.
+          */
+         break;
+      }
+      case PVR_BUFFER_TYPE_BLEND_CONSTS: {
+         struct pvr_const_map_entry_special_buffer *special_buffer_entry;
+
+         special_buffer_entry =
+            pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                                 sizeof(*special_buffer_entry));
+         special_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_SPECIAL_BUFFER;
+         special_buffer_entry->buffer_type = PVR_BUFFER_TYPE_BLEND_CONSTS;
+         /* Pass the blend-constant usage mask through as the index. */
+         special_buffer_entry->buffer_index =
+            input_program->blend_constants_used_mask;
+         break;
+      }
+      case PVR_BUFFER_TYPE_UBO: {
+         struct pvr_const_map_entry_constant_buffer *constant_buffer_entry;
+
+         constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
+            &entry_write_state,
+            sizeof(*constant_buffer_entry));
+         constant_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER;
+         constant_buffer_entry->buffer_id = buffer->buffer_id;
+         constant_buffer_entry->desc_set = buffer->desc_set;
+         constant_buffer_entry->binding = buffer->binding;
+         constant_buffer_entry->offset = buffer->source_offset;
+         constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
+         break;
+      }
+      case PVR_BUFFER_TYPES_UBO_ZEROING: {
+         struct pvr_const_map_entry_constant_buffer_zeroing
+            *constant_buffer_entry;
+
+         constant_buffer_entry = pvr_prepare_next_pds_const_map_entry(
+            &entry_write_state,
+            sizeof(*constant_buffer_entry));
+         constant_buffer_entry->type =
+            PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER_ZEROING;
+         constant_buffer_entry->buffer_id = buffer->buffer_id;
+         constant_buffer_entry->offset = buffer->source_offset;
+         constant_buffer_entry->size_in_dwords = buffer->size_in_dwords;
+         break;
+      }
+      }
+
+      /* Patch the const_offset of whichever entry the switch above prepared.
+       * NOTE(review): this relies on entry_write_state.entry pointing at the
+       * most recently prepared entry and on const_offset having the same
+       * layout in every entry variant used here — confirm.
+       */
+      entry_write_state.entry->const_offset = next_const64 * 2;
+
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_encode_burst_cs(&entry_write_state,
+                                              last_DMA,
+                                              halt,
+                                              next_const32,
+                                              next_const64,
+                                              buffer->size_in_dwords,
+                                              buffer->destination));
+
+      next_const64++;
+      next_const32++;
+   }
+
+   /* Sanity check: every DMA counted up front should have been emitted. */
+   if (total_dma_count != running_dma_count)
+      fprintf(stderr, "Mismatch in DMA count\n");
+
+   /* Kick the secondary USC task, consuming the final 64-bit constant. */
+   if (input_program->secondary_program_present) {
+      struct pvr_const_map_entry_doutu_address *doutu_address;
+
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_pds_encode_doutu(false, true, next_const64));
+
+      doutu_address =
+         pvr_prepare_next_pds_const_map_entry(&entry_write_state,
+                                              sizeof(*doutu_address));
+      doutu_address->type = PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS;
+      doutu_address->const_offset = next_const64 * 2;
+      doutu_address->doutu_control = input_program->secondary_task_control.src0;
+
+      next_const64++;
+   }
+
+   /* Some callers require a non-empty program; emit a lone HALT if nothing
+    * else was generated.
+    */
+   if (instruction == 0 && input_program->must_not_be_empty) {
+      PVR_PDS_MODE_TOGGLE(code_section,
+                          instruction,
+                          pvr_pds_inst_encode_halt(
+                             /* cc */ false));
+   }
+
+   info->entry_count = entry_write_state.entry_count;
+   info->entries_written_size_in_bytes =
+      entry_write_state.entries_size_in_bytes;
+   info->code_size_in_dwords = instruction;
+}
diff --git a/src/imagination/vulkan/pvr_blit.c b/src/imagination/vulkan/pvr_blit.c
new file mode 100644 (file)
index 0000000..142f98b
--- /dev/null
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_csb.h"
+#include "pvr_private.h"
+#include "util/list.h"
+#include "vk_alloc.h"
+#include "vk_command_buffer.h"
+#include "vk_command_pool.h"
+#include "vk_log.h"
+
+/* TODO: Investigate where this limit comes from. */
+#define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U
+
+/* vkCmdBlitImage2KHR: scaled/format-converting image-to-image blit.
+ * Not yet implemented; reaching this aborts debug builds.
+ */
+void pvr_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
+                          const VkBlitImageInfo2KHR *pBlitImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdCopyImageToBuffer2KHR: image-to-buffer copy. Unimplemented stub. */
+void pvr_CmdCopyImageToBuffer2KHR(
+   VkCommandBuffer commandBuffer,
+   const VkCopyImageToBufferInfo2KHR *pCopyImageToBufferInfo)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdCopyImage2KHR: image-to-image copy. Unimplemented stub. */
+void pvr_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
+                          const VkCopyImageInfo2KHR *pCopyImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdUpdateBuffer: inline update of buffer contents. Unimplemented stub. */
+void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
+                         VkBuffer dstBuffer,
+                         VkDeviceSize dstOffset,
+                         VkDeviceSize dataSize,
+                         const void *pData)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdFillBuffer: fill a buffer range with a 32-bit value. Unimplemented
+ * stub.
+ */
+void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
+                       VkBuffer dstBuffer,
+                       VkDeviceSize dstOffset,
+                       VkDeviceSize fillSize,
+                       uint32_t data)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdCopyBufferToImage2KHR: buffer-to-image copy. Unimplemented stub. */
+void pvr_CmdCopyBufferToImage2KHR(
+   VkCommandBuffer commandBuffer,
+   const VkCopyBufferToImageInfo2KHR *pCopyBufferToImageInfo)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdClearColorImage: clear color image subresource ranges. Unimplemented
+ * stub.
+ */
+void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
+                            VkImage _image,
+                            VkImageLayout imageLayout,
+                            const VkClearColorValue *pColor,
+                            uint32_t rangeCount,
+                            const VkImageSubresourceRange *pRanges)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdClearDepthStencilImage: clear depth/stencil subresource ranges.
+ * Unimplemented stub.
+ */
+void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
+                                   VkImage image_h,
+                                   VkImageLayout imageLayout,
+                                   const VkClearDepthStencilValue *pDepthStencil,
+                                   uint32_t rangeCount,
+                                   const VkImageSubresourceRange *pRanges)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdCopyBuffer2KHR: record a buffer-to-buffer copy.
+ *
+ * Allocates a pvr_transfer_cmd with the region array stored inline (flexible
+ * trailing storage) and appends it to the command buffer's transfer sub-
+ * command via pvr_cmd_buffer_add_transfer_cmd(). On allocation failure the
+ * error is latched in cmd_buffer->state.status, per the Vulkan model of
+ * deferring command-buffer errors to vkEndCommandBuffer().
+ */
+void pvr_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
+                           const VkCopyBufferInfo2KHR *pCopyBufferInfo)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
+   const size_t regions_size =
+      pCopyBufferInfo->regionCount * sizeof(*pCopyBufferInfo->pRegions);
+   struct pvr_transfer_cmd *transfer_cmd;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   /* Single allocation: command header plus regionCount regions inline. */
+   transfer_cmd = vk_alloc(&cmd_buffer->vk.pool->alloc,
+                           sizeof(*transfer_cmd) + regions_size,
+                           8U,
+                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!transfer_cmd) {
+      cmd_buffer->state.status =
+         vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      return;
+   }
+
+   transfer_cmd->src = src;
+   transfer_cmd->dst = dst;
+   transfer_cmd->region_count = pCopyBufferInfo->regionCount;
+   memcpy(transfer_cmd->regions, pCopyBufferInfo->pRegions, regions_size);
+
+   /* NOTE(review): transfer_cmd->link is presumably initialized by
+    * pvr_cmd_buffer_add_transfer_cmd() when it inserts the command — confirm.
+    */
+   pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
+}
+
+/* vkCmdClearAttachments: clear regions of bound attachments inside a render
+ * pass. Unimplemented stub.
+ */
+void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
+                             uint32_t attachmentCount,
+                             const VkClearAttachment *pAttachments,
+                             uint32_t rectCount,
+                             const VkClearRect *pRects)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdResolveImage2KHR: multisample resolve. Unimplemented stub. */
+void pvr_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
+                             const VkResolveImageInfo2KHR *pResolveImageInfo)
+{
+   assert(!"Unimplemented");
+}
diff --git a/src/imagination/vulkan/pvr_bo.c b/src/imagination/vulkan/pvr_bo.c
new file mode 100644 (file)
index 0000000..a745080
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_bo.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+/* Translate PVR_BO_ALLOC_FLAG_* bits into the corresponding
+ * PVR_WINSYS_BO_FLAG_* bits understood by the winsys layer. Flags with no
+ * winsys equivalent (e.g. CPU_MAPPED, which only affects pvr_bo_alloc()
+ * behavior) are dropped.
+ */
+static uint32_t pvr_bo_alloc_to_winsys_flags(uint64_t flags)
+{
+   uint32_t ws_flags = 0;
+
+   if (flags & PVR_BO_ALLOC_FLAG_CPU_ACCESS)
+      ws_flags |= PVR_WINSYS_BO_FLAG_CPU_ACCESS;
+
+   if (flags & PVR_BO_ALLOC_FLAG_GPU_UNCACHED)
+      ws_flags |= PVR_WINSYS_BO_FLAG_GPU_UNCACHED;
+
+   if (flags & PVR_BO_ALLOC_FLAG_PM_FW_PROTECT)
+      ws_flags |= PVR_WINSYS_BO_FLAG_PM_FW_PROTECT;
+
+   if (flags & PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC)
+      ws_flags |= PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC;
+
+   return ws_flags;
+}
+
+/**
+ * \brief Helper interface to allocate a GPU buffer and map it to both host and
+ * device virtual memory. Host mapping is conditional and is controlled by
+ * flags.
+ *
+ * \param[in] device      Logical device pointer.
+ * \param[in] heap        Heap to allocate device virtual address from.
+ * \param[in] size        Size of buffer to allocate.
+ * \param[in] alignment   Required alignment of the allocation. Must be a power
+ *                        of two.
+ * \param[in] flags       Controls allocation, CPU and GPU mapping behavior
+ *                        using PVR_BO_ALLOC_FLAG_*.
+ * \param[out] pvr_bo_out On success output buffer is returned in this pointer.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ *
+ * \sa #pvr_bo_free()
+ */
+VkResult pvr_bo_alloc(struct pvr_device *device,
+                      struct pvr_winsys_heap *heap,
+                      uint64_t size,
+                      uint64_t alignment,
+                      uint64_t flags,
+                      struct pvr_bo **const pvr_bo_out)
+{
+   const uint32_t ws_flags = pvr_bo_alloc_to_winsys_flags(flags);
+   struct pvr_bo *pvr_bo;
+   pvr_dev_addr_t addr;
+   VkResult result;
+
+   pvr_bo = vk_alloc(&device->vk.alloc,
+                     sizeof(*pvr_bo),
+                     8,
+                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!pvr_bo)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = device->ws->ops->buffer_create(device->ws,
+                                           size,
+                                           alignment,
+                                           PVR_WINSYS_BO_TYPE_GPU,
+                                           ws_flags,
+                                           &pvr_bo->bo);
+   if (result != VK_SUCCESS)
+      goto err_vk_free;
+
+   if (flags & PVR_BO_ALLOC_FLAG_CPU_MAPPED) {
+      /* NOTE(review): the returned pointer is not stored here — presumably
+       * the winsys caches it in pvr_bo->bo->map (pvr_bo_free() relies on
+       * that field to decide whether to unmap). Confirm against the winsys
+       * implementation.
+       */
+      void *map = device->ws->ops->buffer_map(pvr_bo->bo);
+      if (!map) {
+         result = VK_ERROR_MEMORY_MAP_FAILED;
+         goto err_buffer_destroy;
+      }
+   }
+
+   /* Reserve a device-virtual range, then map the buffer into it. */
+   pvr_bo->vma = device->ws->ops->heap_alloc(heap, size, alignment);
+   if (!pvr_bo->vma) {
+      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+      goto err_buffer_unmap;
+   }
+
+   addr = device->ws->ops->vma_map(pvr_bo->vma, pvr_bo->bo, 0, size);
+   if (!addr.addr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto err_heap_free;
+   }
+
+   *pvr_bo_out = pvr_bo;
+
+   return VK_SUCCESS;
+
+/* Unwind in strict reverse order of acquisition. */
+err_heap_free:
+   device->ws->ops->heap_free(pvr_bo->vma);
+
+err_buffer_unmap:
+   if (flags & PVR_BO_ALLOC_FLAG_CPU_MAPPED)
+      device->ws->ops->buffer_unmap(pvr_bo->bo);
+
+err_buffer_destroy:
+   device->ws->ops->buffer_destroy(pvr_bo->bo);
+
+err_vk_free:
+   vk_free(&device->vk.alloc, pvr_bo);
+
+   return result;
+}
+
+/**
+ * \brief Interface to map the buffer into host virtual address space.
+ *
+ * Buffer should have been created with the #PVR_BO_ALLOC_FLAG_CPU_ACCESS
+ * flag. It should also not already be mapped or it should have been unmapped
+ * using #pvr_bo_cpu_unmap() before mapping again.
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] pvr_bo Buffer to map.
+ * \return Valid host virtual address on success, or NULL otherwise.
+ *
+ * \sa #pvr_bo_alloc(), #PVR_BO_ALLOC_FLAG_CPU_MAPPED
+ */
+void *pvr_bo_cpu_map(struct pvr_device *device, struct pvr_bo *pvr_bo)
+{
+   /* Double-mapping is a caller bug; catch it in debug builds. */
+   assert(!pvr_bo->bo->map);
+
+   return device->ws->ops->buffer_map(pvr_bo->bo);
+}
+
+/**
+ * \brief Interface to unmap the buffer from host virtual address space.
+ *
+ * Buffer should have a valid mapping, created either using #pvr_bo_cpu_map() or
+ * by passing #PVR_BO_ALLOC_FLAG_CPU_MAPPED flag to #pvr_bo_alloc() at
+ * allocation time.
+ *
+ * Buffer can be remapped using #pvr_bo_cpu_map().
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] pvr_bo Buffer to unmap.
+ */
+void pvr_bo_cpu_unmap(struct pvr_device *device, struct pvr_bo *pvr_bo)
+{
+   /* Unmapping a buffer that was never mapped is a caller bug. */
+   assert(pvr_bo->bo->map);
+   device->ws->ops->buffer_unmap(pvr_bo->bo);
+}
+
+/**
+ * \brief Interface to free the buffer object.
+ *
+ * Releases, in reverse order of #pvr_bo_alloc(): the device-virtual mapping,
+ * the heap range, any outstanding CPU mapping, the winsys buffer, and
+ * finally the pvr_bo wrapper itself. NULL is accepted and is a no-op, so
+ * callers can free unconditionally on cleanup paths.
+ *
+ * \param[in] device Logical device pointer.
+ * \param[in] pvr_bo Buffer to free. May be NULL.
+ *
+ * \sa #pvr_bo_alloc()
+ */
+void pvr_bo_free(struct pvr_device *device, struct pvr_bo *pvr_bo)
+{
+   if (!pvr_bo)
+      return;
+
+   device->ws->ops->vma_unmap(pvr_bo->vma);
+   device->ws->ops->heap_free(pvr_bo->vma);
+
+   /* Only unmap if a CPU mapping is still live. */
+   if (pvr_bo->bo->map)
+      device->ws->ops->buffer_unmap(pvr_bo->bo);
+
+   device->ws->ops->buffer_destroy(pvr_bo->bo);
+
+   vk_free(&device->vk.alloc, pvr_bo);
+}
diff --git a/src/imagination/vulkan/pvr_bo.h b/src/imagination/vulkan/pvr_bo.h
new file mode 100644 (file)
index 0000000..2a00c24
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_BO_H
+#define PVR_BO_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "util/list.h"
+#include "util/macros.h"
+
+struct pvr_device;
+struct pvr_winsys_bo;
+struct pvr_winsys_vma;
+struct pvr_winsys_heap;
+
+/** A GPU buffer: winsys buffer object plus its device-virtual mapping. */
+struct pvr_bo {
+   /* Since multiple components (csb, caching logic, etc) can make use of
+    * linking buffers in a list, we add 'link' in pvr_bo to avoid an extra
+    * level of structure inheritance. It's the responsibility of the buffer
+    * user to manage the list and remove the buffer from the list before
+    * freeing it.
+    */
+   struct list_head link;
+
+   /** Underlying winsys buffer object. */
+   struct pvr_winsys_bo *bo;
+   /** Device-virtual mapping of \p bo, owned by this pvr_bo. */
+   struct pvr_winsys_vma *vma;
+};
+
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * CPU accessible. This is required in order to map a buffer with
+ * #pvr_bo_cpu_map().
+ */
+#define PVR_BO_ALLOC_FLAG_CPU_ACCESS BITFIELD_BIT(0U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should
+ * be mapped to the CPU. Implies #PVR_BO_ALLOC_FLAG_CPU_ACCESS.
+ */
+#define PVR_BO_ALLOC_FLAG_CPU_MAPPED \
+   (BITFIELD_BIT(1U) | PVR_BO_ALLOC_FLAG_CPU_ACCESS)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * mapped to the GPU as uncached.
+ */
+#define PVR_BO_ALLOC_FLAG_GPU_UNCACHED BITFIELD_BIT(2U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer GPU mapping
+ * should be restricted to only allow access to the Parameter Manager unit and
+ * firmware processor.
+ */
+#define PVR_BO_ALLOC_FLAG_PM_FW_PROTECT BITFIELD_BIT(3U)
+/**
+ * \brief Flag passed to #pvr_bo_alloc() to indicate that the buffer should be
+ * zeroed at allocation time.
+ */
+#define PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC BITFIELD_BIT(4U)
+
+/* See the function definitions for full documentation. */
+VkResult pvr_bo_alloc(struct pvr_device *device,
+                      struct pvr_winsys_heap *heap,
+                      uint64_t size,
+                      uint64_t alignment,
+                      uint64_t flags,
+                      struct pvr_bo **const bo_out);
+void *pvr_bo_cpu_map(struct pvr_device *device, struct pvr_bo *bo);
+void pvr_bo_cpu_unmap(struct pvr_device *device, struct pvr_bo *bo);
+void pvr_bo_free(struct pvr_device *device, struct pvr_bo *bo);
+
+#endif /* PVR_BO_H */
diff --git a/src/imagination/vulkan/pvr_cmd_buffer.c b/src/imagination/vulkan/pvr_cmd_buffer.c
new file mode 100644 (file)
index 0000000..d167745
--- /dev/null
@@ -0,0 +1,4602 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "c11_compat.h"
+#include "hwdef/rogue_hw_defs.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_end_of_tile.h"
+#include "pvr_formats.h"
+#include "pvr_hw_pass.h"
+#include "pvr_job_common.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "util/compiler.h"
+#include "util/list.h"
+#include "util/macros.h"
+#include "util/u_dynarray.h"
+#include "util/u_pack_color.h"
+#include "vk_alloc.h"
+#include "vk_command_buffer.h"
+#include "vk_command_pool.h"
+#include "vk_format.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
+/* Structure used to pass data into pvr_compute_generate_control_stream()
+ * function.
+ *
+ * NOTE(review): field units/semantics (e.g. whether the usc_*_size and
+ * pds_*_size values are in allocation granules or dwords) are defined by the
+ * CDMCTRL stream format in csbgen/rogue_cdm.xml — confirm there before use.
+ */
+struct pvr_compute_kernel_info {
+   pvr_dev_addr_t indirect_buffer_addr;
+   bool global_offsets_present;
+   uint32_t usc_common_size;
+   uint32_t usc_unified_size;
+   uint32_t pds_temp_size;
+   uint32_t pds_data_size;
+   bool usc_target_any;
+   bool is_fence;
+   uint32_t pds_data_offset;
+   uint32_t pds_code_offset;
+   enum PVRX(CDMCTRL_SD_TYPE) sd_type;
+   bool usc_common_shared;
+   /* Workgroup-local and global dispatch dimensions (x, y, z). */
+   uint32_t local_size[3];
+   uint32_t global_size[3];
+   uint32_t max_instances;
+};
+
+/* Release all resources owned by a single sub-command (control streams,
+ * table BOs, queued transfer commands), unlink it from the command buffer's
+ * sub-command list and free it.
+ */
+static void pvr_cmd_buffer_free_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                        struct pvr_sub_cmd *sub_cmd)
+{
+   switch (sub_cmd->type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+      pvr_csb_finish(&sub_cmd->gfx.control_stream);
+      pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.depth_bias_bo);
+      pvr_bo_free(cmd_buffer->device, sub_cmd->gfx.scissor_bo);
+      break;
+
+   case PVR_SUB_CMD_TYPE_COMPUTE:
+      pvr_csb_finish(&sub_cmd->compute.control_stream);
+      break;
+
+   case PVR_SUB_CMD_TYPE_TRANSFER:
+      /* Safe iteration: entries are deleted while walking the list. */
+      list_for_each_entry_safe (struct pvr_transfer_cmd,
+                                transfer_cmd,
+                                &sub_cmd->transfer.transfer_cmds,
+                                link) {
+         list_del(&transfer_cmd->link);
+         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
+      }
+      break;
+
+   default:
+      pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
+      break;
+   }
+
+   list_del(&sub_cmd->link);
+   vk_free(&cmd_buffer->vk.pool->alloc, sub_cmd);
+}
+
+/* Free every sub-command recorded into the command buffer. */
+static void pvr_cmd_buffer_free_sub_cmds(struct pvr_cmd_buffer *cmd_buffer)
+{
+   list_for_each_entry_safe (struct pvr_sub_cmd,
+                             sub_cmd,
+                             &cmd_buffer->sub_cmds,
+                             link) {
+      pvr_cmd_buffer_free_sub_cmd(cmd_buffer, sub_cmd);
+   }
+}
+
+/* vk_command_buffer destroy callback: tear down all per-command-buffer state
+ * (render pass arrays, sub-commands, owned BOs, dynamic arrays) and free the
+ * command buffer itself. Installed via cmd_buffer->vk.destroy in
+ * pvr_cmd_buffer_create().
+ */
+static void pvr_cmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer)
+{
+   struct pvr_cmd_buffer *cmd_buffer =
+      container_of(vk_cmd_buffer, struct pvr_cmd_buffer, vk);
+
+   vk_free(&cmd_buffer->vk.pool->alloc,
+           cmd_buffer->state.render_pass_info.attachments);
+   vk_free(&cmd_buffer->vk.pool->alloc,
+           cmd_buffer->state.render_pass_info.clear_values);
+
+   pvr_cmd_buffer_free_sub_cmds(cmd_buffer);
+
+   /* Free every GPU buffer the command buffer took ownership of. */
+   list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
+      list_del(&bo->link);
+      pvr_bo_free(cmd_buffer->device, bo);
+   }
+
+   util_dynarray_fini(&cmd_buffer->scissor_array);
+   util_dynarray_fini(&cmd_buffer->depth_bias_array);
+
+   vk_command_buffer_finish(&cmd_buffer->vk);
+   vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer);
+}
+
+/* Allocate and initialize a single command buffer from \p pool.
+ *
+ * On success the new handle is written to *pCommandBuffer; on failure the
+ * partially-constructed object is freed and an error is returned.
+ */
+static VkResult pvr_cmd_buffer_create(struct pvr_device *device,
+                                      struct vk_command_pool *pool,
+                                      VkCommandBufferLevel level,
+                                      VkCommandBuffer *pCommandBuffer)
+{
+   struct pvr_cmd_buffer *cmd_buffer;
+   VkResult result;
+
+   /* vk_zalloc so all state starts zeroed/NULL. */
+   cmd_buffer = vk_zalloc(&pool->alloc,
+                          sizeof(*cmd_buffer),
+                          8U,
+                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!cmd_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = vk_command_buffer_init(&cmd_buffer->vk, pool, level);
+   if (result != VK_SUCCESS) {
+      vk_free(&pool->alloc, cmd_buffer);
+      return result;
+   }
+
+   /* Hook our teardown into the common command-buffer framework. */
+   cmd_buffer->vk.destroy = pvr_cmd_buffer_destroy;
+   cmd_buffer->device = device;
+
+   util_dynarray_init(&cmd_buffer->depth_bias_array, NULL);
+   util_dynarray_init(&cmd_buffer->scissor_array, NULL);
+
+   cmd_buffer->state.status = VK_SUCCESS;
+   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;
+
+   list_inithead(&cmd_buffer->sub_cmds);
+   list_inithead(&cmd_buffer->bo_list);
+
+   *pCommandBuffer = pvr_cmd_buffer_to_handle(cmd_buffer);
+
+   return VK_SUCCESS;
+}
+
+/* vkAllocateCommandBuffers: create pAllocateInfo->commandBufferCount command
+ * buffers. Per the Vulkan spec, on failure all successfully created buffers
+ * are destroyed and every output handle is set to VK_NULL_HANDLE.
+ */
+VkResult
+pvr_AllocateCommandBuffers(VkDevice _device,
+                           const VkCommandBufferAllocateInfo *pAllocateInfo,
+                           VkCommandBuffer *pCommandBuffers)
+{
+   VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   VkResult result = VK_SUCCESS;
+   uint32_t i;
+
+   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
+      result = pvr_cmd_buffer_create(device,
+                                     pool,
+                                     pAllocateInfo->level,
+                                     &pCommandBuffers[i]);
+      if (result != VK_SUCCESS)
+         break;
+   }
+
+   if (result != VK_SUCCESS) {
+      /* Unwind: destroy the i buffers created before the failure. */
+      while (i--) {
+         VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]);
+         pvr_cmd_buffer_destroy(cmd_buffer);
+      }
+
+      for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
+         pCommandBuffers[i] = VK_NULL_HANDLE;
+   }
+
+   return result;
+}
+
+/* Mark which pipeline stages will need a barrier after a sub-command of the
+ * given type, by OR-ing the relevant PVR_PIPELINE_STAGE_* bits into every
+ * slot of state->barriers_needed.
+ */
+static void pvr_cmd_buffer_update_barriers(struct pvr_cmd_buffer *cmd_buffer,
+                                           enum pvr_sub_cmd_type type)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   uint32_t barriers;
+
+   switch (type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+      barriers = PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT;
+      break;
+
+   case PVR_SUB_CMD_TYPE_COMPUTE:
+      barriers = PVR_PIPELINE_STAGE_COMPUTE_BIT;
+      break;
+
+   case PVR_SUB_CMD_TYPE_TRANSFER:
+      barriers = PVR_PIPELINE_STAGE_TRANSFER_BIT;
+      break;
+
+   default:
+      barriers = 0;
+      pvr_finishme("Unsupported sub-command type %d", type);
+      break;
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(state->barriers_needed); i++)
+      state->barriers_needed[i] |= barriers;
+}
+
+/* Upload the accumulated depth-bias and scissor tables for the current
+ * graphics sub-command to GPU-visible memory, then reset the host-side
+ * arrays for the next sub-command. On failure any BO uploaded so far is
+ * freed and the sub-command is left with no table BOs.
+ */
+static VkResult pvr_cmd_buffer_upload_tables(struct pvr_device *device,
+                                             struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   VkResult result;
+
+   /* Tables must not already have been uploaded for this sub-command. */
+   assert(!sub_cmd->gfx.depth_bias_bo && !sub_cmd->gfx.scissor_bo);
+
+   if (cmd_buffer->depth_bias_array.size > 0) {
+      result =
+         pvr_gpu_upload(device,
+                        device->heaps.general_heap,
+                        util_dynarray_begin(&cmd_buffer->depth_bias_array),
+                        cmd_buffer->depth_bias_array.size,
+                        cache_line_size,
+                        &sub_cmd->gfx.depth_bias_bo);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   if (cmd_buffer->scissor_array.size > 0) {
+      result = pvr_gpu_upload(device,
+                              device->heaps.general_heap,
+                              util_dynarray_begin(&cmd_buffer->scissor_array),
+                              cmd_buffer->scissor_array.size,
+                              cache_line_size,
+                              &sub_cmd->gfx.scissor_bo);
+      if (result != VK_SUCCESS)
+         goto err_free_depth_bias_bo;
+   }
+
+   util_dynarray_clear(&cmd_buffer->depth_bias_array);
+   util_dynarray_clear(&cmd_buffer->scissor_array);
+
+   return VK_SUCCESS;
+
+err_free_depth_bias_bo:
+   pvr_bo_free(device, sub_cmd->gfx.depth_bias_bo);
+   sub_cmd->gfx.depth_bias_bo = NULL;
+
+   return result;
+}
+
+/* Emit VDM control-stream words pointing at the framebuffer's pre-packed PPP
+ * state buffer. STATE0 carries the address MSBs and word count, STATE1 the
+ * address LSBs (split dictated by the VDMCTRL word layout).
+ */
+static VkResult pvr_cmd_buffer_emit_ppp_state(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_sub_cmd *sub_cmd = cmd_buffer->state.current_sub_cmd;
+   struct pvr_framebuffer *framebuffer =
+      cmd_buffer->state.render_pass_info.framebuffer;
+
+   pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE0, state0) {
+      state0.addrmsb = framebuffer->ppp_state_bo->vma->dev_addr;
+      state0.word_count = framebuffer->ppp_state_size;
+   }
+
+   pvr_csb_emit (&sub_cmd->gfx.control_stream, VDMCTRL_PPP_STATE1, state1) {
+      state1.addrlsb = framebuffer->ppp_state_bo->vma->dev_addr;
+   }
+
+   /* NOTE(review): always succeeds; VkResult kept for call-site symmetry. */
+   return VK_SUCCESS;
+}
+
+/* Upload \p size bytes of \p data to the device general heap, aligned to the
+ * SLC cache-line size, and hand ownership of the resulting BO to the command
+ * buffer (freed in pvr_cmd_buffer_destroy() via bo_list).
+ */
+static VkResult
+pvr_cmd_buffer_upload_general(struct pvr_cmd_buffer *const cmd_buffer,
+                              const void *const data,
+                              const size_t size,
+                              struct pvr_bo **const pvr_bo_out)
+{
+   struct pvr_device *const device = cmd_buffer->device;
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   result = pvr_gpu_upload(device,
+                           device->heaps.general_heap,
+                           data,
+                           size,
+                           cache_line_size,
+                           &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Track the BO so the command buffer frees it on destruction. */
+   list_add(&pvr_bo->link, &cmd_buffer->bo_list);
+
+   *pvr_bo_out = pvr_bo;
+
+   return VK_SUCCESS;
+}
+
+/* Upload a USC shader binary for this command buffer. The alignment is
+ * raised to at least the SLC cache-line size; the resulting BO is owned by
+ * the command buffer (freed via bo_list on destruction).
+ */
+static VkResult
+pvr_cmd_buffer_upload_usc(struct pvr_cmd_buffer *const cmd_buffer,
+                          const void *const code,
+                          const size_t code_size,
+                          uint64_t code_alignment,
+                          struct pvr_bo **const pvr_bo_out)
+{
+   struct pvr_device *const device = cmd_buffer->device;
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   code_alignment = MAX2(code_alignment, cache_line_size);
+
+   result =
+      pvr_gpu_upload_usc(device, code, code_size, code_alignment, &pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Ownership passes to the command buffer. */
+   list_add(&pvr_bo->link, &cmd_buffer->bo_list);
+
+   *pvr_bo_out = pvr_bo;
+
+   return VK_SUCCESS;
+}
+
+/* Upload a PDS program (data and/or code segments, sizes in dwords) and
+ * register the backing BO with the command buffer so it is freed on
+ * destruction. Thin command-buffer-owning wrapper over pvr_gpu_upload_pds().
+ */
+static VkResult
+pvr_cmd_buffer_upload_pds(struct pvr_cmd_buffer *const cmd_buffer,
+                          const uint32_t *data,
+                          uint32_t data_size_dwords,
+                          uint32_t data_alignment,
+                          const uint32_t *code,
+                          uint32_t code_size_dwords,
+                          uint32_t code_alignment,
+                          uint64_t min_alignment,
+                          struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_device *const device = cmd_buffer->device;
+   VkResult result;
+
+   result = pvr_gpu_upload_pds(device,
+                               data,
+                               data_size_dwords,
+                               data_alignment,
+                               code,
+                               code_size_dwords,
+                               code_alignment,
+                               min_alignment,
+                               pds_upload_out);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* The command buffer owns the upload's BO from here on. */
+   list_add(&pds_upload_out->pvr_bo->link, &cmd_buffer->bo_list);
+
+   return VK_SUCCESS;
+}
+
+/* Convenience wrapper: upload a PDS data segment only (no code segment). */
+static inline VkResult
+pvr_cmd_buffer_upload_pds_data(struct pvr_cmd_buffer *const cmd_buffer,
+                               const uint32_t *data,
+                               uint32_t data_size_dwords,
+                               uint32_t data_alignment,
+                               struct pvr_pds_upload *const pds_upload_out)
+{
+   return pvr_cmd_buffer_upload_pds(cmd_buffer,
+                                    data,
+                                    data_size_dwords,
+                                    data_alignment,
+                                    NULL,
+                                    0,
+                                    0,
+                                    data_alignment,
+                                    pds_upload_out);
+}
+
+/* Build and upload the per-job fragment (pixel event / end-of-tile) programs
+ * for a graphics sub-command:
+ *   1. upload the canned USC end-of-tile program,
+ *   2. patch the framebuffer address from pbe_cs_words[0] into it,
+ *   3. generate and upload the PDS pixel-event data segment that DOUTUs it.
+ * On success *pds_upload_out describes the uploaded data segment.
+ */
+static VkResult pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
+   struct pvr_cmd_buffer *const cmd_buffer,
+   const uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_event_program pixel_event_program = {
+      /* No data to DMA, just a DOUTU needed. */
+      .num_emit_word_pairs = 0,
+   };
+   const uint32_t staging_buffer_size =
+      cmd_buffer->device->pixel_event_data_size_in_dwords * sizeof(uint32_t);
+   const VkAllocationCallbacks *const allocator = &cmd_buffer->vk.pool->alloc;
+   struct pvr_device *const device = cmd_buffer->device;
+   /* FIXME: This should come from the compiler for the USC pixel program. */
+   const uint32_t usc_temp_count = 0;
+   struct pvr_bo *usc_eot_program;
+   uint8_t *usc_eot_program_ptr;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   result = pvr_cmd_buffer_upload_usc(cmd_buffer,
+                                      pvr_end_of_tile_program,
+                                      sizeof(pvr_end_of_tile_program),
+                                      4,
+                                      &usc_eot_program);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* NOTE(review): sanity-check on the canned program's PBE state word;
+    * the 0x20 value presumably matches what the byte patching below
+    * expects — confirm against the end-of-tile program generator.
+    */
+   assert((pbe_cs_words[1] & 0x3F) == 0x20);
+
+   /* FIXME: Stop patching the framebuffer address (this will require the
+    * end-of-tile program to be generated at run-time).
+    */
+   /* Patch pbe_cs_words[0] little-endian into bytes 6..9 of the program. */
+   pvr_bo_cpu_map(device, usc_eot_program);
+   usc_eot_program_ptr = usc_eot_program->bo->map;
+   usc_eot_program_ptr[6] = (pbe_cs_words[0] >> 0) & 0xFF;
+   usc_eot_program_ptr[7] = (pbe_cs_words[0] >> 8) & 0xFF;
+   usc_eot_program_ptr[8] = (pbe_cs_words[0] >> 16) & 0xFF;
+   usc_eot_program_ptr[9] = (pbe_cs_words[0] >> 24) & 0xFF;
+   pvr_bo_cpu_unmap(device, usc_eot_program);
+
+   pvr_pds_setup_doutu(&pixel_event_program.task_control,
+                       usc_eot_program->vma->dev_addr.addr,
+                       usc_temp_count,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   /* TODO: We could skip allocating this and generate directly into the device
+    * buffer thus removing one allocation and memcpy() per job. Would this
+    * speed up things in a noticeable way?
+    */
+   staging_buffer = vk_alloc(allocator,
+                             staging_buffer_size,
+                             8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_usc_pixel_program;
+   }
+
+   /* Generate the data segment. The code segment was uploaded earlier when
+    * setting up the PDS static heap data.
+    */
+   pvr_pds_generate_pixel_event_data_segment(&pixel_event_program,
+                                             staging_buffer,
+                                             &device->pdevice->dev_info);
+
+   result = pvr_cmd_buffer_upload_pds_data(
+      cmd_buffer,
+      staging_buffer,
+      cmd_buffer->device->pixel_event_data_size_in_dwords,
+      4,
+      pds_upload_out);
+   if (result != VK_SUCCESS)
+      goto err_free_pixel_event_staging_buffer;
+
+   vk_free(allocator, staging_buffer);
+
+   return VK_SUCCESS;
+
+err_free_pixel_event_staging_buffer:
+   vk_free(allocator, staging_buffer);
+
+err_free_usc_pixel_program:
+   /* The BO was added to bo_list by the upload helper; unlink before free. */
+   list_del(&usc_eot_program->link);
+   pvr_bo_free(device, usc_eot_program);
+
+   return result;
+}
+
+static uint32_t pvr_get_hw_clear_color(VkFormat vk_format,
+                                       const VkClearValue *clear_value)
+{
+   union util_color uc = { .ui = 0 };
+
+   switch (vk_format) {
+   case VK_FORMAT_B8G8R8A8_UNORM:
+      util_pack_color(clear_value->color.float32,
+                      PIPE_FORMAT_R8G8B8A8_UNORM,
+                      &uc);
+      break;
+
+   default:
+      assert(!"Unsupported format");
+      uc.ui[0] = 0;
+      break;
+   }
+
+   return uc.ui[0];
+}
+
+static VkResult
+pvr_load_op_constants_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
+                                        uint32_t idx,
+                                        pvr_dev_addr_t *const addr_out)
+{
+   const struct pvr_render_pass_info *render_pass_info =
+      &cmd_buffer->state.render_pass_info;
+   const struct pvr_render_pass *pass = render_pass_info->pass;
+   const struct pvr_renderpass_hwsetup_render *hw_render =
+      &pass->hw_setup->renders[idx];
+   ASSERTED const struct pvr_load_op *load_op = hw_render->client_data;
+   const struct pvr_renderpass_colorinit *color_init =
+      &hw_render->color_init[0];
+   const struct pvr_render_pass_attachment *attachment =
+      &pass->attachments[color_init->driver_id];
+   const VkClearValue *clear_value =
+      &render_pass_info->clear_values[color_init->driver_id];
+   uint32_t hw_clear_value;
+   struct pvr_bo *clear_bo;
+   VkResult result;
+
+   pvr_finishme("Add missing load op data support");
+
+   assert(load_op->is_hw_object);
+   assert(hw_render->color_init_count == 1);
+
+   /* FIXME: add support for RENDERPASS_SURFACE_INITOP_LOAD. */
+   assert(color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR);
+
+   /* FIXME: do this at the point we store the clear values? */
+   hw_clear_value = pvr_get_hw_clear_color(attachment->vk_format, clear_value);
+
+   result = pvr_cmd_buffer_upload_general(cmd_buffer,
+                                          &hw_clear_value,
+                                          sizeof(hw_clear_value),
+                                          &clear_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   *addr_out = clear_bo->vma->dev_addr;
+
+   return VK_SUCCESS;
+}
+
+static VkResult pvr_load_op_pds_data_create_and_upload(
+   struct pvr_cmd_buffer *cmd_buffer,
+   uint32_t idx,
+   pvr_dev_addr_t constants_addr,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const struct pvr_render_pass_info *render_pass_info =
+      &cmd_buffer->state.render_pass_info;
+   const struct pvr_load_op *load_op =
+      render_pass_info->pass->hw_setup->renders[idx].client_data;
+   struct pvr_device *device = cmd_buffer->device;
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   struct pvr_pds_pixel_shader_sa_program program = { 0 };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   program.num_texture_dma_kicks = 1;
+
+   pvr_csb_pack (&program.texture_dma_address[0],
+                 PDSINST_DOUT_FIELDS_DOUTD_SRC0,
+                 value) {
+      value.sbase = constants_addr;
+   }
+
+   pvr_csb_pack (&program.texture_dma_control[0],
+                 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
+                 value) {
+      value.dest = PVRX(PDSINST_DOUTD_DEST_COMMON_STORE);
+      value.a0 = load_op->shareds_dest_offset;
+      value.bsize = load_op->shareds_count;
+   }
+
+   pvr_pds_set_sizes_pixel_shader_sa_texture_data(&program, dev_info);
+
+   staging_buffer_size = program.data_size * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc(&cmd_buffer->vk.pool->alloc,
+                             staging_buffer_size,
+                             8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pvr_pds_generate_pixel_shader_sa_texture_state_data(&program,
+                                                       staging_buffer,
+                                                       dev_info);
+
+   result = pvr_cmd_buffer_upload_pds_data(cmd_buffer,
+                                           staging_buffer,
+                                           program.data_size,
+                                           1,
+                                           pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer);
+      return result;
+   }
+
+   vk_free(&cmd_buffer->vk.pool->alloc, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+/* FIXME: Should this function be specific to the HW background object, in
+ * which case its name should be changed, or should it have the load op
+ * structure passed in?
+ */
+static VkResult
+pvr_load_op_data_create_and_upload(struct pvr_cmd_buffer *cmd_buffer,
+                                   uint32_t idx,
+                                   struct pvr_pds_upload *const pds_upload_out)
+{
+   pvr_dev_addr_t constants_addr;
+   VkResult result;
+
+   result =
+      pvr_load_op_constants_create_and_upload(cmd_buffer, idx, &constants_addr);
+   if (result != VK_SUCCESS)
+      return result;
+
+   return pvr_load_op_pds_data_create_and_upload(cmd_buffer,
+                                                 idx,
+                                                 constants_addr,
+                                                 pds_upload_out);
+}
+
/* Pack the PDS background object register words used to run the load op at
 * the start of a tile: program addresses (words 0 and 1) and size info
 * (word 2) from the load op's pre-uploaded PDS programs and the per-job
 * texture state data program in \p load_op_program.
 */
static void pvr_pds_bgnd_pack_state(
   const struct pvr_load_op *load_op,
   const struct pvr_pds_upload *load_op_program,
   uint64_t pds_reg_values[static const ROGUE_NUM_CR_PDS_BGRND_WORDS])
{
   /* Word 0: data address of the PDS fragment program and code address of
    * the PDS texture state program.
    */
   pvr_csb_pack (&pds_reg_values[0], CR_PDS_BGRND0_BASE, value) {
      value.shader_addr.addr = load_op->pds_frag_prog.data_offset;
      value.texunicode_addr.addr = load_op->pds_tex_state_prog.data_offset;
      value.texunicode_addr.addr = load_op->pds_tex_state_prog.code_offset;
   }

   /* Word 1: data address of the per-job texture state data segment. */
   pvr_csb_pack (&pds_reg_values[1], CR_PDS_BGRND1_BASE, value) {
      value.texturedata_addr.addr = load_op_program->data_offset;
   }

   /* Word 2 carries the BGRND3 size info; all sizes are converted from raw
    * counts into the hardware's unit sizes, rounding up.
    * NOTE(review): array index 2 vs register name "3" looks intentional
    * (BGRND2 is presumably not part of this word set) — confirm against the
    * rogue_cr register layout.
    */
   pvr_csb_pack (&pds_reg_values[2], CR_PDS_BGRND3_SIZEINFO, value) {
      value.usc_sharedsize =
         DIV_ROUND_UP(load_op->const_shareds_count,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_USC_SHAREDSIZE_UNIT_SIZE));
      value.pds_texturestatesize = DIV_ROUND_UP(
         load_op_program->data_size,
         PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEXTURESTATESIZE_UNIT_SIZE));
      value.pds_tempsize =
         DIV_ROUND_UP(load_op->temps_count,
                      PVRX(CR_PDS_BGRND3_SIZEINFO_PDS_TEMPSIZE_UNIT_SIZE));
   }
}
+
+/**
+ * \brief Calculates the stride in pixels based on the pitch in bytes and pixel
+ * format.
+ *
+ * \param[in] pitch     Width pitch in bytes.
+ * \param[in] vk_format Vulkan image format.
+ * \return Stride in pixels.
+ */
+static inline uint32_t pvr_stride_from_pitch(uint32_t pitch, VkFormat vk_format)
+{
+   const unsigned int cpp = vk_format_get_blocksize(vk_format);
+
+   assert(pitch % cpp == 0);
+
+   return pitch / cpp;
+}
+
/* Fill in the PBE (pixel back end) state for one MRT surface: packs both the
 * control stream words (\p pbe_cs_words) consumed by the end-of-tile pixel
 * event program and the register words (\p pbe_reg_words) programmed for the
 * render job, from the image view, render area and MRT resource assignment.
 */
static void pvr_setup_pbe_state(
   struct pvr_device *const device,
   struct pvr_framebuffer *framebuffer,
   uint32_t mrt_index,
   const struct usc_mrt_resource *mrt_resource,
   const struct pvr_image_view *const iview,
   const VkRect2D *render_area,
   const bool down_scale,
   const uint32_t samples,
   uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
   uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const struct pvr_image *image = iview->image;
   uint32_t level_pitch = image->mip_levels[iview->vk.base_mip_level].pitch;

   struct pvr_pbe_surf_params surface_params;
   struct pvr_pbe_render_params render_params;
   bool with_packed_usc_channel;
   const uint8_t *swizzle;
   uint32_t position;

   /* down_scale should be true when performing a resolve, in which case there
    * should be more than one sample.
    */
   assert((down_scale && samples > 1U) || (!down_scale && samples == 1U));

   /* Setup surface parameters. */

   /* Packed USC channels are only used when the core supports U8 ops on F16
    * sums (usc_f16sop_u8) and only for formats known to benefit from it.
    */
   if (PVR_HAS_FEATURE(dev_info, usc_f16sop_u8)) {
      switch (iview->vk.format) {
      case VK_FORMAT_B8G8R8A8_UNORM:
         with_packed_usc_channel = true;
         break;
      case VK_FORMAT_D32_SFLOAT:
         with_packed_usc_channel = false;
         break;
      default:
         unreachable("Unsupported Vulkan image format");
      }
   } else {
      with_packed_usc_channel = false;
   }

   swizzle = pvr_get_format_swizzle(iview->vk.format);
   memcpy(surface_params.swizzle, swizzle, sizeof(surface_params.swizzle));

   pvr_pbe_get_src_format_and_gamma(iview->vk.format,
                                    PVR_PBE_GAMMA_NONE,
                                    with_packed_usc_channel,
                                    &surface_params.source_format,
                                    &surface_params.gamma);

   surface_params.is_normalized = vk_format_is_normalized(iview->vk.format);
   surface_params.pbe_packmode = pvr_get_pbe_packmode(iview->vk.format);
   surface_params.nr_components = vk_format_get_nr_components(iview->vk.format);

   /* FIXME: Should we have an inline function to return the address of a mip
    * level?
    */
   surface_params.addr.addr =
      image->vma->dev_addr.addr +
      image->mip_levels[iview->vk.base_mip_level].offset;

   surface_params.mem_layout = image->memlayout;
   surface_params.stride = pvr_stride_from_pitch(level_pitch, iview->vk.format);
   surface_params.depth = iview->vk.extent.depth;
   surface_params.width = iview->vk.extent.width;
   surface_params.height = iview->vk.extent.height;
   surface_params.z_only_render = false;
   surface_params.down_scale = down_scale;
   surface_params.msaa_mode = samples;

   /* Setup render parameters. */

   /* The source position is either an offset into tile memory or an output
    * register index, depending on where the MRT was allocated.
    */
   if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_MEMORY) {
      position = mrt_resource->u.mem.offset_in_dwords;
   } else {
      assert(mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER);
      assert(mrt_resource->u.reg.offset == 0);

      position = mrt_resource->u.reg.out_reg;
   }

   /* Registers 4-7 only exist on cores with eight output registers. */
   assert(position <= 3 || PVR_HAS_FEATURE(dev_info, eight_output_registers));

   /* Each output register is 32 bits wide, so the start bit is the register
    * index (mod 4) times 32.
    */
   switch (position) {
   case 0:
   case 4:
      render_params.source_start = PVR_PBE_STARTPOS_BIT0;
      break;
   case 1:
   case 5:
      render_params.source_start = PVR_PBE_STARTPOS_BIT32;
      break;
   case 2:
   case 6:
      render_params.source_start = PVR_PBE_STARTPOS_BIT64;
      break;
   case 3:
   case 7:
      render_params.source_start = PVR_PBE_STARTPOS_BIT96;
      break;
   default:
      assert(!"Invalid output register");
      break;
   }

   /* Clip the render area to the framebuffer; max clips are inclusive. */
   render_params.min_x_clip = MAX2(0, render_area->offset.x);
   render_params.min_y_clip = MAX2(0, render_area->offset.y);
   render_params.max_x_clip =
      MIN2(framebuffer->width,
           render_area->offset.x + render_area->extent.width) -
      1;
   render_params.max_y_clip =
      MIN2(framebuffer->height,
           render_area->offset.y + render_area->extent.height) -
      1;

   render_params.slice = 0;
   render_params.mrt_index = mrt_index;

   pvr_pbe_pack_state(device,
                      &surface_params,
                      &render_params,
                      pbe_cs_words,
                      pbe_reg_words);
}
+
+static struct pvr_render_target *
+pvr_get_render_target(const struct pvr_render_pass *pass,
+                      const struct pvr_framebuffer *framebuffer,
+                      uint32_t idx)
+{
+   const struct pvr_renderpass_hwsetup_render *hw_render =
+      &pass->hw_setup->renders[idx];
+   uint32_t rt_idx = 0;
+
+   switch (hw_render->sample_count) {
+   case 1:
+   case 2:
+   case 4:
+   case 8:
+      rt_idx = util_logbase2(hw_render->sample_count);
+      break;
+
+   default:
+      unreachable("Unsupported sample count");
+      break;
+   }
+
+   return &framebuffer->render_targets[rt_idx];
+}
+
+static uint32_t
+pvr_pass_get_pixel_output_width(const struct pvr_render_pass *pass,
+                                uint32_t idx,
+                                const struct pvr_device_info *dev_info)
+{
+   const struct pvr_renderpass_hwsetup_render *hw_render =
+      &pass->hw_setup->renders[idx];
+   /* Default value based on the maximum value found in all existing cores. The
+    * maximum is used as this is being treated as a lower bound, making it a
+    * "safer" choice than the minimum value found in all existing cores.
+    */
+   const uint32_t min_output_regs =
+      PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 2U);
+   const uint32_t width = MAX2(hw_render->output_regs_count, min_output_regs);
+
+   return util_next_power_of_two(width);
+}
+
/* Fill in the render job (sub_cmd->gfx.job) from the accumulated render pass
 * state: PBE words for each end-of-tile surface, the per-job pixel event
 * program, optional background object (load op) state, render target,
 * control stream address, table addresses and depth/stencil parameters.
 */
static VkResult pvr_sub_cmd_gfx_job_init(struct pvr_device *device,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd *sub_cmd)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_render_pass_info *render_pass_info =
      &cmd_buffer->state.render_pass_info;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &render_pass_info->pass->hw_setup->renders[sub_cmd->gfx.hw_render_idx];
   struct pvr_render_job *job = &sub_cmd->gfx.job;
   struct pvr_pds_upload pds_pixel_event_program;

   uint32_t pbe_cs_words[PVR_MAX_COLOR_ATTACHMENTS]
                        [ROGUE_NUM_PBESTATE_STATE_WORDS];
   struct pvr_render_target *render_target;
   VkResult result;

   assert(hw_render->eot_surface_count < ARRAY_SIZE(pbe_cs_words));

   /* Pack PBE control stream and register words for every end-of-tile
    * surface.
    */
   for (uint32_t i = 0; i < hw_render->eot_surface_count; i++) {
      const struct pvr_renderpass_hwsetup_eot_surface *surface =
         &hw_render->eot_surfaces[i];
      const struct pvr_image_view *iview =
         render_pass_info->attachments[surface->attachment_index];
      const struct usc_mrt_resource *mrt_resource =
         &hw_render->eot_setup.mrt_resources[surface->mrt_index];
      /* NOTE(review): samples is fixed at 1 even when need_resolve is set;
       * pvr_setup_pbe_state asserts down_scale implies samples > 1, so
       * resolves would trip that assert — resolve setup is still a
       * finishme below. Confirm when resolve support lands.
       */
      uint32_t samples = 1;

      if (surface->need_resolve)
         pvr_finishme("Set up job resolve information.");

      pvr_setup_pbe_state(device,
                          render_pass_info->framebuffer,
                          surface->mrt_index,
                          mrt_resource,
                          iview,
                          &render_pass_info->render_area,
                          surface->need_resolve,
                          samples,
                          pbe_cs_words[i],
                          job->pbe_reg_words[i]);
   }

   /* FIXME: The fragment program only supports a single surface at present. */
   assert(hw_render->eot_surface_count == 1);
   result = pvr_sub_cmd_gfx_per_job_fragment_programs_create_and_upload(
      cmd_buffer,
      pbe_cs_words[0],
      &pds_pixel_event_program);
   if (result != VK_SUCCESS)
      return result;

   job->pds_pixel_event_data_offset = pds_pixel_event_program.data_offset;

   /* FIXME: Don't do this if there is a barrier load. */
   if (render_pass_info->enable_bg_tag) {
      const struct pvr_load_op *load_op = hw_render->client_data;
      struct pvr_pds_upload load_op_program;

      /* FIXME: Should we free the PDS pixel event data or let it be freed
       * when the pool gets emptied?
       */
      result = pvr_load_op_data_create_and_upload(cmd_buffer,
                                                  sub_cmd->gfx.hw_render_idx,
                                                  &load_op_program);
      if (result != VK_SUCCESS)
         return result;

      pvr_pds_bgnd_pack_state(load_op,
                              &load_op_program,
                              job->pds_bgnd_reg_values);
   }

   job->enable_bg_tag = render_pass_info->enable_bg_tag;
   job->process_empty_tiles = render_pass_info->process_empty_tiles;

   /* The render target is selected by the HW render's sample count. */
   render_target = pvr_get_render_target(render_pass_info->pass,
                                         render_pass_info->framebuffer,
                                         sub_cmd->gfx.hw_render_idx);
   job->rt_dataset = render_target->rt_dataset;

   job->ctrl_stream_addr =
      pvr_csb_get_start_address(&sub_cmd->gfx.control_stream);

   /* FIXME: Need to set up the border color table at device creation
    * time. Set to invalid for the time being.
    */
   job->border_colour_table_addr = PVR_DEV_ADDR_INVALID;

   /* Depth bias and scissor tables are optional per sub command. */
   if (sub_cmd->gfx.depth_bias_bo)
      job->depth_bias_table_addr = sub_cmd->gfx.depth_bias_bo->vma->dev_addr;
   else
      job->depth_bias_table_addr = PVR_DEV_ADDR_INVALID;

   if (sub_cmd->gfx.scissor_bo)
      job->scissor_table_addr = sub_cmd->gfx.scissor_bo->vma->dev_addr;
   else
      job->scissor_table_addr = PVR_DEV_ADDR_INVALID;

   job->pixel_output_width =
      pvr_pass_get_pixel_output_width(render_pass_info->pass,
                                      sub_cmd->gfx.hw_render_idx,
                                      dev_info);

   /* Fill in depth/stencil job state from the DS attachment, if any;
    * otherwise set everything to the "no depth/stencil" defaults.
    */
   if (hw_render->ds_surface_id != -1) {
      struct pvr_image_view *iview =
         render_pass_info->attachments[hw_render->ds_surface_id];
      const struct pvr_image *image = iview->image;

      if (vk_format_has_depth(image->vk.format)) {
         uint32_t level_pitch =
            image->mip_levels[iview->vk.base_mip_level].pitch;

         /* FIXME: Is this sufficient for depth buffers? */
         job->depth_addr = image->dev_addr;

         job->depth_stride =
            pvr_stride_from_pitch(level_pitch, iview->vk.format);
         job->depth_height = iview->vk.extent.height;
         job->depth_physical_width =
            u_minify(image->physical_extent.width, iview->vk.base_mip_level);
         job->depth_physical_height =
            u_minify(image->physical_extent.height, iview->vk.base_mip_level);
         job->depth_layer_size = image->layer_size;

         /* Use the render pass clear value if one was provided, otherwise
          * default to a far-plane clear of 1.0.
          */
         if (hw_render->ds_surface_id < render_pass_info->clear_value_count) {
            VkClearValue *clear_values =
               &render_pass_info->clear_values[hw_render->ds_surface_id];

            job->depth_clear_value = clear_values->depthStencil.depth;
         } else {
            job->depth_clear_value = 1.0f;
         }

         job->depth_vk_format = iview->vk.format;

         job->depth_memlayout = image->memlayout;
      } else {
         job->depth_addr = PVR_DEV_ADDR_INVALID;
         job->depth_stride = 0;
         job->depth_height = 0;
         job->depth_physical_width = 0;
         job->depth_physical_height = 0;
         job->depth_layer_size = 0;
         job->depth_clear_value = 1.0f;
         job->depth_vk_format = VK_FORMAT_UNDEFINED;
         job->depth_memlayout = PVR_MEMLAYOUT_LINEAR;
      }

      if (vk_format_has_stencil(image->vk.format)) {
         /* FIXME: Is this sufficient for stencil buffers? */
         job->stencil_addr = image->dev_addr;
      } else {
         job->stencil_addr = PVR_DEV_ADDR_INVALID;
      }

      job->samples = image->vk.samples;
   } else {
      pvr_finishme("Set up correct number of samples for render job");

      job->depth_addr = PVR_DEV_ADDR_INVALID;
      job->depth_stride = 0;
      job->depth_height = 0;
      job->depth_physical_width = 0;
      job->depth_physical_height = 0;
      job->depth_layer_size = 0;
      job->depth_clear_value = 1.0f;
      job->depth_vk_format = VK_FORMAT_UNDEFINED;
      job->depth_memlayout = PVR_MEMLAYOUT_LINEAR;

      job->stencil_addr = PVR_DEV_ADDR_INVALID;

      job->samples = 1;
   }

   /* A value of 0 means "use the default limit based on the partition
    * store"; only pass an explicit limit when the sub command's limit is
    * below the core maximum.
    */
   if (sub_cmd->gfx.max_tiles_in_flight ==
       PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U)) {
      /* Use the default limit based on the partition store. */
      job->max_tiles_in_flight = 0U;
   } else {
      job->max_tiles_in_flight = sub_cmd->gfx.max_tiles_in_flight;
   }

   job->frag_uses_atomic_ops = sub_cmd->gfx.frag_uses_atomic_ops;
   job->disable_compute_overlap = false;
   job->max_shared_registers = cmd_buffer->state.max_shared_regs;
   job->run_frag = true;
   job->geometry_terminate = true;

   return VK_SUCCESS;
}
+
+/* Number of shareds used in the Issue Data Fence(IDF)/Wait Data Fence(WDF)
+ * kernel.
+ */
+#define PVR_IDF_WDF_IN_REGISTER_CONST_COUNT 12U
+
/* Fill in the compute submit info for a compute sub command: control stream
 * base, border colour table, shared register count and the overlap/cluster
 * restrictions required when barriers or atomic ops are used.
 */
static void pvr_sub_cmd_compute_job_init(struct pvr_device *device,
                                         struct pvr_cmd_buffer *cmd_buffer,
                                         struct pvr_sub_cmd *sub_cmd)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   /* A barrier in the sub command means no other work may overlap with it. */
   if (sub_cmd->compute.uses_barrier) {
      sub_cmd->compute.submit_info.flags |=
         PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;
   }

   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.cdm_ctrl_stream_base,
                 CR_CDM_CTRL_STREAM_BASE,
                 value) {
      value.addr = pvr_csb_get_start_address(&sub_cmd->compute.control_stream);
   }

   /* FIXME: Need to set up the border color table at device creation
    * time. Set to invalid for the time being.
    */
   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu_border_colour_table,
                 CR_TPU_BORDER_COLOUR_TABLE_CDM,
                 value) {
      value.border_colour_table_address = PVR_DEV_ADDR_INVALID;
   }

   /* Reserve at least enough shareds for the IDF/WDF kernel, then reset the
    * running maximum for the next sub command.
    */
   sub_cmd->compute.num_shared_regs = MAX2(PVR_IDF_WDF_IN_REGISTER_CONST_COUNT,
                                           cmd_buffer->state.max_shared_regs);

   cmd_buffer->state.max_shared_regs = 0U;

   if (PVR_HAS_FEATURE(dev_info, compute_morton_capable))
      sub_cmd->compute.submit_info.regs.cdm_item = 0;

   pvr_csb_pack (&sub_cmd->compute.submit_info.regs.tpu, CR_TPU, value) {
      value.tag_cem_4k_face_packing = true;
   }

   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
       rogue_get_num_phantoms(dev_info) > 1 &&
       sub_cmd->compute.uses_atomic_ops) {
      /* Each phantom has its own MCU, so atomicity can only be guaranteed
       * when all work items are processed on the same phantom. This means we
       * need to disable all USCs other than those of the first phantom, which
       * has 4 clusters.
       */
      pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster,
                    CR_COMPUTE_CLUSTER,
                    value) {
         value.mask = 0xFU;
      }
   } else {
      pvr_csb_pack (&sub_cmd->compute.submit_info.regs.compute_cluster,
                    CR_COMPUTE_CLUSTER,
                    value) {
         value.mask = 0U;
      }
   }

   /* On multi-core GPUs atomic ops are only coherent within a single core. */
   if (PVR_HAS_FEATURE(dev_info, gpu_multicore_support) &&
       sub_cmd->compute.uses_atomic_ops) {
      sub_cmd->compute.submit_info.flags |= PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE;
   }
}
+
+#define PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS \
+   (1024 / PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE))
+
/* Calculate the number of USC instances per compute slot, limited by the
 * available coefficient (local memory) registers, hardware workarounds and
 * barrier usage. \p coeff_regs_count is the per-workgroup coefficient
 * register usage and \p local_size the workgroup dimensions.
 */
static uint32_t pvr_compute_slot_size(const struct pvr_device_info *dev_info,
                                      uint32_t coeff_regs_count,
                                      bool use_barrier,
                                      const uint32_t local_size[static 3U])
{
   uint32_t max_workgroups_per_task = ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK;
   uint32_t max_avail_coeff_regs =
      rogue_get_cdm_max_local_mem_size_regs(dev_info);
   /* Registers are 4 bytes; convert usage into common store alloc blocks. */
   uint32_t localstore_chunks_count =
      DIV_ROUND_UP(coeff_regs_count << 2,
                   PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));
   uint32_t total_workitems = local_size[0U] * local_size[1U] * local_size[2U];

   /* Ensure that we cannot have more workgroups in a slot than the available
    * number of coefficients allow us to have.
    */
   if (coeff_regs_count > 0U) {
      /* If TA or 3D can overlap with CDM, or if the TA is running a geometry
       * shader then we need to consider this in calculating max allowed
       * work-groups.
       */
      if (PVR_HAS_QUIRK(dev_info, 52354) &&
          (PVR_HAS_FEATURE(dev_info, compute_overlap) ||
           PVR_HAS_FEATURE(dev_info, gs_rta_support))) {
         /* Solve for n (number of work-groups per task). All values are in
          * size of common store alloc blocks:
          *
          * n + (2n + 7) * (local_memory_size_max - 1) =
          *    (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
          * ==>
          * n + 2n * (local_memory_size_max - 1) =
          *    (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
          *    - (7 * (local_memory_size_max - 1))
          * ==>
          * n * (1 + 2 * (local_memory_size_max - 1)) =
          *    (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
          *    - (7 * (local_memory_size_max - 1))
          * ==>
          * n = ((coefficient_memory_pool_size) -
          *    (7 * pixel_allocation_size_max) -
          *    (7 * (local_memory_size_max - 1)) / (1 +
          * 2 * (local_memory_size_max - 1)))
          */
         uint32_t max_common_store_blocks =
            DIV_ROUND_UP(max_avail_coeff_regs * 4U,
                         PVRX(CDMCTRL_KERNEL0_USC_COMMON_SIZE_UNIT_SIZE));

         /* (coefficient_memory_pool_size) - (7 * pixel_allocation_size_max)
          */
         max_common_store_blocks -= ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
                                    PIXEL_ALLOCATION_SIZE_MAX_IN_BLOCKS;

         /* - (7 * (local_memory_size_max - 1)) */
         max_common_store_blocks -= (ROGUE_MAX_OVERLAPPED_PIXEL_TASK_INSTANCES *
                                     (localstore_chunks_count - 1U));

         /* Divide by (1 + 2 * (local_memory_size_max - 1)) */
         max_workgroups_per_task = max_common_store_blocks /
                                   (1U + 2U * (localstore_chunks_count - 1U));

         max_workgroups_per_task =
            MIN2(max_workgroups_per_task,
                 ROGUE_CDM_MAX_PACKED_WORKGROUPS_PER_TASK);

      } else {
         max_workgroups_per_task =
            MIN2((max_avail_coeff_regs / coeff_regs_count),
                 max_workgroups_per_task);
      }
   }

   /* max_workgroups_per_task should at least be one. */
   assert(max_workgroups_per_task >= 1U);

   if (total_workitems >= ROGUE_MAX_INSTANCES_PER_TASK) {
      /* In this case, the work group size will have been padded up to the
       * next ROGUE_MAX_INSTANCES_PER_TASK so we just set max instances to be
       * ROGUE_MAX_INSTANCES_PER_TASK.
       */
      return ROGUE_MAX_INSTANCES_PER_TASK;
   }

   /* In this case, the number of instances in the slot must be clamped to
    * accommodate whole work-groups only.
    */
   if (PVR_HAS_QUIRK(dev_info, 49032) || use_barrier) {
      max_workgroups_per_task =
         MIN2(max_workgroups_per_task,
              ROGUE_MAX_INSTANCES_PER_TASK / total_workitems);
      return total_workitems * max_workgroups_per_task;
   }

   return MIN2(total_workitems * max_workgroups_per_task,
               ROGUE_MAX_INSTANCES_PER_TASK);
}
+
/* Emit the CDM control stream words for the compute kernel described by
 * \p info. Kernels 3-5 (global workgroup counts) are emitted only for
 * direct dispatches; indirect dispatches emit kernels 6/7 carrying the
 * indirect buffer address instead. Kernel 8 (workgroup sizes / max
 * instances) is always emitted last.
 */
static void
pvr_compute_generate_control_stream(struct pvr_csb *csb,
                                    const struct pvr_compute_kernel_info *info)
{
   /* Compute kernel 0. */
   pvr_csb_emit (csb, CDMCTRL_KERNEL0, kernel0) {
      kernel0.indirect_present = !!info->indirect_buffer_addr.addr;
      kernel0.global_offsets_present = info->global_offsets_present;
      kernel0.usc_common_size = info->usc_common_size;
      kernel0.usc_unified_size = info->usc_unified_size;
      kernel0.pds_temp_size = info->pds_temp_size;
      kernel0.pds_data_size = info->pds_data_size;

      if (info->usc_target_any)
         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ANY);
      else
         kernel0.usc_target = PVRX(CDMCTRL_USC_TARGET_ALL);

      kernel0.fence = info->is_fence;
   }

   /* Compute kernel 1. */
   pvr_csb_emit (csb, CDMCTRL_KERNEL1, kernel1) {
      kernel1.data_addr.addr = info->pds_data_offset;
      kernel1.sd_type = info->sd_type;

      /* Shared register deallocation only applies to non-fence kernels. */
      if (!info->is_fence)
         kernel1.usc_common_shared = info->usc_common_shared;
   }

   /* Compute kernel 2. */
   pvr_csb_emit (csb, CDMCTRL_KERNEL2, kernel2) {
      kernel2.code_addr.addr = info->pds_code_offset;
   }

   if (info->indirect_buffer_addr.addr) {
      /* Compute kernel 6. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL6, kernel6) {
         kernel6.indirect_addrmsb = info->indirect_buffer_addr;
      }

      /* Compute kernel 7. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL7, kernel7) {
         kernel7.indirect_addrlsb = info->indirect_buffer_addr;
      }
   } else {
      /* Workgroup counts are encoded as value - 1, hence the assertions. */
      /* Compute kernel 3. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL3, kernel3) {
         assert(info->global_size[0U] > 0U);
         kernel3.workgroup_x = info->global_size[0U] - 1U;
      }

      /* Compute kernel 4. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL4, kernel4) {
         assert(info->global_size[1U] > 0U);
         kernel4.workgroup_y = info->global_size[1U] - 1U;
      }

      /* Compute kernel 5. */
      pvr_csb_emit (csb, CDMCTRL_KERNEL5, kernel5) {
         assert(info->global_size[2U] > 0U);
         kernel5.workgroup_z = info->global_size[2U] - 1U;
      }
   }

   /* Compute kernel 8. */
   pvr_csb_emit (csb, CDMCTRL_KERNEL8, kernel8) {
      /* max_instances of 0 encodes the hardware maximum. */
      if (info->max_instances == ROGUE_MAX_INSTANCES_PER_TASK)
         kernel8.max_instances = 0U;
      else
         kernel8.max_instances = info->max_instances;

      assert(info->local_size[0U] > 0U);
      kernel8.workgroup_size_x = info->local_size[0U] - 1U;
      assert(info->local_size[1U] > 0U);
      kernel8.workgroup_size_y = info->local_size[1U] - 1U;
      assert(info->local_size[2U] > 0U);
      kernel8.workgroup_size_z = info->local_size[2U] - 1U;
   }
}
+
+/* Appends a fence to the current compute sub-command's control stream: a
+ * minimal 1x1x1 kernel running the device's pre-uploaded PDS compute-fence
+ * program with no USC common/unified allocations. When deallocate_shareds
+ * is set, the kernel is also marked to release the USC common (shared)
+ * register allocation.
+ */
+static void pvr_compute_generate_fence(struct pvr_cmd_buffer *cmd_buffer,
+                                       bool deallocate_shareds)
+{
+   const struct pvr_pds_upload *program =
+      &cmd_buffer->device->pds_compute_fence_program;
+   const struct pvr_device_info *dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_csb *csb = &state->current_sub_cmd->compute.control_stream;
+
+   /* Fence kernel: no indirect dispatch, no global offsets, single
+    * work-item, PDS-only state descriptor.
+    */
+   struct pvr_compute_kernel_info info = {
+      .indirect_buffer_addr.addr = 0ULL,
+      .global_offsets_present = false,
+      .usc_common_size = 0U,
+      .usc_unified_size = 0U,
+      .pds_temp_size = 0U,
+      .pds_data_size =
+         DIV_ROUND_UP(program->data_size << 2,
+                      PVRX(CDMCTRL_KERNEL0_PDS_DATA_SIZE_UNIT_SIZE)),
+      .usc_target_any = true,
+      .is_fence = true,
+      .pds_data_offset = program->data_offset,
+      .sd_type = PVRX(CDMCTRL_SD_TYPE_PDS),
+      .usc_common_shared = deallocate_shareds,
+      .pds_code_offset = program->code_offset,
+      .global_size = { 1U, 1U, 1U },
+      .local_size = { 1U, 1U, 1U },
+   };
+
+   /* We don't need to pad work-group size for this case. */
+   /* Here we calculate the slot size. This can depend on the use of barriers,
+    * local memory, BRN's or other factors.
+    */
+   info.max_instances =
+      pvr_compute_slot_size(dev_info, 0U, false, info.local_size);
+
+   pvr_compute_generate_control_stream(csb, &info);
+}
+
+/* Finalizes the current sub-command, if any: terminates its control stream,
+ * uploads outstanding state and initializes the per-type submission job,
+ * then clears state->current_sub_cmd. Any failure is latched into
+ * state->status as well as returned.
+ */
+static VkResult pvr_cmd_buffer_end_sub_cmd(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;
+   struct pvr_device *device = cmd_buffer->device;
+   VkResult result;
+
+   /* FIXME: Is this NULL check required because this function is called from
+    * pvr_resolve_unemitted_resolve_attachments()? See comment about this
+    * function being called twice in a row in pvr_CmdEndRenderPass().
+    */
+   if (!sub_cmd)
+      return VK_SUCCESS;
+
+   switch (sub_cmd->type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+      /* Secondary command buffers end their stream with a return instead of
+       * a terminate — presumably so execution resumes in the caller's
+       * stream; confirm against the submit/execute-commands path.
+       */
+      if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
+         result = pvr_csb_emit_return(&sub_cmd->gfx.control_stream);
+         if (result != VK_SUCCESS) {
+            state->status = result;
+            return result;
+         }
+
+         break;
+      }
+
+      /* TODO: Check if the sub_cmd can be skipped based on
+       * sub_cmd->gfx.empty_cmd flag.
+       */
+
+      result = pvr_cmd_buffer_upload_tables(device, cmd_buffer);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      result = pvr_cmd_buffer_emit_ppp_state(cmd_buffer);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      result = pvr_csb_emit_terminate(&sub_cmd->gfx.control_stream);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      result = pvr_sub_cmd_gfx_job_init(device, cmd_buffer, sub_cmd);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      break;
+
+   case PVR_SUB_CMD_TYPE_COMPUTE:
+      /* Fence with shared-register deallocation before terminating. */
+      pvr_compute_generate_fence(cmd_buffer, true);
+
+      result = pvr_csb_emit_terminate(&sub_cmd->compute.control_stream);
+      if (result != VK_SUCCESS) {
+         state->status = result;
+         return result;
+      }
+
+      /* NOTE(review): unlike the graphics path above, the result of
+       * pvr_sub_cmd_compute_job_init() is not checked here — confirm it
+       * cannot fail (or returns void).
+       */
+      pvr_sub_cmd_compute_job_init(device, cmd_buffer, sub_cmd);
+      break;
+
+   case PVR_SUB_CMD_TYPE_TRANSFER:
+      break;
+
+   default:
+      pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
+      break;
+   }
+
+   state->current_sub_cmd = NULL;
+
+   return VK_SUCCESS;
+}
+
+/* Resets PPP emit/dirty tracking for a graphics sub-command.
+ *
+ * It's the driver's responsibility to ensure the hardware state is correctly
+ * initialized at the start of every geometry phase, so stale state from a
+ * previous phase cannot leak into the next. The first PPP State Update of a
+ * geometry phase that contains any geometry (draw calls) must supply the
+ * following PPP State Header fields and their corresponding state words; any
+ * field not listed is safe to ignore:
+ *
+ *       TA_PRES_STREAM_OUT_SIZE, TA_PRES_PPPCTRL, TA_PRES_VARYING_WORD2,
+ *       TA_PRES_VARYING_WORD1, TA_PRES_VARYING_WORD0, TA_PRES_OUTSELECTS,
+ *       TA_PRES_WCLAMP, TA_VIEWPORT_COUNT, TA_PRES_VIEWPORT,
+ *       TA_PRES_REGION_CLIP, TA_PRES_PDSSTATEPTR0, TA_PRES_ISPCTLFB,
+ *       TA_PRES_ISPCTLFA, TA_PRES_ISPCTL
+ *
+ * A geometry phase with no geometry may ignore this restriction. If the
+ * first draw call only updates depth/stencil (ISP_TAGWRITEDISABLE set in the
+ * ISP State Control Word), the PDS State Pointers (TA_PRES_PDSSTATEPTR*)
+ * need not be supplied in the first PPP State Update since they never reach
+ * the PDS in the fragment phase.
+ */
+static void pvr_reset_graphics_dirty_state(struct pvr_cmd_buffer_state *state,
+                                           bool start_geom)
+{
+   /* A fresh geometry phase clears all emit bits first and additionally
+    * requires the stream-out, varying-word2 and w-clamp words.
+    */
+   if (start_geom) {
+      state->emit_state_bits = 0;
+
+      state->emit_state.stream_out = true;
+      state->emit_state.varying_word2 = true;
+      state->emit_state.wclamp = true;
+   }
+
+   /* State words re-emitted whether or not a new geometry phase starts. */
+   state->emit_state.ppp_control = true;
+   state->emit_state.varying_word1 = true;
+   state->emit_state.varying_word0 = true;
+   state->emit_state.output_selects = true;
+   state->emit_state.viewport = true;
+   state->emit_state.region_clip = true;
+   state->emit_state.pds_fragment_stateptr0 = true;
+   state->emit_state.isp_fb = true;
+   state->emit_state.isp = true;
+
+   memset(&state->ppp_state, 0U, sizeof(state->ppp_state));
+
+   state->dirty.vertex_bindings = true;
+   state->dirty.gfx_pipeline_binding = true;
+   state->dirty.viewport = true;
+}
+
+/* Starts (or continues) a sub-command of the given type. If the current
+ * sub-command already has that type recording simply continues; otherwise
+ * the current one is ended, and a new sub-command is allocated, initialized
+ * per type, appended to cmd_buffer->sub_cmds and made current.
+ */
+static VkResult pvr_cmd_buffer_start_sub_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                             enum pvr_sub_cmd_type type)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_device *device = cmd_buffer->device;
+   struct pvr_sub_cmd *sub_cmd;
+   VkResult result;
+
+   /* Check the current status of the buffer. */
+   if (state->status != VK_SUCCESS)
+      return state->status;
+
+   pvr_cmd_buffer_update_barriers(cmd_buffer, type);
+
+   if (state->current_sub_cmd) {
+      if (state->current_sub_cmd->type == type) {
+         /* Continue adding to the current sub command. */
+         return VK_SUCCESS;
+      }
+
+      /* End the current sub command. */
+      result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   sub_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                       sizeof(*sub_cmd),
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!sub_cmd) {
+      /* Latch the OOM so vkEndCommandBuffer reports it. */
+      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return state->status;
+   }
+
+   sub_cmd->type = type;
+
+   switch (type) {
+   case PVR_SUB_CMD_TYPE_GRAPHICS:
+
+      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
+      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_UNDEFINED;
+      sub_cmd->gfx.modifies_depth = false;
+      sub_cmd->gfx.modifies_stencil = false;
+      sub_cmd->gfx.max_tiles_in_flight =
+         PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info,
+                               isp_max_tiles_in_flight,
+                               1);
+      sub_cmd->gfx.hw_render_idx = state->render_pass_info.current_hw_subpass;
+      sub_cmd->gfx.framebuffer = state->render_pass_info.framebuffer;
+      sub_cmd->gfx.empty_cmd = true;
+
+      /* New sub-command means a fresh geometry phase. */
+      pvr_reset_graphics_dirty_state(state, true);
+      pvr_csb_init(device,
+                   PVR_CMD_STREAM_TYPE_GRAPHICS,
+                   &sub_cmd->gfx.control_stream);
+      break;
+
+   case PVR_SUB_CMD_TYPE_COMPUTE:
+      pvr_csb_init(device,
+                   PVR_CMD_STREAM_TYPE_COMPUTE,
+                   &sub_cmd->compute.control_stream);
+      break;
+
+   case PVR_SUB_CMD_TYPE_TRANSFER:
+      list_inithead(&sub_cmd->transfer.transfer_cmds);
+      break;
+
+   default:
+      pvr_finishme("Unsupported sub-command type %d", type);
+      break;
+   }
+
+   list_addtail(&sub_cmd->link, &cmd_buffer->sub_cmds);
+   state->current_sub_cmd = sub_cmd;
+
+   return VK_SUCCESS;
+}
+
+/* Allocates a device BO from the given heap for command-buffer use.
+ * Alignment comes from the device's SLC cache line size. The BO is tracked
+ * on cmd_buffer->bo_list, so it is released together with the command
+ * buffer; on failure the error is latched into cmd_buffer->state.status.
+ */
+VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
+                                  struct pvr_winsys_heap *heap,
+                                  uint64_t size,
+                                  uint32_t flags,
+                                  struct pvr_bo **const pvr_bo_out)
+{
+   struct pvr_device *const device = cmd_buffer->device;
+   const uint32_t alignment =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   struct pvr_bo *bo;
+   VkResult result;
+
+   result = pvr_bo_alloc(device, heap, size, alignment, flags, &bo);
+   if (result != VK_SUCCESS) {
+      cmd_buffer->state.status = result;
+      return result;
+   }
+
+   /* Track the allocation for release with the command buffer. */
+   list_add(&bo->link, &cmd_buffer->bo_list);
+
+   *pvr_bo_out = bo;
+
+   return VK_SUCCESS;
+}
+
+/* vkResetCommandBuffer: not implemented yet. Debug builds abort via the
+ * assert; release builds silently report success.
+ */
+VkResult pvr_ResetCommandBuffer(VkCommandBuffer commandBuffer,
+                                VkCommandBufferResetFlags flags)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* Records the bound compute pipeline on the command-buffer state and flags
+ * the binding dirty for later processing.
+ */
+static void pvr_cmd_bind_compute_pipeline(
+   const struct pvr_compute_pipeline *const compute_pipeline,
+   struct pvr_cmd_buffer *const cmd_buffer)
+{
+   cmd_buffer->state.compute_pipeline = compute_pipeline;
+   cmd_buffer->state.dirty.compute_pipeline_binding = true;
+}
+
+/* Records the bound graphics pipeline and copies into the command buffer's
+ * dynamic state every piece of state the pipeline provides statically, i.e.
+ * each state bit NOT present in the pipeline's dynamic-state mask. Each
+ * copied piece is marked dirty. Dynamically-set state is left untouched so
+ * prior vkCmdSet* values survive the bind.
+ */
+static void pvr_cmd_bind_graphics_pipeline(
+   const struct pvr_graphics_pipeline *const gfx_pipeline,
+   struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_dynamic_state *const dest_state =
+      &cmd_buffer->state.dynamic.common;
+   const struct pvr_dynamic_state *const src_state =
+      &gfx_pipeline->dynamic_state;
+   struct pvr_cmd_buffer_state *const cmd_buffer_state = &cmd_buffer->state;
+   const uint32_t state_mask = src_state->mask;
+
+   cmd_buffer_state->gfx_pipeline = gfx_pipeline;
+   cmd_buffer_state->dirty.gfx_pipeline_binding = true;
+
+   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_VIEWPORT. */
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_VIEWPORT)) {
+      assert(!"Unimplemented");
+   }
+
+   /* FIXME: Handle PVR_DYNAMIC_STATE_BIT_SCISSOR. */
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_SCISSOR)) {
+      assert(!"Unimplemented");
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH)) {
+      dest_state->line_width = src_state->line_width;
+
+      cmd_buffer_state->dirty.line_width = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) {
+      memcpy(&dest_state->depth_bias,
+             &src_state->depth_bias,
+             sizeof(src_state->depth_bias));
+
+      cmd_buffer_state->dirty.depth_bias = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) {
+      /* typed_memcpy requires matching element types on both sides. */
+      STATIC_ASSERT(
+         __same_type(dest_state->blend_constants, src_state->blend_constants));
+
+      typed_memcpy(dest_state->blend_constants,
+                   src_state->blend_constants,
+                   ARRAY_SIZE(dest_state->blend_constants));
+
+      cmd_buffer_state->dirty.blend_constants = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) {
+      dest_state->compare_mask.front = src_state->compare_mask.front;
+      dest_state->compare_mask.back = src_state->compare_mask.back;
+
+      cmd_buffer_state->dirty.compare_mask = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) {
+      dest_state->write_mask.front = src_state->write_mask.front;
+      dest_state->write_mask.back = src_state->write_mask.back;
+
+      cmd_buffer_state->dirty.write_mask = true;
+   }
+
+   if (!(state_mask & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) {
+      dest_state->reference.front = src_state->reference.front;
+      dest_state->reference.back = src_state->reference.back;
+
+      cmd_buffer_state->dirty.reference = true;
+   }
+}
+
+/* vkCmdBindPipeline: dispatches the bind to the compute or graphics path.
+ * Only those two bind points are valid for this driver.
+ */
+void pvr_CmdBindPipeline(VkCommandBuffer commandBuffer,
+                         VkPipelineBindPoint pipelineBindPoint,
+                         VkPipeline _pipeline)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   PVR_FROM_HANDLE(pvr_pipeline, pipeline, _pipeline);
+
+   if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE) {
+      pvr_cmd_bind_compute_pipeline(to_pvr_compute_pipeline(pipeline),
+                                    cmd_buffer);
+   } else if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) {
+      pvr_cmd_bind_graphics_pipeline(to_pvr_graphics_pipeline(pipeline),
+                                     cmd_buffer);
+   } else {
+      unreachable("Invalid bind point.");
+   }
+}
+
+#if defined(DEBUG)
+/* Debug-only check for BRN70165: warns when the viewport plus its guard
+ * band extends beyond the screen-space range representable in the
+ * hardware's fixed-point format, in which case geometry outside the
+ * viewport could be corrupted. The representable range and guard-band size
+ * depend on the parameter format and screen-size features.
+ */
+static void check_viewport_quirk_70165(const struct pvr_device *device,
+                                       const VkViewport *pViewport)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   float min_vertex_x, max_vertex_x, min_vertex_y, max_vertex_y;
+   float min_screen_space_value, max_screen_space_value;
+   float sign_to_unsigned_offset, fixed_point_max;
+   float guardband_width, guardband_height;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      /* Max representable value in 13.4 fixed point format.
+       * Round-down to avoid precision issues.
+       * Calculated as (2 ** 13) - 2*(2 ** -4)
+       */
+      fixed_point_max = 8192.0f - 2.0f / 16.0f;
+
+      if (PVR_HAS_FEATURE(dev_info, screen_size8K)) {
+         if (pViewport->width <= 4096 && pViewport->height <= 4096) {
+            guardband_width = pViewport->width / 4.0f;
+            guardband_height = pViewport->height / 4.0f;
+
+            /* 2k of the range is negative */
+            sign_to_unsigned_offset = 2048.0f;
+         } else {
+            guardband_width = 0.0f;
+            guardband_height = 0.0f;
+
+            /* For > 4k renders, the entire range is positive */
+            sign_to_unsigned_offset = 0.0f;
+         }
+      } else {
+         guardband_width = pViewport->width / 4.0f;
+         guardband_height = pViewport->height / 4.0f;
+
+         /* 2k of the range is negative */
+         sign_to_unsigned_offset = 2048.0f;
+      }
+   } else {
+      /* Max representable value in 16.8 fixed point format
+       * Calculated as (2 ** 16) - (2 ** -8)
+       */
+      fixed_point_max = 65535.99609375f;
+      guardband_width = pViewport->width / 4.0f;
+      guardband_height = pViewport->height / 4.0f;
+
+      /* 4k/20k of the range is negative */
+      sign_to_unsigned_offset = (float)PVR_MAX_NEG_OFFSCREEN_OFFSET;
+   }
+
+   /* Translate the unsigned fixed-point range into signed screen space. */
+   min_screen_space_value = -sign_to_unsigned_offset;
+   max_screen_space_value = fixed_point_max - sign_to_unsigned_offset;
+
+   /* Extremes a vertex may reach once the guard band is included. */
+   min_vertex_x = pViewport->x - guardband_width;
+   max_vertex_x = pViewport->x + pViewport->width + guardband_width;
+   min_vertex_y = pViewport->y - guardband_height;
+   max_vertex_y = pViewport->y + pViewport->height + guardband_height;
+   if (min_vertex_x < min_screen_space_value ||
+       max_vertex_x > max_screen_space_value ||
+       min_vertex_y < min_screen_space_value ||
+       max_vertex_y > max_screen_space_value) {
+      mesa_logw("Viewport is affected by BRN70165, geometry outside "
+                "the viewport could be corrupted");
+   }
+}
+#endif
+
+/* vkCmdSetViewport: stores the given viewports in the dynamic state, grows
+ * the tracked viewport count if needed and marks the viewport state dirty.
+ * Debug builds additionally warn about viewports affected by BRN70165.
+ */
+void pvr_CmdSetViewport(VkCommandBuffer commandBuffer,
+                        uint32_t firstViewport,
+                        uint32_t viewportCount,
+                        const VkViewport *pViewports)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   const uint32_t total_count = firstViewport + viewportCount;
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   assert(firstViewport < PVR_MAX_VIEWPORTS && viewportCount > 0);
+   assert(total_count >= 1 && total_count <= PVR_MAX_VIEWPORTS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+#if defined(DEBUG)
+   if (PVR_HAS_QUIRK(&cmd_buffer->device->pdevice->dev_info, 70165)) {
+      for (uint32_t viewport = 0; viewport < viewportCount; viewport++) {
+         check_viewport_quirk_70165(cmd_buffer->device, &pViewports[viewport]);
+      }
+   }
+#endif
+
+   /* The tracked count only ever grows during recording. */
+   if (state->dynamic.common.viewport.count < total_count)
+      state->dynamic.common.viewport.count = total_count;
+
+   memcpy(&state->dynamic.common.viewport.viewports[firstViewport],
+          pViewports,
+          viewportCount * sizeof(*pViewports));
+
+   state->dirty.viewport = true;
+}
+
+/* vkCmdSetScissor: stores the given scissor rectangles in the dynamic state,
+ * grows the tracked scissor count if needed and marks the scissor state
+ * dirty.
+ */
+void pvr_CmdSetScissor(VkCommandBuffer commandBuffer,
+                       uint32_t firstScissor,
+                       uint32_t scissorCount,
+                       const VkRect2D *pScissors)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const uint32_t end = firstScissor + scissorCount;
+
+   assert(firstScissor < PVR_MAX_VIEWPORTS && scissorCount > 0);
+   assert(end >= 1 && end <= PVR_MAX_VIEWPORTS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   /* The tracked count only ever grows during recording. */
+   if (end > state->dynamic.common.scissor.count)
+      state->dynamic.common.scissor.count = end;
+
+   for (uint32_t i = 0; i < scissorCount; i++)
+      state->dynamic.common.scissor.scissors[firstScissor + i] = pScissors[i];
+
+   state->dirty.scissor = true;
+}
+
+/* vkCmdSetLineWidth: records the dynamic line width and flags it dirty.
+ * NOTE(review): unlike the viewport/scissor entry points this one does not
+ * call PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE — confirm that is
+ * intentional.
+ */
+void pvr_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   state->dynamic.common.line_width = lineWidth;
+   state->dirty.line_width = true;
+}
+
+/* vkCmdSetDepthBias: records the three dynamic depth-bias factors and flags
+ * the depth-bias state dirty.
+ */
+void pvr_CmdSetDepthBias(VkCommandBuffer commandBuffer,
+                         float depthBiasConstantFactor,
+                         float depthBiasClamp,
+                         float depthBiasSlopeFactor)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   state->dynamic.common.depth_bias.constant_factor = depthBiasConstantFactor;
+   state->dynamic.common.depth_bias.clamp = depthBiasClamp;
+   state->dynamic.common.depth_bias.slope_factor = depthBiasSlopeFactor;
+   state->dirty.depth_bias = true;
+}
+
+/* vkCmdSetBlendConstants: copies the four RGBA blend constants into the
+ * dynamic state and flags them dirty.
+ */
+void pvr_CmdSetBlendConstants(VkCommandBuffer commandBuffer,
+                              const float blendConstants[4])
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   /* Guard the memcpy below against the state array changing size. */
+   STATIC_ASSERT(ARRAY_SIZE(state->dynamic.common.blend_constants) == 4);
+   memcpy(state->dynamic.common.blend_constants,
+          blendConstants,
+          sizeof(state->dynamic.common.blend_constants));
+
+   state->dirty.blend_constants = true;
+}
+
+/* vkCmdSetDepthBounds: depth-bounds testing is not supported by this
+ * driver; the call is logged and otherwise ignored.
+ */
+void pvr_CmdSetDepthBounds(VkCommandBuffer commandBuffer,
+                           float minDepthBounds,
+                           float maxDepthBounds)
+{
+   mesa_logd("No support for depth bounds testing.");
+}
+
+/* vkCmdSetStencilCompareMask: applies the compare mask to the faces
+ * selected by faceMask and flags the compare-mask state dirty.
+ */
+void pvr_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer,
+                                  VkStencilFaceFlags faceMask,
+                                  uint32_t compareMask)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const bool set_front = (faceMask & VK_STENCIL_FACE_FRONT_BIT) != 0;
+   const bool set_back = (faceMask & VK_STENCIL_FACE_BACK_BIT) != 0;
+
+   if (set_front)
+      state->dynamic.common.compare_mask.front = compareMask;
+
+   if (set_back)
+      state->dynamic.common.compare_mask.back = compareMask;
+
+   state->dirty.compare_mask = true;
+}
+
+/* vkCmdSetStencilWriteMask: applies the write mask to the faces selected by
+ * faceMask and flags the write-mask state dirty.
+ */
+void pvr_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer,
+                                VkStencilFaceFlags faceMask,
+                                uint32_t writeMask)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const bool set_front = (faceMask & VK_STENCIL_FACE_FRONT_BIT) != 0;
+   const bool set_back = (faceMask & VK_STENCIL_FACE_BACK_BIT) != 0;
+
+   if (set_front)
+      state->dynamic.common.write_mask.front = writeMask;
+
+   if (set_back)
+      state->dynamic.common.write_mask.back = writeMask;
+
+   state->dirty.write_mask = true;
+}
+
+/* vkCmdSetStencilReference: applies the reference value to the faces
+ * selected by faceMask and flags the reference state dirty.
+ */
+void pvr_CmdSetStencilReference(VkCommandBuffer commandBuffer,
+                                VkStencilFaceFlags faceMask,
+                                uint32_t reference)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const bool set_front = (faceMask & VK_STENCIL_FACE_FRONT_BIT) != 0;
+   const bool set_back = (faceMask & VK_STENCIL_FACE_BACK_BIT) != 0;
+
+   if (set_front)
+      state->dynamic.common.reference.front = reference;
+
+   if (set_back)
+      state->dynamic.common.reference.back = reference;
+
+   state->dirty.reference = true;
+}
+
+/* vkCmdBindDescriptorSets: records the given descriptor sets on the
+ * per-bind-point descriptor state, marking each bound slot valid and the
+ * whole state dirty. Dynamic offsets are not consumed here.
+ */
+void pvr_CmdBindDescriptorSets(VkCommandBuffer commandBuffer,
+                               VkPipelineBindPoint pipelineBindPoint,
+                               VkPipelineLayout _layout,
+                               uint32_t firstSet,
+                               uint32_t descriptorSetCount,
+                               const VkDescriptorSet *pDescriptorSets,
+                               uint32_t dynamicOffsetCount,
+                               const uint32_t *pDynamicOffsets)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_descriptor_state *descriptor_state;
+
+   assert(firstSet + descriptorSetCount <= PVR_MAX_DESCRIPTOR_SETS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   /* Select the per-bind-point descriptor state and mark it dirty. */
+   switch (pipelineBindPoint) {
+   case VK_PIPELINE_BIND_POINT_GRAPHICS:
+      descriptor_state = &cmd_buffer->state.gfx_desc_state;
+      cmd_buffer->state.dirty.gfx_desc_dirty = true;
+      break;
+
+   case VK_PIPELINE_BIND_POINT_COMPUTE:
+      descriptor_state = &cmd_buffer->state.compute_desc_state;
+      cmd_buffer->state.dirty.compute_desc_dirty = true;
+      break;
+
+   default:
+      unreachable("Unsupported bind point.");
+      break;
+   }
+
+   /* Record each set, skipping no-op rebinds of the same set. */
+   for (uint32_t i = 0; i < descriptorSetCount; i++) {
+      PVR_FROM_HANDLE(pvr_descriptor_set, set, pDescriptorSets[i]);
+      const uint32_t index = firstSet + i;
+
+      if (descriptor_state->descriptor_sets[index] != set) {
+         descriptor_state->descriptor_sets[index] = set;
+         descriptor_state->valid_mask |= (1u << index);
+      }
+   }
+}
+
+/* vkCmdBindVertexBuffers: records buffer/offset pairs for each binding and
+ * flags the bindings dirty. Actual setup is deferred to draw time because
+ * the per-binding stride comes from the bound pipeline.
+ */
+void pvr_CmdBindVertexBuffers(VkCommandBuffer commandBuffer,
+                              uint32_t firstBinding,
+                              uint32_t bindingCount,
+                              const VkBuffer *pBuffers,
+                              const VkDeviceSize *pOffsets)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_vertex_binding *const bindings =
+      cmd_buffer->state.vertex_bindings;
+
+   assert(firstBinding < PVR_MAX_VERTEX_INPUT_BINDINGS &&
+          bindingCount <= PVR_MAX_VERTEX_INPUT_BINDINGS);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   for (uint32_t i = 0; i < bindingCount; i++) {
+      struct pvr_vertex_binding *const binding = &bindings[firstBinding + i];
+
+      binding->buffer = pvr_buffer_from_handle(pBuffers[i]);
+      binding->offset = pOffsets[i];
+   }
+
+   cmd_buffer->state.dirty.vertex_bindings = true;
+}
+
+/* vkCmdBindIndexBuffer: records the index buffer, byte offset and index
+ * type on the command-buffer state and flags the binding dirty. Only 16-
+ * and 32-bit indices are supported.
+ */
+void pvr_CmdBindIndexBuffer(VkCommandBuffer commandBuffer,
+                            VkBuffer buffer,
+                            VkDeviceSize offset,
+                            VkIndexType indexType)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   PVR_FROM_HANDLE(pvr_buffer, index_buffer, buffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   assert(offset < index_buffer->size);
+   assert(indexType == VK_INDEX_TYPE_UINT32 ||
+          indexType == VK_INDEX_TYPE_UINT16);
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   state->index_buffer_binding.buffer = index_buffer;
+   state->index_buffer_binding.offset = offset;
+   state->index_buffer_binding.type = indexType;
+   state->dirty.index_buffer_binding = true;
+}
+
+/* vkCmdPushConstants: copies the given bytes into the push-constant shadow
+ * buffer and marks the touched shader stages dirty.
+ */
+void pvr_CmdPushConstants(VkCommandBuffer commandBuffer,
+                          VkPipelineLayout layout,
+                          VkShaderStageFlags stageFlags,
+                          uint32_t offset,
+                          uint32_t size,
+                          const void *pValues)
+{
+#if defined(DEBUG)
+   /* 64-bit sum so offset + size cannot wrap before the range check. */
+   const uint64_t ending = (uint64_t)offset + (uint64_t)size;
+#endif
+
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   /* NOTE(review): 'ending' only exists when DEBUG is defined; this relies
+    * on pvr_assert() discarding its argument in non-debug builds — confirm.
+    */
+   pvr_assert(ending <= PVR_MAX_PUSH_CONSTANTS_SIZE);
+
+   memcpy(&state->push_constants.data[offset], pValues, size);
+
+   state->push_constants.dirty_stages |= stageFlags;
+}
+
+/* (Re)builds render_pass_info.attachments for the given pass: frees any
+ * previous array, allocates a zeroed one sized to the pass's attachment
+ * count and, when a framebuffer is provided, fills it with the
+ * framebuffer's image views. On allocation failure the error is latched
+ * into state->status so vkEndCommandBuffer reports it.
+ */
+static VkResult
+pvr_cmd_buffer_setup_attachments(struct pvr_cmd_buffer *cmd_buffer,
+                                 const struct pvr_render_pass *pass,
+                                 const struct pvr_framebuffer *framebuffer)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_render_pass_info *info = &state->render_pass_info;
+
+   /* The body below tolerates a NULL framebuffer (see the guarded copy at
+    * the end), so only compare attachment counts when one is provided;
+    * the previous unconditional assert dereferenced a NULL framebuffer in
+    * debug builds.
+    */
+   assert(!framebuffer ||
+          pass->attachment_count == framebuffer->attachment_count);
+
+   /* Free any previously allocated attachments. */
+   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.attachments);
+
+   if (pass->attachment_count == 0) {
+      info->attachments = NULL;
+      return VK_SUCCESS;
+   }
+
+   info->attachments =
+      vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                pass->attachment_count * sizeof(*info->attachments),
+                8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!info->attachments) {
+      /* Propagate VK_ERROR_OUT_OF_HOST_MEMORY to vkEndCommandBuffer */
+      state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return state->status;
+   }
+
+   if (framebuffer) {
+      for (uint32_t i = 0; i < pass->attachment_count; i++)
+         info->attachments[i] = framebuffer->attachments[i];
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Ensures every hardware render of the pass has a valid render-target
+ * dataset, lazily creating missing ones at the framebuffer's dimensions.
+ * Creation is serialized per render target via render_target->mutex. On
+ * failure the mutex is released and the error returned; already-created
+ * datasets are left in place.
+ */
+static VkResult pvr_init_render_targets(struct pvr_device *device,
+                                        struct pvr_render_pass *pass,
+                                        struct pvr_framebuffer *framebuffer)
+{
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_render_target *render_target =
+         pvr_get_render_target(pass, framebuffer, i);
+
+      pthread_mutex_lock(&render_target->mutex);
+
+      /* Double-checked under the lock: another thread may have created the
+       * dataset while we waited.
+       */
+      if (!render_target->valid) {
+         const struct pvr_renderpass_hwsetup_render *hw_render =
+            &pass->hw_setup->renders[i];
+         VkResult result;
+
+         result = pvr_render_target_dataset_create(device,
+                                                   framebuffer->width,
+                                                   framebuffer->height,
+                                                   hw_render->sample_count,
+                                                   framebuffer->layers,
+                                                   &render_target->rt_dataset);
+         if (result != VK_SUCCESS) {
+            pthread_mutex_unlock(&render_target->mutex);
+            return result;
+         }
+
+         render_target->valid = true;
+      }
+
+      pthread_mutex_unlock(&render_target->mutex);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Returns the hardware subpass that the renderpass-level subpass index maps
+ * to via the hw_setup subpass map.
+ */
+static const struct pvr_renderpass_hwsetup_subpass *
+pvr_get_hw_subpass(const struct pvr_render_pass *pass, const uint32_t subpass)
+{
+   const struct pvr_renderpass_hwsetup *const hw_setup = pass->hw_setup;
+   const struct pvr_renderpass_hw_map *const map =
+      &hw_setup->subpass_map[subpass];
+
+   return &hw_setup->renders[map->render].subpasses[map->subpass];
+}
+
+/* Handles the start-of-render clear of one attachment: the DS surface when
+ * is_depth_stencil is set (index must then be 0), otherwise color
+ * attachment 'index'. The single-layer full-render-area case is already
+ * cleared by pvr_sub_cmd_gfx_job_init() and returns early; every other
+ * path currently hits pvr_finishme(). index_list_clear_mask is accepted
+ * but never written yet.
+ */
+static void pvr_perform_start_of_render_attachment_clear(
+   struct pvr_cmd_buffer *cmd_buffer,
+   const struct pvr_framebuffer *framebuffer,
+   uint32_t index,
+   bool is_depth_stencil,
+   uint32_t *index_list_clear_mask)
+{
+   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
+   const struct pvr_render_pass *pass = info->pass;
+   const struct pvr_renderpass_hwsetup_render *hw_render;
+   const struct pvr_renderpass_hwsetup *hw_setup;
+   struct pvr_image_view *iview;
+   uint32_t view_idx;
+   uint32_t height;
+   uint32_t width;
+
+   hw_setup = pass->hw_setup;
+   hw_render =
+      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
+
+   if (is_depth_stencil) {
+      bool stencil_clear;
+      bool depth_clear;
+      bool is_stencil;
+      bool is_depth;
+
+      assert(hw_render->ds_surface_id != -1);
+      assert(index == 0);
+
+      view_idx = hw_render->ds_surface_id;
+
+      is_depth = vk_format_has_depth(pass->attachments[view_idx].vk_format);
+      is_stencil = vk_format_has_stencil(pass->attachments[view_idx].vk_format);
+      depth_clear = hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR;
+      stencil_clear = hw_render->stencil_init ==
+                      RENDERPASS_SURFACE_INITOP_CLEAR;
+
+      /* Attempt to clear the ds attachment. Do not erroneously discard an
+       * attachment that has no depth clear but has a stencil attachment.
+       */
+      /* if not (a ∧ c) ∨ (b ∧ d) */
+      if (!((is_depth && depth_clear) || (is_stencil && stencil_clear)))
+         return;
+   } else if (hw_render->color_init[index].op !=
+              RENDERPASS_SURFACE_INITOP_CLEAR) {
+      return;
+   } else {
+      view_idx = hw_render->color_init[index].driver_id;
+   }
+
+   iview = info->attachments[view_idx];
+   width = iview->vk.extent.width;
+   height = iview->vk.extent.height;
+
+   /* FIXME: It would be nice if this function and pvr_sub_cmd_gfx_job_init()
+    * were doing the same check (even if it's just an assert) to determine if a
+    * clear is needed.
+    */
+   /* If this is single-layer fullscreen, we already do the clears in
+    * pvr_sub_cmd_gfx_job_init().
+    */
+   if (info->render_area.offset.x == 0 && info->render_area.offset.y == 0 &&
+       info->render_area.extent.width == width &&
+       info->render_area.extent.height == height && framebuffer->layers == 1) {
+      return;
+   }
+
+   pvr_finishme("Unimplemented path!");
+}
+
+/* Performs start-of-render clears for the current hardware render and
+ * derives info->enable_bg_tag and info->process_empty_tiles from which
+ * attachments are cleared via the background object versus index lists.
+ */
+static void
+pvr_perform_start_of_render_clears(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_render_pass_info *info = &cmd_buffer->state.render_pass_info;
+   const struct pvr_framebuffer *framebuffer = info->framebuffer;
+   const struct pvr_render_pass *pass = info->pass;
+   const struct pvr_renderpass_hwsetup *hw_setup = pass->hw_setup;
+   const struct pvr_renderpass_hwsetup_render *hw_render;
+
+   /* Mask of attachment clears using index lists instead of background object
+    * to clear.
+    */
+   uint32_t index_list_clear_mask = 0;
+
+   hw_render =
+      &hw_setup->renders[hw_setup->subpass_map[info->subpass_idx].render];
+   /* NOTE(review): hw_render is the address of an array element and can
+    * never be NULL, so this branch appears to be dead code.
+    */
+   if (!hw_render) {
+      info->process_empty_tiles = false;
+      info->enable_bg_tag = false;
+      return;
+   }
+
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
+                                                   framebuffer,
+                                                   i,
+                                                   false,
+                                                   &index_list_clear_mask);
+   }
+
+   info->enable_bg_tag = !!hw_render->color_init_count;
+
+   /* If we're not using index list for all clears/loads then we need to run
+    * the background object on empty tiles.
+    */
+   if (hw_render->color_init_count &&
+       index_list_clear_mask != ((1u << hw_render->color_init_count) - 1u)) {
+      info->process_empty_tiles = true;
+   } else {
+      info->process_empty_tiles = false;
+   }
+
+   if (hw_render->ds_surface_id != -1) {
+      uint32_t ds_index_list = 0;
+
+      /* ds_index_list is written by the callee but not consumed here. */
+      pvr_perform_start_of_render_attachment_clear(cmd_buffer,
+                                                   framebuffer,
+                                                   0,
+                                                   true,
+                                                   &ds_index_list);
+   }
+
+   if (index_list_clear_mask)
+      pvr_finishme("Add support for generating loadops shaders!");
+}
+
+static void pvr_stash_depth_format(struct pvr_cmd_buffer_state *state)
+{
+   const struct pvr_render_pass *pass = state->render_pass_info.pass;
+   const struct pvr_renderpass_hwsetup_render *hw_render =
+      &pass->hw_setup->renders[state->current_sub_cmd->gfx.hw_render_idx];
+
+   if (hw_render->ds_surface_id != -1) {
+      struct pvr_image_view **iviews = state->render_pass_info.attachments;
+
+      state->depth_format = iviews[hw_render->ds_surface_id]->vk.format;
+   }
+}
+
+static bool pvr_loadops_contain_clear(struct pvr_renderpass_hwsetup *hw_setup)
+{
+   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
+      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
+      uint32_t render_targets_count =
+         hw_render->init_setup.render_targets_count;
+
+      for (uint32_t j = 0;
+           j < (hw_render->color_init_count * render_targets_count);
+           j += render_targets_count) {
+         for (uint32_t k = 0; k < hw_render->init_setup.render_targets_count;
+              k++) {
+            if (hw_render->color_init[j + k].op ==
+                RENDERPASS_SURFACE_INITOP_CLEAR) {
+               return true;
+            }
+         }
+      }
+      if (hw_render->depth_init == RENDERPASS_SURFACE_INITOP_CLEAR ||
+          hw_render->stencil_init == RENDERPASS_SURFACE_INITOP_CLEAR) {
+         return true;
+      }
+   }
+
+   return false;
+}
+
+static VkResult
+pvr_cmd_buffer_set_clear_values(struct pvr_cmd_buffer *cmd_buffer,
+                                const VkRenderPassBeginInfo *pRenderPassBegin)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+
+   /* Free any previously allocated clear values. */
+   vk_free(&cmd_buffer->vk.pool->alloc, state->render_pass_info.clear_values);
+
+   if (pRenderPassBegin->clearValueCount) {
+      const size_t size = pRenderPassBegin->clearValueCount *
+                          sizeof(*state->render_pass_info.clear_values);
+
+      state->render_pass_info.clear_values =
+         vk_zalloc(&cmd_buffer->vk.pool->alloc,
+                   size,
+                   8,
+                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+      if (!state->render_pass_info.clear_values) {
+         state->status = vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
+         return state->status;
+      }
+
+      memcpy(state->render_pass_info.clear_values,
+             pRenderPassBegin->pClearValues,
+             size);
+   } else {
+      state->render_pass_info.clear_values = NULL;
+   }
+
+   state->render_pass_info.clear_value_count =
+      pRenderPassBegin->clearValueCount;
+
+   return VK_SUCCESS;
+}
+
/* vkCmdBeginRenderPass2 entry point: initializes the render pass state on
 * the command buffer, sets up attachments and render targets, records the
 * clear values, starts a graphics sub-command and performs the
 * start-of-render clears for subpass 0.
 *
 * NOTE(review): errors are propagated inconsistently — the
 * pvr_init_render_targets() failure is stored in state->status while the
 * other failures only return; confirm whether the callers rely on
 * state->status being set on every failure path.
 */
void pvr_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
                             const VkRenderPassBeginInfo *pRenderPassBeginInfo,
                             const VkSubpassBeginInfoKHR *pSubpassBeginInfo)
{
   PVR_FROM_HANDLE(pvr_framebuffer,
                   framebuffer,
                   pRenderPassBeginInfo->framebuffer);
   PVR_FROM_HANDLE(pvr_render_pass, pass, pRenderPassBeginInfo->renderPass);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   const struct pvr_renderpass_hwsetup_subpass *hw_subpass;
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* A render pass must not already be in progress, and only primary
    * command buffers may begin a render pass.
    */
   assert(!state->render_pass_info.pass);
   assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

   /* FIXME: Create a separate function for everything using pass->subpasses,
    * look at cmd_buffer_begin_subpass() for example. */
   state->render_pass_info.pass = pass;
   state->render_pass_info.framebuffer = framebuffer;
   state->render_pass_info.subpass_idx = 0;
   state->render_pass_info.render_area = pRenderPassBeginInfo->renderArea;
   state->render_pass_info.current_hw_subpass = 0;
   state->render_pass_info.pipeline_bind_point =
      pass->subpasses[0].pipeline_bind_point;
   state->render_pass_info.userpass_spawn = pass->subpasses[0].userpass_spawn;
   state->dirty.userpass_spawn = true;

   result = pvr_cmd_buffer_setup_attachments(cmd_buffer, pass, framebuffer);
   if (result != VK_SUCCESS)
      return;

   state->status =
      pvr_init_render_targets(cmd_buffer->device, pass, framebuffer);
   if (state->status != VK_SUCCESS)
      return;

   result = pvr_cmd_buffer_set_clear_values(cmd_buffer, pRenderPassBeginInfo);
   if (result != VK_SUCCESS)
      return;

   /* Only graphics subpasses are supported as the first subpass here. */
   assert(pass->subpasses[0].pipeline_bind_point ==
          VK_PIPELINE_BIND_POINT_GRAPHICS);

   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
   if (result != VK_SUCCESS)
      return;

   /* Run subpass 0 "soft" background object after the actual background
    * object.
    */
   hw_subpass = pvr_get_hw_subpass(pass, 0);
   if (hw_subpass->client_data)
      pvr_finishme("Unimplemented path!");

   pvr_perform_start_of_render_clears(cmd_buffer);
   pvr_stash_depth_format(&cmd_buffer->state);

   /* When no load op clears anything, arm the scissor accumulation state so
    * a later full-surface clear can be detected; otherwise disable it.
    */
   if (!pvr_loadops_contain_clear(pass->hw_setup)) {
      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR;
      state->dynamic.scissor_accum_bounds.offset.x = 0;
      state->dynamic.scissor_accum_bounds.offset.y = 0;
      state->dynamic.scissor_accum_bounds.extent.width = 0;
      state->dynamic.scissor_accum_bounds.extent.height = 0;
   } else {
      state->dynamic.scissor_accum_state = PVR_SCISSOR_ACCUM_DISABLED;
   }
}
+
+static void pvr_cmd_buffer_reset(struct pvr_cmd_buffer *cmd_buffer)
+{
+   if (cmd_buffer->status != PVR_CMD_BUFFER_STATUS_INITIAL) {
+      /* FIXME: For now we always free all resources as if
+       * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set.
+       */
+      pvr_cmd_buffer_free_sub_cmds(cmd_buffer);
+
+      list_for_each_entry_safe (struct pvr_bo, bo, &cmd_buffer->bo_list, link) {
+         list_del(&bo->link);
+         pvr_bo_free(cmd_buffer->device, bo);
+      }
+
+      util_dynarray_clear(&cmd_buffer->scissor_array);
+      util_dynarray_clear(&cmd_buffer->depth_bias_array);
+
+      cmd_buffer->state.status = VK_SUCCESS;
+      cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INITIAL;
+   }
+}
+
/* vkBeginCommandBuffer entry point: resets the command buffer, captures the
 * usage flags, and — for secondary command buffers continuing a render
 * pass — restores the inherited render pass state and starts a graphics
 * sub-command.
 */
VkResult pvr_BeginCommandBuffer(VkCommandBuffer commandBuffer,
                                const VkCommandBufferBeginInfo *pBeginInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state;
   VkResult result;

   /* Implicit reset, as allowed by the spec when re-beginning. */
   pvr_cmd_buffer_reset(cmd_buffer);

   cmd_buffer->usage_flags = pBeginInfo->flags;
   state = &cmd_buffer->state;

   /* VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT must be ignored for
    * primary level command buffers.
    *
    * From the Vulkan 1.0 spec:
    *
    *    VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT specifies that a
    *    secondary command buffer is considered to be entirely inside a render
    *    pass. If this is a primary command buffer, then this bit is ignored.
    */
   if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      cmd_buffer->usage_flags &=
         ~VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
   }

   if (cmd_buffer->usage_flags &
       VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
      /* The spec guarantees pInheritanceInfo is valid when the continue bit
       * is set on a secondary command buffer.
       */
      const VkCommandBufferInheritanceInfo *inheritance_info =
         pBeginInfo->pInheritanceInfo;
      struct pvr_render_pass *pass;

      pass = pvr_render_pass_from_handle(inheritance_info->renderPass);
      state->render_pass_info.pass = pass;
      state->render_pass_info.framebuffer =
         pvr_framebuffer_from_handle(inheritance_info->framebuffer);
      state->render_pass_info.subpass_idx = inheritance_info->subpass;
      state->render_pass_info.userpass_spawn =
         pass->subpasses[inheritance_info->subpass].userpass_spawn;

      result =
         pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
      if (result != VK_SUCCESS)
         return result;
   }

   /* Conservatively mark every barrier as needed (all bits set). The size
    * expression is equivalent to sizeof(state->barriers_needed).
    */
   memset(state->barriers_needed,
          0xFF,
          sizeof(*state->barriers_needed) * ARRAY_SIZE(state->barriers_needed));

   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}
+
+VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                         struct pvr_transfer_cmd *transfer_cmd)
+{
+   VkResult result;
+
+   result = pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_TRANSFER);
+   if (result != VK_SUCCESS)
+      return result;
+
+   list_addtail(&transfer_cmd->link,
+                &cmd_buffer->state.current_sub_cmd->transfer.transfer_cmds);
+
+   return VK_SUCCESS;
+}
+
/* vkCmdDispatch entry point — not yet implemented; trips the assert when
 * asserts are enabled.
 */
void pvr_CmdDispatch(VkCommandBuffer commandBuffer,
                     uint32_t groupCountX,
                     uint32_t groupCountY,
                     uint32_t groupCountZ)
{
   assert(!"Unimplemented");
}
+
/* vkCmdDispatchIndirect entry point — not yet implemented; trips the assert
 * when asserts are enabled.
 */
void pvr_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
                             VkBuffer _buffer,
                             VkDeviceSize offset)
{
   assert(!"Unimplemented");
}
+
/* vkCmdDraw entry point — not yet implemented; trips the assert when
 * asserts are enabled.
 */
void pvr_CmdDraw(VkCommandBuffer commandBuffer,
                 uint32_t vertexCount,
                 uint32_t instanceCount,
                 uint32_t firstVertex,
                 uint32_t firstInstance)
{
   assert(!"Unimplemented");
}
+
+static void
+pvr_update_draw_state(struct pvr_cmd_buffer_state *const state,
+                      const struct pvr_cmd_buffer_draw_state *const draw_state)
+{
+   /* We don't have a state to tell us that base_instance is being used so it
+    * gets used as a boolean - 0 means we'll use a pds program that skips the
+    * base instance addition. If the base_instance gets used (and the last
+    * draw's base_instance was 0) then we switch to the BASE_INSTANCE attrib
+    * program.
+    *
+    * If base_instance changes then we only need to update the data section.
+    *
+    * The only draw call state that doesn't really matter is the start vertex
+    * as that is handled properly in the VDM state in all cases.
+    */
+   if ((state->draw_state.draw_indexed != draw_state->draw_indexed) ||
+       (state->draw_state.draw_indirect != draw_state->draw_indirect) ||
+       (state->draw_state.base_instance == 0 &&
+        draw_state->base_instance != 0)) {
+      state->dirty.draw_variant = true;
+   } else if (state->draw_state.base_instance != draw_state->base_instance) {
+      state->dirty.draw_base_instance = true;
+   }
+
+   state->draw_state = *draw_state;
+}
+
+static uint32_t pvr_calc_shared_regs_count(
+   const struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   const struct pvr_pipeline_stage_state *const vertex_state =
+      &gfx_pipeline->vertex_shader_state.stage_state;
+   uint32_t shared_regs = vertex_state->const_shared_reg_count +
+                          vertex_state->const_shared_reg_offset;
+
+   if (gfx_pipeline->fragment_shader_state.bo) {
+      const struct pvr_pipeline_stage_state *const fragment_state =
+         &gfx_pipeline->fragment_shader_state.stage_state;
+      uint32_t fragment_regs = fragment_state->const_shared_reg_count +
+                               fragment_state->const_shared_reg_offset;
+
+      shared_regs = MAX2(shared_regs, fragment_regs);
+   }
+
+   return shared_regs;
+}
+
/* Store _value into _buffer at dword offset _offset. The buffer is indexed
 * in units of the value's size (1 dword for 32-bit values, 2 dwords for
 * 64-bit values); the static_assert checks the buffer element type matches
 * the value type, and the runtime asserts check the write stays within
 * _max dwords and that _offset is aligned to the value's size. Arguments
 * are captured in locals so each is evaluated only once.
 */
#define PVR_WRITE(_buffer, _value, _offset, _max)                \
   do {                                                          \
      __typeof__(_value) __value = _value;                       \
      uint64_t __offset = _offset;                               \
      uint32_t __nr_dwords = sizeof(__value) / sizeof(uint32_t); \
      static_assert(__same_type(*_buffer, __value),              \
                    "Buffer and value type mismatch");           \
      assert((__offset + __nr_dwords) <= (_max));                \
      assert((__offset % __nr_dwords) == 0U);                    \
      _buffer[__offset / __nr_dwords] = __value;                 \
   } while (0)
+
/* Allocate and fill the PDS data section for the vertex-attribute PDS
 * program: walks the program's const map entries and patches in the live
 * values (literals, USC execution address, base instance, vertex buffer
 * addresses), then records the resulting PDS-heap-relative offset in
 * state->pds_vertex_attrib_offset.
 *
 * Returns VK_SUCCESS or the allocation failure from
 * pvr_cmd_buffer_alloc_mem().
 */
static VkResult
pvr_setup_vertex_buffers(struct pvr_cmd_buffer *cmd_buffer,
                         const struct pvr_graphics_pipeline *const gfx_pipeline)
{
   const struct pvr_vertex_shader_state *const vertex_state =
      &gfx_pipeline->vertex_shader_state;
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_pds_info *const pds_info = state->pds_shader.info;
   const uint8_t *entries;
   uint32_t *dword_buffer;
   uint64_t *qword_buffer;
   struct pvr_bo *pvr_bo;
   VkResult result;

   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     cmd_buffer->device->heaps.pds_heap,
                                     pds_info->data_size_in_dwords,
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                     &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Two views of the same mapping: 32-bit entries are written through
    * dword_buffer, 64-bit entries through qword_buffer.
    */
   dword_buffer = (uint32_t *)pvr_bo->bo->map;
   qword_buffer = (uint64_t *)pvr_bo->bo->map;

   entries = (uint8_t *)pds_info->entries;

   /* The entry stream is a packed sequence of variable-size records, each
    * starting with a common header; `entries` is advanced by the size of
    * each record as it is consumed.
    */
   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
      const struct pvr_const_map_entry *const entry_header =
         (struct pvr_const_map_entry *)entries;

      switch (entry_header->type) {
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
         /* A 32-bit constant copied verbatim into the data section. */
         const struct pvr_const_map_entry_literal32 *const literal =
            (struct pvr_const_map_entry_literal32 *)entries;

         PVR_WRITE(dword_buffer,
                   literal->literal_value,
                   literal->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*literal);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DOUTU_ADDRESS: {
         /* USC execution address of the vertex shader, combined with the
          * entry's DOUTU control bits into a single 64-bit word.
          */
         const struct pvr_const_map_entry_doutu_address *const doutu_addr =
            (struct pvr_const_map_entry_doutu_address *)entries;
         pvr_dev_addr_t exec_addr = vertex_state->bo->vma->dev_addr;
         uint64_t addr = 0ULL;

         exec_addr.addr += vertex_state->entry_offset;
         pvr_set_usc_execution_address64(&addr, exec_addr.addr);

         PVR_WRITE(qword_buffer,
                   addr | doutu_addr->doutu_control,
                   doutu_addr->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*doutu_addr);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_BASE_INSTANCE: {
         /* Current draw's base instance from the tracked draw state. */
         const struct pvr_const_map_entry_base_instance *const base_instance =
            (struct pvr_const_map_entry_base_instance *)entries;

         PVR_WRITE(dword_buffer,
                   state->draw_state.base_instance,
                   base_instance->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*base_instance);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_VERTEX_ATTRIBUTE_ADDRESS: {
         /* Device address of a vertex attribute: bound buffer base plus
          * binding offset plus the attribute's own offset.
          */
         const struct pvr_const_map_entry_vertex_attribute_address
            *const attribute =
               (struct pvr_const_map_entry_vertex_attribute_address *)entries;
         const struct pvr_vertex_binding *const binding =
            &state->vertex_bindings[attribute->binding_index];
         uint64_t addr = binding->buffer->dev_addr.addr;

         addr += binding->offset;
         addr += attribute->offset;

         PVR_WRITE(qword_buffer,
                   addr,
                   attribute->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*attribute);
         break;
      }

      default:
         unreachable("Unsupported data section map");
         break;
      }
   }

   /* Store the data section's offset relative to the PDS heap base. */
   state->pds_vertex_attrib_offset =
      pvr_bo->vma->dev_addr.addr -
      cmd_buffer->device->heaps.pds_heap->base_addr.addr;

   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);

   return VK_SUCCESS;
}
+
/* Allocate and fill the PDS data section for a stage's uniform/descriptor
 * PDS program: walks the const map entries and patches in literals,
 * constant buffer addresses and descriptor set addresses. On success the
 * PDS-heap-relative offset of the data section is written to
 * *uniform_data_offset_out.
 *
 * Returns VK_SUCCESS (also when the program has no data section) or the
 * allocation failure from pvr_cmd_buffer_alloc_mem().
 */
static VkResult pvr_setup_descriptor_mappings(
   struct pvr_cmd_buffer *const cmd_buffer,
   enum pvr_stage_allocation stage,
   const struct pvr_stage_allocation_uniform_state *uniform_state,
   uint32_t *const uniform_data_offset_out)
{
   const struct pvr_pds_info *const pds_info = &uniform_state->pds_info;
   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_descriptor_state *desc_state;
   const uint8_t *entries;
   uint32_t *dword_buffer;
   uint64_t *qword_buffer;
   struct pvr_bo *pvr_bo;
   VkResult result;

   /* Nothing to patch when the program has no data section. */
   if (!pds_info->data_size_in_dwords)
      return VK_SUCCESS;

   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     cmd_buffer->device->heaps.pds_heap,
                                     pds_info->data_size_in_dwords,
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                     &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Two views of the same mapping: 32-bit writes via dword_buffer,
    * 64-bit writes via qword_buffer.
    */
   dword_buffer = (uint32_t *)pvr_bo->bo->map;
   qword_buffer = (uint64_t *)pvr_bo->bo->map;

   entries = (uint8_t *)pds_info->entries;

   /* Select the descriptor state matching the stage's bind point. */
   switch (stage) {
   case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
   case PVR_STAGE_ALLOCATION_FRAGMENT:
      desc_state = &cmd_buffer->state.gfx_desc_state;
      break;

   case PVR_STAGE_ALLOCATION_COMPUTE:
      desc_state = &cmd_buffer->state.compute_desc_state;
      break;

   default:
      unreachable("Unsupported stage.");
      break;
   }

   /* The entry stream is a packed sequence of variable-size records, each
    * starting with a common header; `entries` advances by each record's
    * size as it is consumed.
    */
   for (uint32_t i = 0; i < pds_info->entry_count; i++) {
      const struct pvr_const_map_entry *const entry_header =
         (struct pvr_const_map_entry *)entries;

      switch (entry_header->type) {
      case PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32: {
         const struct pvr_const_map_entry_literal32 *const literal =
            (struct pvr_const_map_entry_literal32 *)entries;

         PVR_WRITE(dword_buffer,
                   literal->literal_value,
                   literal->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*literal);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_CONSTANT_BUFFER: {
         const struct pvr_const_map_entry_constant_buffer *const_buffer_entry =
            (struct pvr_const_map_entry_constant_buffer *)entries;
         const uint32_t desc_set = const_buffer_entry->desc_set;
         const uint32_t binding = const_buffer_entry->binding;
         const struct pvr_descriptor_set *descriptor_set;
         const struct pvr_descriptor *descriptor;
         pvr_dev_addr_t buffer_addr;

         /* TODO: Handle push descriptors. */

         /* NOTE(review): this reads gfx_desc_state directly rather than the
          * stage-selected desc_state above — confirm whether compute-stage
          * constant buffers are intended to be unsupported here.
          */
         assert(desc_set < PVR_MAX_DESCRIPTOR_SETS);
         descriptor_set = state->gfx_desc_state.descriptor_sets[desc_set];

         /* TODO: Handle dynamic buffers. */
         descriptor = &descriptor_set->descriptors[binding];
         assert(descriptor->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);

         assert(descriptor->buffer_desc_range ==
                const_buffer_entry->size_in_dwords * sizeof(uint32_t));
         assert(descriptor->buffer_create_info_size ==
                const_buffer_entry->size_in_dwords * sizeof(uint32_t));

         buffer_addr = descriptor->buffer_dev_addr;
         buffer_addr.addr += const_buffer_entry->offset * sizeof(uint32_t);

         PVR_WRITE(qword_buffer,
                   buffer_addr.addr,
                   const_buffer_entry->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*const_buffer_entry);
         break;
      }

      case PVR_PDS_CONST_MAP_ENTRY_TYPE_DESCRIPTOR_SET: {
         const struct pvr_const_map_entry_descriptor_set *desc_set_entry =
            (struct pvr_const_map_entry_descriptor_set *)entries;
         const uint32_t desc_set_num = desc_set_entry->descriptor_set;
         const struct pvr_descriptor_set *descriptor_set;
         pvr_dev_addr_t desc_set_addr;

         assert(desc_set_num < PVR_MAX_DESCRIPTOR_SETS);

         /* TODO: Remove this when the compiler provides us with usage info?
          */
         /* We skip DMAing unbound descriptor sets. */
         if (!(desc_state->valid_mask & BITFIELD_BIT(desc_set_num))) {
            const struct pvr_const_map_entry_literal32 *literal;
            uint32_t zero_literal_value;

            entries += sizeof(*desc_set_entry);
            literal = (struct pvr_const_map_entry_literal32 *)entries;

            /* TODO: Is there any guarantee that a literal will follow the
             * descriptor set entry?
             */
            assert(literal->type == PVR_PDS_CONST_MAP_ENTRY_TYPE_LITERAL32);

            /* We zero out the DMA size so the DMA isn't performed. */
            zero_literal_value =
               literal->literal_value &
               PVR_ROGUE_PDSINST_DOUT_FIELDS_DOUTD_SRC1_BSIZE_CLRMSK;

            PVR_WRITE(qword_buffer,
                      0UL,
                      desc_set_entry->const_offset,
                      pds_info->data_size_in_dwords);

            PVR_WRITE(dword_buffer,
                      zero_literal_value,
                      desc_set_entry->const_offset,
                      pds_info->data_size_in_dwords);

            /* Two records were consumed (entry + literal), so bump the
             * loop counter an extra time.
             */
            entries += sizeof(*literal);
            i++;
            continue;
         }

         descriptor_set = desc_state->descriptor_sets[desc_set_num];

         pvr_finishme("Handle push descriptor entry.");

         desc_set_addr = descriptor_set->pvr_bo->vma->dev_addr;

         /* Offset into the primary or secondary region of this stage's
          * per-set memory layout (values are in dwords, hence << 2 to
          * bytes).
          */
         if (desc_set_entry->primary) {
            desc_set_addr.addr +=
               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
                  .primary_offset
               << 2U;
         } else {
            desc_set_addr.addr +=
               descriptor_set->layout->memory_layout_in_dwords_per_stage[stage]
                  .secondary_offset
               << 2U;
         }

         desc_set_addr.addr += (uint64_t)desc_set_entry->offset_in_dwords << 2U;

         PVR_WRITE(qword_buffer,
                   desc_set_addr.addr,
                   desc_set_entry->const_offset,
                   pds_info->data_size_in_dwords);

         entries += sizeof(*desc_set_entry);
         break;
      }

      default:
         unreachable("Unsupported map entry type.");
      }
   }

   pvr_bo_cpu_unmap(cmd_buffer->device, pvr_bo);

   /* Report the data section's offset relative to the PDS heap base. */
   *uniform_data_offset_out =
      pvr_bo->vma->dev_addr.addr -
      cmd_buffer->device->heaps.pds_heap->base_addr.addr;

   return VK_SUCCESS;
}
+
+#undef PVR_WRITE
+
/* Emit the VDMCTRL_PDS_STATE0/1/2 words for the vertex uniform PDS program
 * into the current graphics sub-command's control stream. Skipped entirely
 * when the program has no code.
 */
static void
pvr_emit_dirty_pds_state(const struct pvr_cmd_buffer *const cmd_buffer,
                         const uint32_t pds_vertex_uniform_data_offset)
{
   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_stage_allocation_uniform_state *const vertex_uniform_state =
      &state->gfx_pipeline->vertex_shader_state.uniform_state;
   const struct pvr_pipeline_stage_state *const vertex_stage_state =
      &state->gfx_pipeline->vertex_shader_state.stage_state;
   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;

   /* No uniform PDS program for this pipeline; nothing to emit. */
   if (!vertex_uniform_state->pds_info.code_size_in_dwords)
      return;

   pvr_csb_emit (csb, VDMCTRL_PDS_STATE0, state0) {
      state0.usc_target = PVRX(VDMCTRL_USC_TARGET_ALL);

      /* Sizes are in dwords; << 2 converts to bytes before rounding up to
       * the hardware's unit size.
       */
      state0.usc_common_size =
         DIV_ROUND_UP(vertex_stage_state->const_shared_reg_count << 2,
                      PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));

      state0.pds_data_size =
         DIV_ROUND_UP(vertex_uniform_state->pds_info.data_size_in_dwords << 2,
                      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE));
   }

   pvr_csb_emit (csb, VDMCTRL_PDS_STATE1, state1) {
      state1.pds_data_addr.addr = pds_vertex_uniform_data_offset;
      state1.sd_type = PVRX(VDMCTRL_SD_TYPE_NONE);
   }

   pvr_csb_emit (csb, VDMCTRL_PDS_STATE2, state2) {
      state2.pds_code_addr.addr = vertex_uniform_state->pds_code.code_offset;
   }
}
+
+static void pvr_setup_output_select(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline =
+      cmd_buffer->state.gfx_pipeline;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   const struct pvr_vertex_shader_state *const vertex_state =
+      &gfx_pipeline->vertex_shader_state;
+   uint32_t output_selects;
+
+   /* TODO: Handle vertex and fragment shader state flags. */
+
+   pvr_csb_pack (&output_selects, TA_OUTPUT_SEL, state) {
+      const VkPrimitiveTopology topology =
+         gfx_pipeline->input_asm_state.topology;
+
+      state.rhw_pres = true;
+      state.vtxsize = DIV_ROUND_UP(vertex_state->vertex_output_size, 4U);
+      state.psprite_size_pres = (topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);
+   }
+
+   if (ppp_state->output_selects != output_selects) {
+      ppp_state->output_selects = output_selects;
+      emit_state->output_selects = true;
+   }
+
+   if (ppp_state->varying_word[0] != vertex_state->varying[0]) {
+      ppp_state->varying_word[0] = vertex_state->varying[0];
+      emit_state->varying_word0 = true;
+   }
+
+   if (ppp_state->varying_word[1] != vertex_state->varying[1]) {
+      ppp_state->varying_word[1] = vertex_state->varying[1];
+      emit_state->varying_word1 = true;
+   }
+}
+
+/* clang-format off */
+static enum PVRX(TA_OBJTYPE)
+pvr_ppp_state_get_ispa_objtype_from_vk(const VkPrimitiveTopology topology)
+/* clang-format on */
+{
+   switch (topology) {
+   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+      return PVRX(TA_OBJTYPE_SPRITE_01UV);
+
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+      return PVRX(TA_OBJTYPE_LINE);
+
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+      return PVRX(TA_OBJTYPE_TRIANGLE);
+
+   default:
+      unreachable("Invalid topology.");
+      return 0;
+   }
+}
+
+static void pvr_setup_isp_faces_and_control(
+   struct pvr_cmd_buffer *const cmd_buffer,
+   struct pvr_cmd_struct(TA_STATE_ISPA) *const ispa_out)
+{
+   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline =
+      cmd_buffer->state.gfx_pipeline;
+   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
+   const struct pvr_dynamic_state *const dynamic_state =
+      &cmd_buffer->state.dynamic.common;
+   const struct pvr_render_pass_info *const pass_info =
+      &cmd_buffer->state.render_pass_info;
+   const uint32_t subpass_idx = pass_info->subpass_idx;
+   const uint32_t *depth_stencil_attachment_idx =
+      pass_info->pass->subpasses[subpass_idx].depth_stencil_attachment;
+   const struct pvr_image_view *const attachment =
+      (!depth_stencil_attachment_idx)
+         ? NULL
+         : pass_info->attachments[*depth_stencil_attachment_idx];
+
+   const VkCullModeFlags cull_mode = gfx_pipeline->raster_state.cull_mode;
+   const bool raster_discard_enabled =
+      gfx_pipeline->raster_state.discard_enable;
+   const bool disable_all = raster_discard_enabled || !attachment;
+
+   const VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
+   const enum PVRX(TA_OBJTYPE)
+      obj_type = pvr_ppp_state_get_ispa_objtype_from_vk(topology);
+
+   const bool disable_stencil_write = disable_all;
+   const bool disable_stencil_test =
+      disable_all || !vk_format_has_stencil(attachment->vk.format);
+
+   const bool disable_depth_write = disable_all;
+   const bool disable_depth_test = disable_all ||
+                                   !vk_format_has_depth(attachment->vk.format);
+
+   uint32_t ispb_stencil_off;
+   bool is_two_sided = false;
+   uint32_t isp_control;
+
+   uint32_t line_width;
+   uint32_t common_a;
+   uint32_t front_a;
+   uint32_t front_b;
+   uint32_t back_a;
+   uint32_t back_b;
+
+   /* Convert to 4.4 fixed point format. */
+   line_width = util_unsigned_fixed(dynamic_state->line_width, 4);
+
+   /* Subtract 1 to shift values from range [0=0,256=16] to [0=1/16,255=16].
+    * If 0 it stays at 0, otherwise we subtract 1.
+    */
+   line_width = (!!line_width) * (line_width - 1);
+
+   line_width = MIN2(line_width, PVRX(TA_STATE_ISPA_POINTLINEWIDTH_SIZE_MAX));
+
+   /* TODO: Part of the logic in this function is duplicated in another part
+    * of the code. E.g. the dcmpmode, and sop1/2/3. Could we do this earlier?
+    */
+
+   pvr_csb_pack (&common_a, TA_STATE_ISPA, ispa) {
+      ispa.pointlinewidth = line_width;
+
+      if (disable_depth_test)
+         ispa.dcmpmode = PVRX(TA_CMPMODE_ALWAYS);
+      else
+         ispa.dcmpmode = gfx_pipeline->depth_compare_op;
+
+      /* FIXME: Can we just have this and remove the assignment above?
+       * The user provides a depthTestEnable at vkCreateGraphicsPipelines()
+       * should we be using that?
+       */
+      ispa.dcmpmode |= gfx_pipeline->depth_compare_op;
+
+      ispa.dwritedisable = disable_depth_test || disable_depth_write;
+      /* FIXME: Can we just have this and remove the assignment above? */
+      ispa.dwritedisable = ispa.dwritedisable ||
+                           gfx_pipeline->depth_write_disable;
+
+      ispa.passtype = gfx_pipeline->fragment_shader_state.pass_type;
+
+      ispa.objtype = obj_type;
+
+      /* Return unpacked ispa structure. dcmpmode, dwritedisable, passtype and
+       * objtype are needed by pvr_setup_triangle_merging_flag.
+       */
+      if (ispa_out)
+         *ispa_out = ispa;
+   }
+
+   /* FIXME: This logic should be redone and improved. Can we also get rid of
+    * the front and back variants?
+    */
+
+   pvr_csb_pack (&front_a, TA_STATE_ISPA, ispa) {
+      ispa.sref = (!disable_stencil_test) * dynamic_state->reference.front;
+   }
+   front_a |= common_a;
+
+   pvr_csb_pack (&back_a, TA_STATE_ISPA, ispa) {
+      ispa.sref = (!disable_stencil_test) * dynamic_state->compare_mask.back;
+   }
+   back_a |= common_a;
+
+   /* TODO: Does this actually represent the ispb control word on stencil off?
+    * If not, rename the variable.
+    */
+   pvr_csb_pack (&ispb_stencil_off, TA_STATE_ISPB, ispb) {
+      ispb.sop3 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.sop2 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.sop1 = PVRX(TA_ISPB_STENCILOP_KEEP);
+      ispb.scmpmode = PVRX(TA_CMPMODE_ALWAYS);
+   }
+
+   if (disable_stencil_test) {
+      back_b = front_b = ispb_stencil_off;
+   } else {
+      pvr_csb_pack (&front_b, TA_STATE_ISPB, ispb) {
+         ispb.swmask =
+            (!disable_stencil_write) * dynamic_state->write_mask.front;
+         ispb.scmpmask = dynamic_state->compare_mask.front;
+
+         ispb.sop3 = gfx_pipeline->stencil_front.pass_op;
+         ispb.sop2 = gfx_pipeline->stencil_front.depth_fail_op;
+         ispb.sop1 = gfx_pipeline->stencil_front.fail_op;
+
+         ispb.scmpmode = gfx_pipeline->stencil_front.compare_op;
+      }
+
+      pvr_csb_pack (&back_b, TA_STATE_ISPB, ispb) {
+         ispb.swmask =
+            (!disable_stencil_write) * dynamic_state->write_mask.back;
+         ispb.scmpmask = dynamic_state->compare_mask.back;
+
+         ispb.sop3 = gfx_pipeline->stencil_back.pass_op;
+         ispb.sop2 = gfx_pipeline->stencil_back.depth_fail_op;
+         ispb.sop1 = gfx_pipeline->stencil_back.fail_op;
+
+         ispb.scmpmode = gfx_pipeline->stencil_back.compare_op;
+      }
+   }
+
+   if (front_a != back_a || front_b != back_b) {
+      if (cull_mode & VK_CULL_MODE_BACK_BIT) {
+         /* Single face, using front state. */
+      } else if (cull_mode & VK_CULL_MODE_FRONT_BIT) {
+         /* Single face, using back state. */
+
+         front_a = back_a;
+         front_b = back_b;
+      } else {
+         /* Both faces. */
+
+         emit_state->isp_ba = is_two_sided = true;
+
+         if (gfx_pipeline->raster_state.front_face ==
+             VK_FRONT_FACE_COUNTER_CLOCKWISE) {
+            uint32_t tmp = front_a;
+
+            front_a = back_a;
+            back_a = tmp;
+
+            tmp = front_b;
+            front_b = back_b;
+            back_b = tmp;
+         }
+
+         /* HW defaults to stencil off. */
+         if (back_b != ispb_stencil_off)
+            emit_state->isp_fb = emit_state->isp_bb = true;
+      }
+   }
+
+   if (!disable_stencil_test && front_b != ispb_stencil_off)
+      emit_state->isp_fb = true;
+
+   pvr_csb_pack (&isp_control, TA_STATE_ISPCTL, ispctl) {
+      ispctl.upass = pass_info->userpass_spawn;
+
+      /* TODO: is bo ever NULL? Figure out what to do. */
+      ispctl.tagwritedisable = raster_discard_enabled ||
+                               !gfx_pipeline->fragment_shader_state.bo;
+
+      ispctl.two_sided = is_two_sided;
+      ispctl.bpres = emit_state->isp_fb || emit_state->isp_bb;
+
+      ispctl.dbenable = !raster_discard_enabled &&
+                        gfx_pipeline->raster_state.depth_bias_enable &&
+                        obj_type == PVRX(TA_OBJTYPE_TRIANGLE);
+      ispctl.scenable = !raster_discard_enabled;
+
+      ppp_state->isp.control_struct = ispctl;
+   }
+
+   emit_state->isp = true;
+
+   ppp_state->isp.control = isp_control;
+   ppp_state->isp.front_a = front_a;
+   ppp_state->isp.front_b = front_b;
+   ppp_state->isp.back_a = back_a;
+   ppp_state->isp.back_b = back_b;
+}
+
+static void pvr_get_viewport_scissor_overlap(const VkViewport *const viewport,
+                                             const VkRect2D *const scissor,
+                                             VkRect2D *const rect_out)
+{
+   /* TODO: See if we can remove this struct. */
+   struct pvr_rect {
+      int32_t x0, y0;
+      int32_t x1, y1;
+   };
+
+   /* TODO: Worry about overflow? */
+   const struct pvr_rect scissor_rect = {
+      .x0 = scissor->offset.x,
+      .y0 = scissor->offset.y,
+      .x1 = scissor->offset.x + scissor->extent.width,
+      .y1 = scissor->offset.y + scissor->extent.height
+   };
+   struct pvr_rect viewport_rect = { 0 };
+
+   assert(viewport->width >= 0.0f);
+   assert(scissor_rect.x0 >= 0);
+   assert(scissor_rect.y0 >= 0);
+
+   if (scissor->extent.width == 0 || scissor->extent.height == 0) {
+      *rect_out = (VkRect2D){ 0 };
+      return;
+   }
+
+   viewport_rect.x0 = (int32_t)viewport->x;
+   viewport_rect.x1 = (int32_t)viewport->x + (int32_t)viewport->width;
+
+   /* TODO: Is there a mathematical way of doing all this and then clamp at
+    * the end?
+    */
+   /* We flip the y0 and y1 when height is negative. */
+   viewport_rect.y0 = (int32_t)viewport->y + MIN2(0, (int32_t)viewport->height);
+   viewport_rect.y1 = (int32_t)viewport->y + MAX2(0, (int32_t)viewport->height);
+
+   if (scissor_rect.x1 <= viewport_rect.x0 ||
+       scissor_rect.y1 <= viewport_rect.y0 ||
+       scissor_rect.x0 >= viewport_rect.x1 ||
+       scissor_rect.y0 >= viewport_rect.y1) {
+      *rect_out = (VkRect2D){ 0 };
+      return;
+   }
+
+   /* Determine the overlapping rectangle. */
+   viewport_rect.x0 = MAX2(viewport_rect.x0, scissor_rect.x0);
+   viewport_rect.y0 = MAX2(viewport_rect.y0, scissor_rect.y0);
+   viewport_rect.x1 = MIN2(viewport_rect.x1, scissor_rect.x1);
+   viewport_rect.y1 = MIN2(viewport_rect.y1, scissor_rect.y1);
+
+   /* TODO: Is this conversion safe? Is this logic right? */
+   rect_out->offset.x = (uint32_t)viewport_rect.x0;
+   rect_out->offset.y = (uint32_t)viewport_rect.y0;
+   rect_out->extent.height = (uint32_t)(viewport_rect.y1 - viewport_rect.y0);
+   rect_out->extent.width = (uint32_t)(viewport_rect.x1 - viewport_rect.x0);
+}
+
+static inline uint32_t
+pvr_get_geom_region_clip_align_size(struct pvr_device_info *const dev_info)
+{
+   /* TODO: This should come from rogue_ppp.xml. */
+   return 16U + 16U * (!PVR_HAS_FEATURE(dev_info, tile_size_16x16));
+}
+
/* FIXME: Remove device param when PVR_HAS_FEATURE() accepts const dev_info */
/* Sets up the ISP depth bias and scissor state for the current draw.
 *
 * Depth bias (ispctl->dbenable) is not implemented yet and asserts. For
 * scissoring (ispctl->scenable), the overlap of viewport 0 and scissor 0 is
 * packed into a pair of IPF scissor words and a TA region clip that clips
 * everything outside the overlap. The scissor words are appended to the
 * command buffer's scissor array and the isp_dbsc/region_clip emit bits are
 * set; everything is skipped when the words match the last recorded pair.
 */
static void
pvr_setup_isp_depth_bias_scissor_state(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
   const struct pvr_dynamic_state *const dynamic_state =
      &cmd_buffer->state.dynamic.common;
   const struct pvr_cmd_struct(TA_STATE_ISPCTL) *const ispctl =
      &ppp_state->isp.control_struct;
   struct pvr_device_info *const dev_info =
      &cmd_buffer->device->pdevice->dev_info;

   if (ispctl->dbenable)
      assert(!"Unimplemented");

   if (ispctl->scenable) {
      const uint32_t region_clip_align_size =
         pvr_get_geom_region_clip_align_size(dev_info);
      const VkViewport *const viewport = &dynamic_state->viewport.viewports[0];
      const VkRect2D *const scissor = &dynamic_state->scissor.scissors[0];
      VkRect2D overlap_rect;
      uint32_t scissor_words[2];
      uint32_t height;
      uint32_t width;
      uint32_t x;
      uint32_t y;

      /* For region clip. */
      uint32_t bottom;
      uint32_t right;
      uint32_t left;
      uint32_t top;

      /* We don't support multiple viewport calculations. */
      assert(dynamic_state->viewport.count == 1);
      /* We don't support multiple scissor calculations. */
      assert(dynamic_state->scissor.count == 1);

      pvr_get_viewport_scissor_overlap(viewport, scissor, &overlap_rect);

      x = overlap_rect.offset.x;
      y = overlap_rect.offset.y;
      width = overlap_rect.extent.width;
      height = overlap_rect.extent.height;

      pvr_csb_pack (&scissor_words[0], IPF_SCISSOR_WORD_0, word0) {
         word0.scw0_xmax = x + width;
         word0.scw0_xmin = x;
      }

      pvr_csb_pack (&scissor_words[1], IPF_SCISSOR_WORD_1, word1) {
         word1.scw1_ymax = y + height;
         word1.scw1_ymin = y;
      }

      /* Unchanged since the last recorded scissor: nothing new to emit. */
      if (cmd_buffer->scissor_array.size &&
          cmd_buffer->scissor_words[0] == scissor_words[0] &&
          cmd_buffer->scissor_words[1] == scissor_words[1]) {
         return;
      }

      cmd_buffer->scissor_words[0] = scissor_words[0];
      cmd_buffer->scissor_words[1] = scissor_words[1];

      /* Calculate region clip. */

      left = x / region_clip_align_size;
      top = y / region_clip_align_size;

      /* We prevent right=-1 with the multiplication. */
      /* TODO: Is there a better way of doing this? */
      if ((x + width) != 0U)
         right = DIV_ROUND_UP(x + width, region_clip_align_size) - 1;
      else
         right = 0;

      if ((y + height) != 0U)
         bottom = DIV_ROUND_UP(y + height, region_clip_align_size) - 1;
      else
         bottom = 0U;

      /* Setup region clip to clip everything outside what was calculated. */

      /* FIXME: Should we mask to prevent writing over other words? */
      pvr_csb_pack (&ppp_state->region_clipping.word0, TA_REGION_CLIP0, word0) {
         word0.right = right;
         word0.left = left;
         word0.mode = PVRX(TA_REGION_CLIP_MODE_OUTSIDE);
      }

      pvr_csb_pack (&ppp_state->region_clipping.word1, TA_REGION_CLIP1, word1) {
         word1.bottom = bottom;
         word1.top = top;
      }

      /* Index of the entry we are about to append; referenced later by the
       * ISPDBSC control word in pvr_emit_ppp_state().
       */
      ppp_state->depthbias_scissor_indices.scissor_index =
         util_dynarray_num_elements(&cmd_buffer->scissor_array,
                                    __typeof__(cmd_buffer->scissor_words));

      memcpy(util_dynarray_grow_bytes(&cmd_buffer->scissor_array,
                                      1,
                                      sizeof(cmd_buffer->scissor_words)),
             cmd_buffer->scissor_words,
             sizeof(cmd_buffer->scissor_words));

      emit_state->isp_dbsc = true;
      emit_state->region_clip = true;
   }
}
+
/* Computes the PDS triangle-merging disable bit from the unpacked ISPA state
 * and folds it into ppp_state->pds.size_info2, flagging a PDS state ptr 0
 * re-emit only when the word actually changes.
 */
static void
pvr_setup_triangle_merging_flag(struct pvr_cmd_buffer *const cmd_buffer,
                                struct pvr_cmd_struct(TA_STATE_ISPA) * ispa)
{
   struct pvr_emit_state *const emit_state = &cmd_buffer->state.emit_state;
   struct pvr_ppp_state *const ppp_state = &cmd_buffer->state.ppp_state;
   uint32_t merge_word;
   uint32_t mask;

   pvr_csb_pack (&merge_word, TA_STATE_PDS_SIZEINFO2, size_info) {
      /* Disable for lines or punch-through or for DWD and depth compare
       * always.
       */
      if (ispa->objtype == PVRX(TA_OBJTYPE_LINE) ||
          ispa->passtype == PVRX(TA_PASSTYPE_PUNCH_THROUGH) ||
          (ispa->dwritedisable && ispa->dcmpmode == PVRX(TA_CMPMODE_ALWAYS))) {
         size_info.pds_tri_merge_disable = true;
      }
   }

   /* Pack a mask covering just the tri-merge-disable field so the other
    * SIZEINFO2 fields can be preserved in the merge below.
    */
   pvr_csb_pack (&mask, TA_STATE_PDS_SIZEINFO2, size_info) {
      size_info.pds_tri_merge_disable = true;
   }

   /* Keep everything except the tri-merge bit, then OR in the new value. */
   merge_word |= ppp_state->pds.size_info2 & ~mask;

   if (merge_word != ppp_state->pds.size_info2) {
      ppp_state->pds.size_info2 = merge_word;
      emit_state->pds_fragment_stateptr0 = true;
   }
}
+
+/* TODO: See if this function can be improved once fully implemented. */
+static uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
+   const struct pvr_device_info *dev_info,
+   uint32_t fs_common_size,
+   uint32_t min_tiles_in_flight)
+{
+   uint32_t max_tiles_in_flight;
+   uint32_t num_allocs;
+
+   if (PVR_HAS_FEATURE(dev_info, s8xe)) {
+      num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
+   } else {
+      uint32_t num_phantoms = rogue_get_num_phantoms(dev_info);
+      uint32_t min_cluster_per_phantom;
+
+      if (num_phantoms > 1) {
+         pvr_finishme("Unimplemented path!!");
+      } else {
+         min_cluster_per_phantom =
+            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
+      }
+
+      if (num_phantoms > 1)
+         pvr_finishme("Unimplemented path!!");
+
+      if (num_phantoms > 2)
+         pvr_finishme("Unimplemented path!!");
+
+      if (num_phantoms > 3)
+         pvr_finishme("Unimplemented path!!");
+
+      if (min_cluster_per_phantom >= 4)
+         num_allocs = 1;
+      else if (min_cluster_per_phantom == 2)
+         num_allocs = 2;
+      else
+         num_allocs = 4;
+   }
+
+   max_tiles_in_flight =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
+
+   if (fs_common_size == UINT_MAX) {
+      uint32_t max_common_size;
+
+      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);
+
+      if (!PVR_HAS_ERN(dev_info, 38748)) {
+         /* Hardware needs space for one extra shared allocation. */
+         num_allocs += 1;
+      }
+
+      max_common_size = rogue_get_reserved_shared_size(dev_info) -
+                        rogue_get_max_coeffs(dev_info);
+
+      /* Double resource requirements to deal with fragmentation. */
+      max_common_size /= num_allocs * 2;
+      max_common_size =
+         ROUND_DOWN_TO(max_common_size,
+                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
+
+      return max_common_size;
+   } else if (fs_common_size == 0) {
+      return max_tiles_in_flight;
+   }
+
+   pvr_finishme("Unimplemented path!!");
+
+   return 0;
+}
+
/* Fills in the PPP PDS fragment-phase state words (pixel shader base,
 * texture/uniform code base, SIZEINFO1/2, varying base and uniform data base)
 * from the bound graphics pipeline, and flags the corresponding PDS state
 * pointers for emission.
 *
 * Also clamps the current gfx sub command's max_tiles_in_flight based on the
 * fragment shader's shared register usage.
 */
static void
pvr_setup_fragment_state_pointers(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_stage_allocation_uniform_state *uniform_shader_state =
      &state->gfx_pipeline->fragment_shader_state.uniform_state;
   const struct pvr_pds_upload *pds_coeff_program =
      &state->gfx_pipeline->fragment_shader_state.pds_coeff_program;
   const struct pvr_pipeline_stage_state *fragment_state =
      &state->gfx_pipeline->fragment_shader_state.stage_state;
   struct pvr_device_info *const dev_info =
      &cmd_buffer->device->pdevice->dev_info;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   struct pvr_sub_cmd *sub_cmd = state->current_sub_cmd;

   /* All sizes below are converted from raw dword counts into the hardware's
    * per-field allocation units.
    */
   const uint32_t pds_uniform_size =
      DIV_ROUND_UP(uniform_shader_state->pds_info.data_size_in_dwords,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_UNIFORMSIZE_UNIT_SIZE));

   const uint32_t pds_varying_state_size =
      DIV_ROUND_UP(pds_coeff_program->data_size,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_VARYINGSIZE_UNIT_SIZE));

   const uint32_t usc_varying_size =
      DIV_ROUND_UP(fragment_state->coefficient_size,
                   PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));

   const uint32_t pds_temp_size =
      DIV_ROUND_UP(fragment_state->temps_count,
                   PVRX(TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE));

   const uint32_t usc_shared_size =
      DIV_ROUND_UP(fragment_state->const_shared_reg_count,
                   PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

   /* fs_common_size of 0 requests the tiles-in-flight limit for the given
    * shared register usage (see pvr_calc_fscommon_size_and_tiles_in_flight).
    */
   const uint32_t max_tiles_in_flight =
      pvr_calc_fscommon_size_and_tiles_in_flight(
         dev_info,
         usc_shared_size *
            PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE),
         1);
   uint32_t size_info_mask;
   uint32_t size_info2;

   if (max_tiles_in_flight < sub_cmd->gfx.max_tiles_in_flight)
      sub_cmd->gfx.max_tiles_in_flight = max_tiles_in_flight;

   pvr_csb_pack (&ppp_state->pds.pixel_shader_base,
                 TA_STATE_PDS_SHADERBASE,
                 shader_base) {
      const struct pvr_pds_upload *const pds_upload =
         &state->gfx_pipeline->fragment_shader_state.pds_fragment_program;

      shader_base.addr.addr = pds_upload->data_offset;
   }

   if (uniform_shader_state->pds_code.pvr_bo) {
      pvr_csb_pack (&ppp_state->pds.texture_uniform_code_base,
                    TA_STATE_PDS_TEXUNICODEBASE,
                    tex_base) {
         tex_base.addr.addr = uniform_shader_state->pds_code.code_offset;
      }
   } else {
      ppp_state->pds.texture_uniform_code_base = 0U;
   }

   pvr_csb_pack (&ppp_state->pds.size_info1, TA_STATE_PDS_SIZEINFO1, info1) {
      info1.pds_uniformsize = pds_uniform_size;
      info1.pds_texturestatesize = 0U;
      info1.pds_varyingsize = pds_varying_state_size;
      info1.usc_varyingsize = usc_varying_size;
      info1.pds_tempsize = pds_temp_size;
   }

   /* Keep only the tri-merge-disable bit of SIZEINFO2 (owned by
    * pvr_setup_triangle_merging_flag) and replace the rest with the new
    * usc_sharedsize.
    */
   pvr_csb_pack (&size_info_mask, TA_STATE_PDS_SIZEINFO2, mask) {
      mask.pds_tri_merge_disable = true;
   }

   ppp_state->pds.size_info2 &= size_info_mask;

   pvr_csb_pack (&size_info2, TA_STATE_PDS_SIZEINFO2, info2) {
      info2.usc_sharedsize = usc_shared_size;
   }

   ppp_state->pds.size_info2 |= size_info2;

   if (pds_coeff_program->pvr_bo) {
      state->emit_state.pds_fragment_stateptr1 = true;

      pvr_csb_pack (&ppp_state->pds.varying_base,
                    TA_STATE_PDS_VARYINGBASE,
                    base) {
         base.addr.addr = pds_coeff_program->data_offset;
      }
   } else {
      ppp_state->pds.varying_base = 0U;
   }

   pvr_csb_pack (&ppp_state->pds.uniform_state_data_base,
                 TA_STATE_PDS_UNIFORMDATABASE,
                 base) {
      base.addr.addr = state->pds_fragment_uniform_data_offset;
   }

   emit_state->pds_fragment_stateptr0 = true;
   emit_state->pds_fragment_stateptr3 = true;
}
+
+static void pvr_setup_viewport(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   struct pvr_emit_state *const emit_state = &state->emit_state;
+   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
+
+   if (ppp_state->viewport_count != state->dynamic.common.viewport.count) {
+      ppp_state->viewport_count = state->dynamic.common.viewport.count;
+      emit_state->viewport = true;
+   }
+
+   if (state->gfx_pipeline->raster_state.discard_enable) {
+      /* We don't want to emit any viewport data as it'll just get thrown
+       * away. It's after the previous condition because we still want to
+       * stash the viewport_count as it's our trigger for when
+       * rasterizer discard gets disabled.
+       */
+      emit_state->viewport = false;
+      return;
+   }
+
+   for (uint32_t i = 0; i < ppp_state->viewport_count; i++) {
+      VkViewport *viewport = &state->dynamic.common.viewport.viewports[i];
+      uint32_t x_scale = fui(viewport->width * 0.5f);
+      uint32_t y_scale = fui(viewport->height * 0.5f);
+      uint32_t z_scale = fui(viewport->maxDepth - viewport->minDepth);
+      uint32_t x_center = fui(viewport->x + viewport->width * 0.5f);
+      uint32_t y_center = fui(viewport->y + viewport->height * 0.5f);
+      uint32_t z_center = fui(viewport->minDepth);
+
+      if (ppp_state->viewports[i].a0 != x_center ||
+          ppp_state->viewports[i].m0 != x_scale ||
+          ppp_state->viewports[i].a1 != y_center ||
+          ppp_state->viewports[i].m1 != y_scale ||
+          ppp_state->viewports[i].a2 != z_center ||
+          ppp_state->viewports[i].m2 != z_scale) {
+         ppp_state->viewports[i].a0 = x_center;
+         ppp_state->viewports[i].m0 = x_scale;
+         ppp_state->viewports[i].a1 = y_center;
+         ppp_state->viewports[i].m1 = y_scale;
+         ppp_state->viewports[i].a2 = z_center;
+         ppp_state->viewports[i].m2 = z_scale;
+
+         emit_state->viewport = true;
+      }
+   }
+}
+
/* Packs the TA PPP control word (clipping mode, cull mode, flat-shade
 * provoking vertex, W clamp) from the bound pipeline state, flagging it for
 * emission only when it differs from the previously packed value.
 */
static void pvr_setup_ppp_control(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   uint32_t ppp_control;

   pvr_csb_pack (&ppp_control, TA_STATE_PPP_CTRL, control) {
      const struct pvr_raster_state *raster_state = &gfx_pipeline->raster_state;
      VkPrimitiveTopology topology = gfx_pipeline->input_asm_state.topology;
      control.drawclippededges = true;
      control.wclampen = true;

      /* Triangle fans use vertex 1 as the provoking vertex; everything else
       * uses vertex 0.
       */
      if (topology == VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN)
         control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_1);
      else
         control.flatshade_vtx = PVRX(TA_FLATSHADE_VTX_VERTEX_0);

      /* Depth clamp disables near/far plane clipping. */
      if (raster_state->depth_clamp_enable)
         control.clip_mode = PVRX(TA_CLIP_MODE_NO_FRONT_OR_REAR);
      else
         control.clip_mode = PVRX(TA_CLIP_MODE_FRONT_REAR);

      /* +--- FrontIsCCW?
       * | +--- Cull Front?
       * v v
       * 0|0 CULLMODE_CULL_CCW,
       * 0|1 CULLMODE_CULL_CW,
       * 1|0 CULLMODE_CULL_CW,
       * 1|1 CULLMODE_CULL_CCW,
       */
      switch (raster_state->cull_mode) {
      case VK_CULL_MODE_BACK_BIT:
      case VK_CULL_MODE_FRONT_BIT:
         /* XOR implements the truth table above: winding and which face is
          * culled together pick the hardware cull direction.
          */
         if ((raster_state->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE) ^
             (raster_state->cull_mode == VK_CULL_MODE_FRONT_BIT)) {
            control.cullmode = PVRX(TA_CULLMODE_CULL_CW);
         } else {
            control.cullmode = PVRX(TA_CULLMODE_CULL_CCW);
         }

         break;

      case VK_CULL_MODE_NONE:
         control.cullmode = PVRX(TA_CULLMODE_NO_CULLING);
         break;

      /* VK_CULL_MODE_FRONT_AND_BACK is handled by the caller (see
       * pvr_emit_dirty_ppp_state) and must not reach this switch.
       */
      default:
         unreachable("Unsupported cull mode!");
      }
   }

   if (ppp_control != ppp_state->ppp_control) {
      ppp_state->ppp_control = ppp_control;
      emit_state->ppp_control = true;
   }
}
+
/* Largest valid PPP State update in words = 31
 * 1 - Header
 * 3 - Stream Out Config words 0, 1 and 2
 * 1 - PPP Control word
 * 3 - Varying Config words 0, 1 and 2
 * 1 - Output Select
 * 1 - WClamp
 * 6 - Viewport Transform words
 * 2 - Region Clip words
 * 3 - PDS State for fragment phase (PDSSTATEPTR 1-3)
 * 4 - PDS State for fragment phase (PDSSTATEPTR0)
 * 6 - ISP Control Words
 *
 * The itemized word counts above sum to 31; this bounds the scratch buffer
 * used when packing a PPP state update in pvr_emit_ppp_state().
 */
#define PVR_MAX_PPP_STATE_DWORDS 31
+
/* Packs every flagged piece of PPP state into a scratch buffer, uploads it
 * to a CPU-mapped GPU buffer, and emits a VDM PPP state update into the
 * control stream pointing at the upload.
 *
 * Returns VK_SUCCESS without emitting anything when no presence bit ends up
 * set in the header, or the allocation error on failure. On success all of
 * the command buffer's emit state bits are cleared.
 */
static VkResult pvr_emit_ppp_state(struct pvr_cmd_buffer *const cmd_buffer)
{
   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
   struct pvr_emit_state *const emit_state = &state->emit_state;
   struct pvr_ppp_state *const ppp_state = &state->ppp_state;
   struct pvr_csb *const control_stream =
      &state->current_sub_cmd->gfx.control_stream;
   uint32_t ppp_state_words[PVR_MAX_PPP_STATE_DWORDS];
   uint32_t ppp_state_words_count;
   uint32_t ppp_state_header;
   bool deferred_secondary;
   struct pvr_bo *pvr_bo;
   uint32_t *buffer_ptr;
   VkResult result;

   buffer_ptr = ppp_state_words;

   /* The header's presence bits are filled in as each state group is
    * appended; the packed header itself is written into slot 0 only after
    * the pack block ends (see ppp_state_words[0] below).
    */
   pvr_csb_pack (&ppp_state_header, TA_STATE_HEADER, header) {
      header.view_port_count = (ppp_state->viewport_count == 0)
                                  ? 0U
                                  : (ppp_state->viewport_count - 1);

      /* Skip over header. */
      buffer_ptr++;

      /* Set ISP state. */
      if (emit_state->isp) {
         header.pres_ispctl = true;
         *buffer_ptr++ = ppp_state->isp.control;
         header.pres_ispctl_fa = true;
         *buffer_ptr++ = ppp_state->isp.front_a;

         if (emit_state->isp_fb) {
            header.pres_ispctl_fb = true;
            *buffer_ptr++ = ppp_state->isp.front_b;
         }

         if (emit_state->isp_ba) {
            header.pres_ispctl_ba = true;
            *buffer_ptr++ = ppp_state->isp.back_a;
         }

         if (emit_state->isp_bb) {
            header.pres_ispctl_bb = true;
            *buffer_ptr++ = ppp_state->isp.back_b;
         }
      }

      /* Depth bias / scissor
       * If deferred_secondary is true then we do a separate state update
       * which gets patched in ExecuteDeferredCommandBuffer.
       */
      /* TODO: Update above comment when we port ExecuteDeferredCommandBuffer.
       */
      deferred_secondary =
         cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
         cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT;

      if (emit_state->isp_dbsc && !deferred_secondary) {
         header.pres_ispctl_dbsc = true;

         pvr_csb_pack (buffer_ptr++, TA_STATE_ISPDBSC, ispdbsc) {
            ispdbsc.dbindex =
               ppp_state->depthbias_scissor_indices.depthbias_index;
            ispdbsc.scindex =
               ppp_state->depthbias_scissor_indices.scissor_index;
         }
      }

      /* PDS state. */
      if (emit_state->pds_fragment_stateptr0) {
         header.pres_pds_state_ptr0 = true;

         *buffer_ptr++ = ppp_state->pds.pixel_shader_base;
         *buffer_ptr++ = ppp_state->pds.texture_uniform_code_base;
         *buffer_ptr++ = ppp_state->pds.size_info1;
         *buffer_ptr++ = ppp_state->pds.size_info2;
      }

      if (emit_state->pds_fragment_stateptr1) {
         header.pres_pds_state_ptr1 = true;
         *buffer_ptr++ = ppp_state->pds.varying_base;
      }

      /* We don't use the pds_fragment_stateptr2 (texture state programs)
       * control word, but this doesn't mean we need to set it to 0. This is
       * because the hardware runs the texture state program only when the
       * pds_texture state field of PDS_SIZEINFO1 is non-zero.
       */

      if (emit_state->pds_fragment_stateptr3) {
         header.pres_pds_state_ptr3 = true;
         *buffer_ptr++ = ppp_state->pds.uniform_state_data_base;
      }

      /* Region clip. */
      if (emit_state->region_clip) {
         header.pres_region_clip = true;
         *buffer_ptr++ = ppp_state->region_clipping.word0;
         *buffer_ptr++ = ppp_state->region_clipping.word1;
      }

      /* Viewport. */
      if (emit_state->viewport) {
         const uint32_t viewports = MAX2(1, ppp_state->viewport_count);

         header.pres_viewport = true;
         for (uint32_t i = 0; i < viewports; i++) {
            *buffer_ptr++ = ppp_state->viewports[i].a0;
            *buffer_ptr++ = ppp_state->viewports[i].m0;
            *buffer_ptr++ = ppp_state->viewports[i].a1;
            *buffer_ptr++ = ppp_state->viewports[i].m1;
            *buffer_ptr++ = ppp_state->viewports[i].a2;
            *buffer_ptr++ = ppp_state->viewports[i].m2;
         }
      }

      /* W clamp. */
      if (emit_state->wclamp) {
         const float wclamp = 0.00001f;

         header.pres_wclamp = true;
         *buffer_ptr++ = fui(wclamp);
      }

      /* Output selects. */
      if (emit_state->output_selects) {
         header.pres_outselects = true;
         *buffer_ptr++ = ppp_state->output_selects;
      }

      /* Varying words. */
      if (emit_state->varying_word0) {
         header.pres_varying_word0 = true;
         *buffer_ptr++ = ppp_state->varying_word[0];
      }

      if (emit_state->varying_word1) {
         header.pres_varying_word1 = true;
         *buffer_ptr++ = ppp_state->varying_word[1];
      }

      if (emit_state->varying_word2) {
         /* We only emit this on the first draw of a render job to prevent us
          * from inheriting a non-zero value set elsewhere.
          */
         header.pres_varying_word2 = true;
         *buffer_ptr++ = 0;
      }

      /* PPP control. */
      if (emit_state->ppp_control) {
         header.pres_ppp_ctrl = true;
         *buffer_ptr++ = ppp_state->ppp_control;
      }

      if (emit_state->stream_out) {
         /* We only emit this on the first draw of a render job to prevent us
          * from inheriting a non-zero value set elsewhere.
          */
         header.pres_stream_out_size = true;
         *buffer_ptr++ = 0;
      }
   }

   /* An all-zero header means nothing was appended; nothing to emit. */
   if (!ppp_state_header)
      return VK_SUCCESS;

   ppp_state_words_count = buffer_ptr - ppp_state_words;
   ppp_state_words[0] = ppp_state_header;

   result = pvr_cmd_buffer_alloc_mem(cmd_buffer,
                                     cmd_buffer->device->heaps.general_heap,
                                     ppp_state_words_count * sizeof(uint32_t),
                                     PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                                     &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo->bo->map,
          ppp_state_words,
          ppp_state_words_count * sizeof(uint32_t));

   /* Write the VDM state update into the VDM control stream. */
   pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE0, state0) {
      state0.word_count = ppp_state_words_count;
      state0.addrmsb = pvr_bo->vma->dev_addr;
   }

   pvr_csb_emit (control_stream, VDMCTRL_PPP_STATE1, state1) {
      state1.addrlsb = pvr_bo->vma->dev_addr;
   }

   if (emit_state->isp_dbsc &&
       cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) {
      pvr_finishme("Unimplemented path!!");
   }

   state->emit_state_bits = 0;

   return VK_SUCCESS;
}
+
+static VkResult
+pvr_emit_dirty_ppp_state(struct pvr_cmd_buffer *const cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
+   const bool dirty_stencil = state->dirty.compare_mask ||
+                              state->dirty.write_mask || state->dirty.reference;
+   VkResult result;
+
+   if (!(dirty_stencil || state->dirty.depth_bias ||
+         state->dirty.fragment_descriptors || state->dirty.line_width ||
+         state->dirty.gfx_pipeline_binding || state->dirty.scissor ||
+         state->dirty.userpass_spawn || state->dirty.viewport ||
+         state->emit_state_bits)) {
+      return VK_SUCCESS;
+   }
+
+   if (state->dirty.gfx_pipeline_binding) {
+      struct pvr_cmd_struct(TA_STATE_ISPA) ispa;
+
+      pvr_setup_output_select(cmd_buffer);
+      pvr_setup_isp_faces_and_control(cmd_buffer, &ispa);
+      pvr_setup_triangle_merging_flag(cmd_buffer, &ispa);
+   } else if (dirty_stencil || state->dirty.line_width ||
+              state->dirty.userpass_spawn) {
+      pvr_setup_isp_faces_and_control(cmd_buffer, NULL);
+   }
+
+   if (!gfx_pipeline->raster_state.discard_enable &&
+       state->dirty.fragment_descriptors &&
+       gfx_pipeline->fragment_shader_state.bo) {
+      pvr_setup_fragment_state_pointers(cmd_buffer);
+   }
+
+   pvr_setup_isp_depth_bias_scissor_state(cmd_buffer);
+
+   if (state->dirty.viewport)
+      pvr_setup_viewport(cmd_buffer);
+
+   pvr_setup_ppp_control(cmd_buffer);
+
+   if (gfx_pipeline->raster_state.cull_mode == VK_CULL_MODE_FRONT_AND_BACK) {
+      /* FIXME: Port SetNegativeViewport(). */
+   }
+
+   result = pvr_emit_ppp_state(cmd_buffer);
+   if (result != VK_SUCCESS)
+      return result;
+
+   return VK_SUCCESS;
+}
+
static void
pvr_validate_push_descriptors(struct pvr_cmd_buffer *cmd_buffer,
                              bool *const push_descriptors_dirty_out)
{
   /* TODO: Implement this function, based on ValidatePushDescriptors. */
   pvr_finishme("Add support for push descriptors!");

   /* Stub: nothing is validated yet, so never report dirty descriptors. */
   *push_descriptors_dirty_out = false;
}
+
/* Derives the VDM CAM size and the maximum number of vertex shader instances
 * from the size of a vertex vector in the unified vertex store (UVS).
 *
 * cam_size_out and vs_max_instances_out are encoded as the hardware expects:
 * cam_size is (entries - 1) and a vs_max_instances of 0 means "no limit"
 * (assumed from the MIN2(..., vdm_cam_size - 1U) usage and the unlimited
 * branches below — TODO confirm against the VDM register documentation).
 */
static void
pvr_calculate_vertex_cam_size(const struct pvr_device_info *dev_info,
                              const uint32_t vs_output_size,
                              const bool raster_enable,
                              uint32_t *const cam_size_out,
                              uint32_t *const vs_max_instances_out)
{
   /* First work out the size of a vertex in the UVS and multiply by 4 for
    * column ordering.
    */
   const uint32_t uvs_vertex_vector_size_in_dwords =
      (vs_output_size + 1U + raster_enable * 4U) * 4U;
   const uint32_t vdm_cam_size =
      PVR_GET_FEATURE_VALUE(dev_info, vdm_cam_size, 32U);

   /* This is a proxy for 8XE. */
   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
       vdm_cam_size < 96U) {
      /* Comparisons are based on size including scratch per vertex vector. */
      if (uvs_vertex_vector_size_in_dwords < (14U * 4U)) {
         *cam_size_out = MIN2(31U, vdm_cam_size - 1U);
         *vs_max_instances_out = 16U;
      } else if (uvs_vertex_vector_size_in_dwords < (20U * 4U)) {
         *cam_size_out = 15U;
         *vs_max_instances_out = 16U;
      } else if (uvs_vertex_vector_size_in_dwords < (28U * 4U)) {
         *cam_size_out = 11U;
         *vs_max_instances_out = 12U;
      } else if (uvs_vertex_vector_size_in_dwords < (44U * 4U)) {
         *cam_size_out = 7U;
         *vs_max_instances_out = 8U;
      } else if (PVR_HAS_FEATURE(dev_info,
                                 simple_internal_parameter_format_v2) ||
                 uvs_vertex_vector_size_in_dwords < (64U * 4U)) {
         *cam_size_out = 7U;
         *vs_max_instances_out = 4U;
      } else {
         *cam_size_out = 3U;
         *vs_max_instances_out = 2U;
      }
   } else {
      /* Comparisons are based on size including scratch per vertex vector. */
      if (uvs_vertex_vector_size_in_dwords <= (32U * 4U)) {
         /* output size <= 27 + 5 scratch. */
         *cam_size_out = MIN2(95U, vdm_cam_size - 1U);
         *vs_max_instances_out = 0U;
      } else if (uvs_vertex_vector_size_in_dwords <= 48U * 4U) {
         /* output size <= 43 + 5 scratch */
         *cam_size_out = 63U;
         /* Smaller UVS (< 288 entries) cores need the instance count capped. */
         if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U)
            *vs_max_instances_out = 16U;
         else
            *vs_max_instances_out = 0U;
      } else if (uvs_vertex_vector_size_in_dwords <= 64U * 4U) {
         /* output size <= 59 + 5 scratch. */
         *cam_size_out = 31U;
         if (PVR_GET_FEATURE_VALUE(dev_info, uvs_vtx_entries, 144U) < 288U)
            *vs_max_instances_out = 16U;
         else
            *vs_max_instances_out = 0U;
      } else {
         *cam_size_out = 15U;
         *vs_max_instances_out = 16U;
      }
   }
}
+
+/* Emit VDM_STATE0..5 words for the current draw into the graphics
+ * sub-command's control stream.
+ *
+ * STATE0 (the header) is always emitted; STATE1 (cut index), STATE2
+ * (PDS attrib data base address) and STATE3..5 (PDS code address, vertex
+ * output size, instance/temp/data sizes) are only emitted when the
+ * corresponding "present" bits were set in the header.
+ */
+static void
+pvr_emit_dirty_vdm_state(const struct pvr_cmd_buffer *const cmd_buffer)
+{
+   /* FIXME: Assume all state is dirty for the moment. */
+   struct pvr_device_info *const dev_info =
+      &cmd_buffer->device->pdevice->dev_info;
+   ASSERTED const uint32_t max_user_vertex_output_components =
+      pvr_get_max_user_vertex_output_components(dev_info);
+   struct pvr_cmd_struct(VDMCTRL_VDM_STATE0)
+      header = { pvr_cmd_header(VDMCTRL_VDM_STATE0) };
+   const struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
+   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
+   uint32_t vs_output_size;
+   uint32_t max_instances;
+   uint32_t cam_size;
+
+   assert(gfx_pipeline);
+
+   /* CAM Calculations and HW state take vertex size aligned to DWORDS. */
+   vs_output_size =
+      DIV_ROUND_UP(gfx_pipeline->vertex_shader_state.vertex_output_size,
+                   PVRX(VDMCTRL_VDM_STATE4_VS_OUTPUT_SIZE_UNIT_SIZE));
+
+   assert(vs_output_size <= max_user_vertex_output_components);
+
+   /* raster_enable is hard-coded to true here -- TODO confirm this is
+    * correct for rasterizer-discard pipelines.
+    */
+   pvr_calculate_vertex_cam_size(dev_info,
+                                 vs_output_size,
+                                 true,
+                                 &cam_size,
+                                 &max_instances);
+
+   pvr_csb_emit (csb, VDMCTRL_VDM_STATE0, state0) {
+      state0.cam_size = cam_size;
+
+      if (gfx_pipeline->input_asm_state.primitive_restart) {
+         state0.cut_index_enable = true;
+         state0.cut_index_present = true;
+      }
+
+      switch (gfx_pipeline->input_asm_state.topology) {
+      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+         state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_1);
+         break;
+
+      default:
+         state0.flatshade_control = PVRX(VDMCTRL_FLATSHADE_CONTROL_VERTEX_0);
+         break;
+      }
+
+      /* If we've bound a different vertex buffer, or this draw-call requires
+       * a different PDS attrib data-section from the last draw call (changed
+       * base_instance) then we need to specify a new data section. This is
+       * also the case if we've switched pipeline or attrib program as the
+       * data-section layout will be different.
+       */
+      state0.vs_data_addr_present =
+         state->dirty.gfx_pipeline_binding || state->dirty.vertex_bindings ||
+         state->dirty.draw_base_instance || state->dirty.draw_variant;
+
+      /* Need to specify new PDS Attrib program if we've bound a different
+       * pipeline or we needed a different PDS Attrib variant for this
+       * draw-call.
+       */
+      state0.vs_other_present = state->dirty.gfx_pipeline_binding ||
+                                state->dirty.draw_variant;
+
+      /* UVB_SCRATCH_SELECT_ONE with no rasterization is only valid when
+       * stream output is enabled. We use UVB_SCRATCH_SELECT_FIVE because
+       * Vulkan doesn't support stream output and the vertex position is
+       * always emitted to the UVB.
+       */
+      state0.uvs_scratch_size_select =
+         PVRX(VDMCTRL_UVS_SCRATCH_SIZE_SELECT_FIVE);
+
+      /* Keep a copy of the header so we know which optional words follow. */
+      header = state0;
+   }
+
+   if (header.cut_index_present) {
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE1, state1) {
+         switch (state->index_buffer_binding.type) {
+         case VK_INDEX_TYPE_UINT32:
+            /* FIXME: Defines for these? These seem to come from the Vulkan
+             * spec. for VkPipelineInputAssemblyStateCreateInfo
+             * primitiveRestartEnable.
+             */
+            state1.cut_index = 0xFFFFFFFF;
+            break;
+
+         case VK_INDEX_TYPE_UINT16:
+            state1.cut_index = 0xFFFF;
+            break;
+
+         default:
+            unreachable(!"Invalid index type");
+         }
+      }
+   }
+
+   if (header.vs_data_addr_present) {
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE2, state2) {
+         state2.vs_pds_data_base_addr.addr = state->pds_vertex_attrib_offset;
+      }
+   }
+
+   if (header.vs_other_present) {
+      /* "<< 2" converts the dword count to bytes. */
+      const uint32_t usc_unified_store_size_in_bytes =
+         gfx_pipeline->vertex_shader_state.vertex_input_size << 2;
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE3, state3) {
+         state3.vs_pds_code_base_addr.addr = state->pds_shader.code_offset;
+      }
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE4, state4) {
+         state4.vs_output_size = vs_output_size;
+      }
+
+      pvr_csb_emit (csb, VDMCTRL_VDM_STATE5, state5) {
+         state5.vs_max_instances = max_instances;
+         state5.vs_usc_common_size = 0U;
+         state5.vs_usc_unified_size = DIV_ROUND_UP(
+            usc_unified_store_size_in_bytes,
+            PVRX(VDMCTRL_VDM_STATE5_VS_USC_UNIFIED_SIZE_UNIT_SIZE));
+         state5.vs_pds_temp_size =
+            DIV_ROUND_UP(state->pds_shader.info->temps_required << 2,
+                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_TEMP_SIZE_UNIT_SIZE));
+         state5.vs_pds_data_size =
+            DIV_ROUND_UP(state->pds_shader.info->data_size_in_dwords << 2,
+                         PVRX(VDMCTRL_VDM_STATE5_VS_PDS_DATA_SIZE_UNIT_SIZE));
+      }
+   }
+}
+
+/* Prepare all state required before a draw can be emitted: start/continue a
+ * graphics sub-command, track depth/stencil usage (so load ops are not
+ * optimized out when the pipeline reads them), (re)select the PDS vertex
+ * attribute program, refresh descriptor mappings, then emit the dirty PPP
+ * and VDM state and clear the per-draw dirty flags.
+ *
+ * \return VK_SUCCESS, or the error from descriptor mapping setup.
+ */
+static VkResult pvr_validate_draw_state(struct pvr_cmd_buffer *cmd_buffer)
+{
+   struct pvr_cmd_buffer_state *const state = &cmd_buffer->state;
+   const struct pvr_graphics_pipeline *const gfx_pipeline = state->gfx_pipeline;
+   const struct pvr_pipeline_stage_state *const fragment_state =
+      &gfx_pipeline->fragment_shader_state.stage_state;
+   struct pvr_sub_cmd *sub_cmd;
+   bool fstencil_writemask_zero;
+   bool bstencil_writemask_zero;
+   bool push_descriptors_dirty;
+   bool fstencil_keep;
+   bool bstencil_keep;
+   VkResult result;
+
+   /* NOTE(review): the VkResult of starting the sub-command is not checked
+    * here -- confirm failures are surfaced elsewhere (e.g. via the command
+    * buffer's recorded status).
+    */
+   pvr_cmd_buffer_start_sub_cmd(cmd_buffer, PVR_SUB_CMD_TYPE_GRAPHICS);
+
+   sub_cmd = state->current_sub_cmd;
+   sub_cmd->gfx.empty_cmd = false;
+
+   /* Determine pipeline depth/stencil usage. If a pipeline uses depth or
+    * stencil testing, those attachments are using their loaded values, and
+    * the loadOps cannot be optimized out.
+    */
+   /* Pipeline uses depth testing. */
+   if (sub_cmd->gfx.depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
+       gfx_pipeline->depth_compare_op != VK_COMPARE_OP_ALWAYS) {
+      sub_cmd->gfx.depth_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
+   }
+
+   /* Pipeline uses stencil testing. */
+   if (sub_cmd->gfx.stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED &&
+       (gfx_pipeline->stencil_front.compare_op != VK_COMPARE_OP_ALWAYS ||
+        gfx_pipeline->stencil_back.compare_op != VK_COMPARE_OP_ALWAYS)) {
+      sub_cmd->gfx.stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEEDED;
+   }
+
+   /* Disable compute overlap when the fragment coefficient (varying) size
+    * exceeds what the HW field can express.
+    */
+   if (PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info,
+                       compute_overlap)) {
+      uint32_t coefficient_size =
+         DIV_ROUND_UP(fragment_state->coefficient_size,
+                      PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_UNIT_SIZE));
+
+      if (coefficient_size >
+          PVRX(TA_STATE_PDS_SIZEINFO1_USC_VARYINGSIZE_MAX_SIZE))
+         sub_cmd->gfx.disable_compute_overlap = true;
+   }
+
+   sub_cmd->gfx.frag_uses_atomic_ops |= fragment_state->uses_atomic_ops;
+   sub_cmd->gfx.frag_has_side_effects |= fragment_state->has_side_effects;
+   sub_cmd->gfx.frag_uses_texture_rw |= fragment_state->uses_texture_rw;
+   sub_cmd->gfx.vertex_uses_texture_rw |=
+      gfx_pipeline->vertex_shader_state.stage_state.uses_texture_rw;
+
+   fstencil_keep =
+      (gfx_pipeline->stencil_front.fail_op == VK_STENCIL_OP_KEEP) &&
+      (gfx_pipeline->stencil_front.pass_op == VK_STENCIL_OP_KEEP);
+   bstencil_keep = (gfx_pipeline->stencil_back.fail_op == VK_STENCIL_OP_KEEP) &&
+                   (gfx_pipeline->stencil_back.pass_op == VK_STENCIL_OP_KEEP);
+   fstencil_writemask_zero = (state->dynamic.common.write_mask.front == 0);
+   bstencil_writemask_zero = (state->dynamic.common.write_mask.back == 0);
+
+   /* Set stencil modified flag if:
+    * - Neither front nor back-facing stencil has a fail_op/pass_op of KEEP.
+    * - Neither front nor back-facing stencil has a write_mask of zero.
+    */
+   if (!(fstencil_keep && bstencil_keep) &&
+       !(fstencil_writemask_zero && bstencil_writemask_zero)) {
+      sub_cmd->gfx.modifies_stencil = true;
+   }
+
+   /* Set depth modified flag if depth write is enabled. */
+   if (!gfx_pipeline->depth_write_disable)
+      sub_cmd->gfx.modifies_depth = true;
+
+   /* If either the data or code changes for pds vertex attribs, regenerate the
+    * data segment.
+    */
+   if (state->dirty.vertex_bindings || state->dirty.gfx_pipeline_binding ||
+       state->dirty.draw_variant || state->dirty.draw_base_instance) {
+      enum pvr_pds_vertex_attrib_program_type prog_type;
+      const struct pvr_pds_attrib_program *program;
+
+      if (state->draw_state.draw_indirect)
+         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT;
+      else if (state->draw_state.base_instance)
+         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE;
+      else
+         prog_type = PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC;
+
+      program =
+         &gfx_pipeline->vertex_shader_state.pds_attrib_programs[prog_type];
+      state->pds_shader.info = &program->info;
+      state->pds_shader.code_offset = program->program.code_offset;
+
+      state->max_shared_regs =
+         MAX2(state->max_shared_regs, pvr_calc_shared_regs_count(gfx_pipeline));
+
+      pvr_setup_vertex_buffers(cmd_buffer, gfx_pipeline);
+   }
+
+   /* TODO: Check for dirty push constants */
+
+   pvr_validate_push_descriptors(cmd_buffer, &push_descriptors_dirty);
+
+   state->dirty.vertex_descriptors = push_descriptors_dirty ||
+                                     state->dirty.gfx_pipeline_binding;
+   /* Fragment descriptors are refreshed whenever vertex ones are. */
+   state->dirty.fragment_descriptors = state->dirty.vertex_descriptors;
+
+   if (state->dirty.fragment_descriptors) {
+      result = pvr_setup_descriptor_mappings(
+         cmd_buffer,
+         PVR_STAGE_ALLOCATION_FRAGMENT,
+         &state->gfx_pipeline->fragment_shader_state.uniform_state,
+         &state->pds_fragment_uniform_data_offset);
+      if (result != VK_SUCCESS) {
+         mesa_loge("Could not setup fragment descriptor mappings.");
+         return result;
+      }
+   }
+
+   if (state->dirty.vertex_descriptors) {
+      uint32_t pds_vertex_uniform_data_offset;
+
+      result = pvr_setup_descriptor_mappings(
+         cmd_buffer,
+         PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
+         &state->gfx_pipeline->vertex_shader_state.uniform_state,
+         &pds_vertex_uniform_data_offset);
+      if (result != VK_SUCCESS) {
+         mesa_loge("Could not setup vertex descriptor mappings.");
+         return result;
+      }
+
+      pvr_emit_dirty_pds_state(cmd_buffer, pds_vertex_uniform_data_offset);
+   }
+
+   pvr_emit_dirty_ppp_state(cmd_buffer);
+   pvr_emit_dirty_vdm_state(cmd_buffer);
+
+   /* All per-draw dirty state has now been consumed. */
+   state->dirty.gfx_desc_dirty = false;
+   state->dirty.blend_constants = false;
+   state->dirty.compare_mask = false;
+   state->dirty.depth_bias = false;
+   state->dirty.draw_base_instance = false;
+   state->dirty.draw_variant = false;
+   state->dirty.fragment_descriptors = false;
+   state->dirty.line_width = false;
+   state->dirty.gfx_pipeline_binding = false;
+   state->dirty.reference = false;
+   state->dirty.scissor = false;
+   state->dirty.userpass_spawn = false;
+   state->dirty.vertex_bindings = false;
+   state->dirty.viewport = false;
+   state->dirty.write_mask = false;
+
+   return VK_SUCCESS;
+}
+
+/* Translate a Vulkan primitive topology to the equivalent VDMCTRL HW value.
+ * Aborts on topologies with no mapping.
+ */
+static uint32_t pvr_get_hw_primitive_topology(VkPrimitiveTopology topology)
+{
+   switch (topology) {
+   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_POINT_LIST);
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST);
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP);
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST);
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP);
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_FAN);
+   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ);
+   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ);
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_LIST_ADJ);
+   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_TRI_STRIP_ADJ);
+   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
+      return PVRX(VDMCTRL_PRIMITIVE_TOPOLOGY_PATCH_LIST);
+   default:
+      unreachable("Undefined primitive topology");
+   }
+}
+
+/* Emit a VDM INDEX_LIST block (INDEX_LIST0 plus any optional follow-on
+ * words) that kicks a draw.
+ *
+ * Callers pass either vertex_count (non-indexed draw) or index_count
+ * (indexed draw), with the unused one set to 0; see the INDEX_LIST2 note
+ * below.
+ */
+static void pvr_emit_vdm_index_list(struct pvr_cmd_buffer *cmd_buffer,
+                                    VkPrimitiveTopology topology,
+                                    uint32_t first_vertex,
+                                    uint32_t vertex_count,
+                                    uint32_t first_index,
+                                    uint32_t index_count,
+                                    uint32_t instance_count)
+{
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_csb *const csb = &state->current_sub_cmd->gfx.control_stream;
+   struct pvr_cmd_struct(VDMCTRL_INDEX_LIST0)
+      list_hdr = { pvr_cmd_header(VDMCTRL_INDEX_LIST0) };
+   pvr_dev_addr_t index_buffer_addr = { 0 };
+   unsigned int index_stride = 0;
+
+   pvr_csb_emit (csb, VDMCTRL_INDEX_LIST0, list0) {
+      list0.primitive_topology = pvr_get_hw_primitive_topology(topology);
+
+      /* First instance is not handled in the VDM state, it's implemented as
+       * an addition in the PDS vertex fetch.
+       */
+      list0.index_count_present = true;
+
+      if (instance_count > 1)
+         list0.index_instance_count_present = true;
+
+      if (first_vertex != 0)
+         list0.index_offset_present = true;
+
+      if (state->draw_state.draw_indexed) {
+         struct pvr_buffer *buffer = state->index_buffer_binding.buffer;
+
+         switch (state->index_buffer_binding.type) {
+         default:
+            unreachable("Invalid index type");
+            FALLTHROUGH;
+
+         case VK_INDEX_TYPE_UINT32:
+            list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B32);
+            index_stride = 4;
+            break;
+
+         case VK_INDEX_TYPE_UINT16:
+            list0.index_size = PVRX(VDMCTRL_INDEX_SIZE_B16);
+            index_stride = 2;
+            break;
+         }
+
+         /* Fold binding offset and firstIndex into the base address. */
+         list0.index_addr_present = true;
+         index_buffer_addr.addr = buffer->dev_addr.addr;
+         index_buffer_addr.addr += state->index_buffer_binding.offset;
+         index_buffer_addr.addr += first_index * index_stride;
+         list0.index_base_addrmsb = index_buffer_addr;
+      }
+
+      /* Keep a copy of the header so we know which optional words follow. */
+      list_hdr = list0;
+   }
+
+   if (list_hdr.index_addr_present) {
+      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST1, list1) {
+         list1.index_base_addrlsb = index_buffer_addr;
+      }
+   }
+
+   if (list_hdr.index_count_present) {
+      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST2, list2) {
+         /* The same field carries either the vertex count (non-indexed
+          * draws) or the index count (indexed draws); callers pass 0 for
+          * the unused one, so the OR selects whichever is active -- TODO
+          * confirm no caller sets both.
+          */
+         list2.index_count = vertex_count | index_count;
+      }
+   }
+
+   if (list_hdr.index_instance_count_present) {
+      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST3, list3) {
+         /* HW field holds instance count minus one. */
+         list3.instance_count = instance_count - 1;
+      }
+   }
+
+   if (list_hdr.index_offset_present) {
+      pvr_csb_emit (csb, VDMCTRL_INDEX_LIST4, list4) {
+         list4.index_offset = first_vertex;
+      }
+   }
+
+   /* TODO: See if we need list_words[5-9]. */
+}
+
+/* Implements vkCmdDrawIndexed: validates draw state then emits the VDM
+ * index list for an indexed draw.
+ */
+void pvr_CmdDrawIndexed(VkCommandBuffer commandBuffer,
+                        uint32_t indexCount,
+                        uint32_t instanceCount,
+                        uint32_t firstIndex,
+                        int32_t vertexOffset,
+                        uint32_t firstInstance)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_cmd_buffer_draw_state draw_state;
+   VkResult result;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   draw_state.base_vertex = vertexOffset;
+   draw_state.base_instance = firstInstance;
+   draw_state.draw_indirect = false;
+   draw_state.draw_indexed = true;
+   pvr_update_draw_state(&cmd_buffer->state, &draw_state);
+
+   result = pvr_validate_draw_state(cmd_buffer);
+   if (result != VK_SUCCESS)
+      return;
+
+   /* Write the VDM control stream for the primitive. vertex_count is 0
+    * because this is an indexed draw; indexCount drives the draw instead.
+    */
+   pvr_emit_vdm_index_list(cmd_buffer,
+                           state->gfx_pipeline->input_asm_state.topology,
+                           vertexOffset,
+                           0,
+                           firstIndex,
+                           indexCount,
+                           instanceCount);
+}
+
+/* Indirect draw entry points -- not yet implemented; the asserts make any
+ * accidental use fail loudly in debug builds.
+ */
+void pvr_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer,
+                                VkBuffer _buffer,
+                                VkDeviceSize offset,
+                                uint32_t drawCount,
+                                uint32_t stride)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdDrawIndirect(VkCommandBuffer commandBuffer,
+                         VkBuffer _buffer,
+                         VkDeviceSize offset,
+                         uint32_t drawCount,
+                         uint32_t stride)
+{
+   assert(!"Unimplemented");
+}
+
+/* Placeholder for attachment resolve handling at the end of a render pass;
+ * currently only ends the active sub-command.
+ */
+static VkResult
+pvr_resolve_unemitted_resolve_attachments(struct pvr_cmd_buffer *cmd_buffer)
+{
+   pvr_finishme("Add attachment resolve support!");
+   return pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+}
+
+/* Implements vkCmdEndRenderPass2: ends the active sub-command, performs any
+ * pending resolves, then resets render_pass_info while keeping its
+ * caller-owned attachment/clear-value arrays for reuse.
+ */
+void pvr_CmdEndRenderPass2(VkCommandBuffer commandBuffer,
+                           const VkSubpassEndInfoKHR *pSubpassEndInfo)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   struct pvr_image_view **attachments;
+   VkClearValue *clear_values;
+   VkResult result;
+
+   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
+
+   assert(state->render_pass_info.pass);
+   assert(state->render_pass_info.framebuffer);
+
+   /* TODO: Investigate why pvr_cmd_buffer_end_sub_cmd/EndSubCommand is called
+    * twice in this path, one here and one from
+    * pvr_resolve_unemitted_resolve_attachments.
+    */
+   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+   if (result != VK_SUCCESS)
+      return;
+
+   result = pvr_resolve_unemitted_resolve_attachments(cmd_buffer);
+   if (result != VK_SUCCESS)
+      return;
+
+   /* Save the required fields before clearing render_pass_info struct. */
+   attachments = state->render_pass_info.attachments;
+   clear_values = state->render_pass_info.clear_values;
+
+   memset(&state->render_pass_info, 0, sizeof(state->render_pass_info));
+
+   state->render_pass_info.attachments = attachments;
+   state->render_pass_info.clear_values = clear_values;
+}
+
+/* Secondary command buffer, subpass, barrier, event and timestamp entry
+ * points -- not yet implemented; the asserts/unreachable make accidental
+ * use fail loudly in debug builds.
+ */
+void pvr_CmdExecuteCommands(VkCommandBuffer commandBuffer,
+                            uint32_t commandBufferCount,
+                            const VkCommandBuffer *pCommandBuffers)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdNextSubpass2(VkCommandBuffer commandBuffer,
+                         const VkSubpassBeginInfo *pSubpassBeginInfo,
+                         const VkSubpassEndInfo *pSubpassEndInfo)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdPipelineBarrier2KHR(VkCommandBuffer commandBuffer,
+                                const VkDependencyInfoKHR *pDependencyInfo)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdResetEvent2KHR(VkCommandBuffer commandBuffer,
+                           VkEvent _event,
+                           VkPipelineStageFlags2KHR stageMask)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdSetEvent2KHR(VkCommandBuffer commandBuffer,
+                         VkEvent _event,
+                         const VkDependencyInfoKHR *pDependencyInfo)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdWaitEvents2KHR(VkCommandBuffer commandBuffer,
+                           uint32_t eventCount,
+                           const VkEvent *pEvents,
+                           const VkDependencyInfoKHR *pDependencyInfos)
+{
+   assert(!"Unimplemented");
+}
+
+void pvr_CmdWriteTimestamp2KHR(VkCommandBuffer commandBuffer,
+                               VkPipelineStageFlags2KHR stage,
+                               VkQueryPool queryPool,
+                               uint32_t query)
+{
+   unreachable("Timestamp queries are not supported.");
+}
+
+/* Implements vkEndCommandBuffer: ends any open sub-command and transitions
+ * the command buffer from RECORDING to EXECUTABLE.
+ *
+ * \return Any previously latched recording error, the error from ending the
+ *         last sub-command, or VK_SUCCESS.
+ */
+VkResult pvr_EndCommandBuffer(VkCommandBuffer commandBuffer)
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
+   VkResult result;
+
+   /* From the Vulkan 1.0 spec:
+    *
+    * CommandBuffer must be in the recording state.
+    */
+   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_RECORDING);
+
+   /* Report an error that was recorded earlier during command recording. */
+   if (state->status != VK_SUCCESS)
+      return state->status;
+
+   result = pvr_cmd_buffer_end_sub_cmd(cmd_buffer);
+   if (result != VK_SUCCESS)
+      return result;
+
+   cmd_buffer->status = PVR_CMD_BUFFER_STATUS_EXECUTABLE;
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/pvr_csb.c b/src/imagination/vulkan/pvr_csb.c
new file mode 100644 (file)
index 0000000..64c734d
--- /dev/null
@@ -0,0 +1,281 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on v3dv_cl.c which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_private.h"
+#include "vk_log.h"
+
+/**
+ * \file pvr_csb.c
+ *
+ * \brief Contains functions to manage Control Stream Builder (csb) object.
+ *
+ * A csb object can be used to create a primary/main control stream, referred
+ * to as the control stream hereafter, or a secondary control stream, also
+ * referred to as a sub control stream. The main difference between the two is
+ * that the control stream is submitted directly to the GPU and is terminated
+ * using STREAM_TERMINATE, whereas the secondary control stream can be thought
+ * of as an independent set of commands that can be referenced by a primary
+ * control stream to avoid duplication, and is instead terminated using
+ * STREAM_RETURN, which means the control stream parser should return to the
+ * main stream it came from.
+ *
+ * Note: Sub control stream is only supported for PVR_CMD_STREAM_TYPE_GRAPHICS
+ * type control streams.
+ */
+
+/**
+ * \brief Size of the individual csb buffer object.
+ */
+#define PVR_CMD_BUFFER_CSB_BO_SIZE 4096
+
+/**
+ * \brief Initializes the csb object.
+ *
+ * \param[in] device      Logical device pointer.
+ * \param[in] stream_type Type of control stream this csb will build; selects
+ *                        VDMCTRL vs. CDMCTRL link/terminate words later on.
+ * \param[in] csb         Control Stream Builder object to initialize.
+ *
+ * \sa #pvr_csb_finish()
+ */
+void pvr_csb_init(struct pvr_device *device,
+                  enum pvr_cmd_stream_type stream_type,
+                  struct pvr_csb *csb)
+{
+   /* No backing buffer yet; the first allocation triggers an extend. */
+   csb->start = NULL;
+   csb->next = NULL;
+   csb->pvr_bo = NULL;
+   csb->end = NULL;
+   csb->device = device;
+   csb->stream_type = stream_type;
+   csb->status = VK_SUCCESS;
+   list_inithead(&csb->pvr_bo_list);
+}
+
+/**
+ * \brief Frees the resources associated with the csb object.
+ *
+ * \param[in] csb Control Stream Builder object to free.
+ *
+ * \sa #pvr_csb_init()
+ */
+void pvr_csb_finish(struct pvr_csb *csb)
+{
+   /* _safe variant: entries are unlinked and freed while iterating. */
+   list_for_each_entry_safe (struct pvr_bo, pvr_bo, &csb->pvr_bo_list, link) {
+      list_del(&pvr_bo->link);
+      pvr_bo_free(csb->device, pvr_bo);
+   }
+
+   /* Leave the csb in a reset state to catch use after destroy instances */
+   pvr_csb_init(NULL, PVR_CMD_STREAM_TYPE_INVALID, csb);
+}
+
+/**
+ * \brief Helper function to extend csb memory.
+ *
+ * Allocates a new buffer object and links it with the previous buffer object
+ * using STREAM_LINK dwords and updates csb object to use the new buffer.
+ *
+ * To make sure that we have enough space to emit STREAM_LINK dwords in the
+ * current buffer, a few bytes are reserved at the end, every time a buffer is
+ * created. Every time we allocate a new buffer we fix the current buffer in use
+ * to emit the stream link dwords. This makes sure that when
+ * #pvr_csb_alloc_dwords() is called from #pvr_csb_emit() to add STREAM_LINK0
+ * and STREAM_LINK1, it succeeds without trying to allocate new pages.
+ *
+ * On allocation failure csb->status is set so subsequent allocations fail
+ * early.
+ *
+ * \param[in] csb Control Stream Builder object to extend.
+ * \return true on success and false otherwise.
+ */
+static bool pvr_csb_buffer_extend(struct pvr_csb *csb)
+{
+   const uint8_t stream_link_space = (pvr_cmd_length(VDMCTRL_STREAM_LINK0) +
+                                      pvr_cmd_length(VDMCTRL_STREAM_LINK1)) *
+                                     4;
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&csb->device->pdevice->dev_info);
+   struct pvr_bo *pvr_bo;
+   VkResult result;
+
+   /* Make sure extra space allocated for stream links is sufficient for both
+    * stream types.
+    */
+   STATIC_ASSERT((pvr_cmd_length(VDMCTRL_STREAM_LINK0) +
+                  pvr_cmd_length(VDMCTRL_STREAM_LINK1)) ==
+                 (pvr_cmd_length(CDMCTRL_STREAM_LINK0) +
+                  pvr_cmd_length(CDMCTRL_STREAM_LINK1)));
+
+   result = pvr_bo_alloc(csb->device,
+                         csb->device->heaps.general_heap,
+                         PVR_CMD_BUFFER_CSB_BO_SIZE,
+                         cache_line_size,
+                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                         &pvr_bo);
+   if (result != VK_SUCCESS) {
+      vk_error(csb->device, result);
+      csb->status = result;
+      return false;
+   }
+
+   /* Chain to the old BO if this is not the first BO in csb */
+   if (csb->pvr_bo) {
+      /* Reclaim the reserved tail so the link words are guaranteed to fit
+       * in the current BO (the pvr_csb_emit calls below must not recurse
+       * into another extend).
+       */
+      csb->end += stream_link_space;
+      assert(csb->next + stream_link_space <= csb->end);
+
+      switch (csb->stream_type) {
+      case PVR_CMD_STREAM_TYPE_GRAPHICS:
+         pvr_csb_emit (csb, VDMCTRL_STREAM_LINK0, link) {
+            link.link_addrmsb = pvr_bo->vma->dev_addr;
+         }
+
+         pvr_csb_emit (csb, VDMCTRL_STREAM_LINK1, link) {
+            link.link_addrlsb = pvr_bo->vma->dev_addr;
+         }
+
+         break;
+
+      case PVR_CMD_STREAM_TYPE_COMPUTE:
+         pvr_csb_emit (csb, CDMCTRL_STREAM_LINK0, link) {
+            link.link_addrmsb = pvr_bo->vma->dev_addr;
+         }
+
+         pvr_csb_emit (csb, CDMCTRL_STREAM_LINK1, link) {
+            link.link_addrlsb = pvr_bo->vma->dev_addr;
+         }
+
+         break;
+
+      default:
+         unreachable("Unknown stream type");
+         break;
+      }
+   }
+
+   csb->pvr_bo = pvr_bo;
+   csb->start = pvr_bo->bo->map;
+
+   /* Reserve stream link size at the end to make sure we don't run out of
+    * space when a stream link is required.
+    */
+   csb->end = csb->start + pvr_bo->bo->size - stream_link_space;
+   csb->next = csb->start;
+
+   list_addtail(&pvr_bo->link, &csb->pvr_bo_list);
+
+   return true;
+}
+
+/**
+ * \brief Provides a chunk of memory from the current csb buffer. In cases where
+ * the buffer is not able to fulfill the required amount of memory,
+ * #pvr_csb_buffer_extend() is called to allocate a new buffer. Maximum size
+ * allocable in bytes is #PVR_CMD_BUFFER_CSB_BO_SIZE - size of STREAM_LINK0
+ * and STREAM_LINK1 dwords.
+ *
+ * \param[in] csb        Control Stream Builder object to allocate from.
+ * \param[in] num_dwords Number of dwords to allocate.
+ * \return Valid host virtual address or NULL otherwise.
+ */
+void *pvr_csb_alloc_dwords(struct pvr_csb *csb, uint32_t num_dwords)
+{
+   const uint32_t required_space = num_dwords * 4;
+
+   /* Fail fast if an earlier allocation already put the csb in error. */
+   if (csb->status != VK_SUCCESS)
+      return NULL;
+
+   if (csb->next + required_space > csb->end) {
+      bool ret = pvr_csb_buffer_extend(csb);
+      /* csb->status was set by pvr_csb_buffer_extend() on failure. */
+      if (!ret)
+         return NULL;
+   }
+
+   void *p = csb->next;
+
+   csb->next += required_space;
+   assert(csb->next <= csb->end);
+
+   return p;
+}
+
+/**
+ * \brief Adds a VDMCTRL_STREAM_RETURN dword into the control stream pointed to
+ * by the csb object. Since STREAM_RETURN marks the end of the sub control
+ * stream, the status of the control stream is returned as well.
+ *
+ * \param[in] csb Control Stream Builder object to add VDMCTRL_STREAM_RETURN to.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ */
+VkResult pvr_csb_emit_return(struct pvr_csb *csb)
+{
+   /* STREAM_RETURN is only supported by graphics control stream. */
+   assert(csb->stream_type == PVR_CMD_STREAM_TYPE_GRAPHICS);
+
+   /* clang-format off */
+   pvr_csb_emit(csb, VDMCTRL_STREAM_RETURN, ret);
+   /* clang-format on */
+
+   return csb->status;
+}
+
+/**
+ * \brief Adds a STREAM_TERMINATE dword into the control stream pointed to by
+ * the csb object. Since STREAM_TERMINATE marks the end of the control stream,
+ * the status of the control stream is returned as well.
+ *
+ * The VDMCTRL vs. CDMCTRL variant is chosen from the csb's stream type.
+ *
+ * \param[in] csb Control Stream Builder object to terminate.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ */
+VkResult pvr_csb_emit_terminate(struct pvr_csb *csb)
+{
+   switch (csb->stream_type) {
+   case PVR_CMD_STREAM_TYPE_GRAPHICS:
+      /* clang-format off */
+      pvr_csb_emit(csb, VDMCTRL_STREAM_TERMINATE, terminate);
+      /* clang-format on */
+      break;
+
+   case PVR_CMD_STREAM_TYPE_COMPUTE:
+      /* clang-format off */
+      pvr_csb_emit(csb, CDMCTRL_STREAM_TERMINATE, terminate);
+      /* clang-format on */
+      break;
+
+   default:
+      unreachable("Unknown stream type");
+      break;
+   }
+
+   return csb->status;
+}
diff --git a/src/imagination/vulkan/pvr_csb.h b/src/imagination/vulkan/pvr_csb.h
new file mode 100644 (file)
index 0000000..5815959
--- /dev/null
@@ -0,0 +1,205 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on v3dv_cl.h which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_CSB_H
+#define PVR_CSB_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_bo.h"
+#include "pvr_winsys.h"
+#include "util/list.h"
+
+#define __pvr_address_type pvr_dev_addr_t
+#define __pvr_get_address(pvr_dev_addr) (pvr_dev_addr).addr
+
+#include "csbgen/rogue_hwdefs.h"
+
+struct pvr_device;
+
+/* Identifies which hardware control stream a pvr_csb targets: the graphics
+ * (VDM) or compute (CDM) stream. Zero is deliberately reserved as invalid so
+ * that a zero-initialized csb is detectable.
+ */
+enum pvr_cmd_stream_type {
+   PVR_CMD_STREAM_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+   PVR_CMD_STREAM_TYPE_GRAPHICS,
+   PVR_CMD_STREAM_TYPE_COMPUTE,
+};
+
+/* Control Stream Builder: incrementally packs command/state words into a
+ * chain of device-visible buffer objects forming a hardware control stream.
+ */
+struct pvr_csb {
+   struct pvr_device *device;
+
+   /* Pointer to current csb buffer object */
+   struct pvr_bo *pvr_bo;
+
+   /* Pointers into the current bo's mapping: [start, end) is the writable
+    * range and next is the current write cursor within it.
+    */
+   void *start;
+   void *end;
+   void *next;
+
+   /* List of csb buffer objects */
+   struct list_head pvr_bo_list;
+
+   /* Whether this stream is consumed by the graphics or compute unit. */
+   enum pvr_cmd_stream_type stream_type;
+
+   /* Current error status of the command buffer. Used to track inconsistent
+    * or incomplete command buffer states that are the consequence of run-time
+    * errors such as out of memory scenarios. We want to track this in the
+    * csb because the command buffer object is not visible to some parts
+    * of the driver.
+    */
+   VkResult status;
+};
+
+/**
+ * \brief Gets the status of the csb.
+ *
+ * \param[in] csb Control Stream Builder object.
+ * \return VK_SUCCESS if the csb hasn't encountered any error or error code
+ *         otherwise.
+ */
+static inline VkResult pvr_csb_get_status(struct pvr_csb *csb)
+{
+   /* Read-only accessor; the status itself is maintained by the emit path. */
+   return csb->status;
+}
+
+/**
+ * \brief Checks if the control stream is empty or not.
+ *
+ * \param[in] csb Control Stream Builder object.
+ * \return true if csb is empty false otherwise.
+ */
+static inline bool pvr_csb_is_empty(struct pvr_csb *csb)
+{
+   /* Empty means no buffer object has been linked in, i.e. nothing emitted. */
+   return list_is_empty(&csb->pvr_bo_list);
+}
+
+/**
+ * \brief Returns the device virtual address of the start of the control
+ * stream.
+ *
+ * \param[in] csb Control Stream Builder object.
+ * \return Device address of the first buffer object in the stream, or
+ *         PVR_DEV_ADDR_INVALID if nothing has been emitted yet.
+ */
+static inline pvr_dev_addr_t pvr_csb_get_start_address(struct pvr_csb *csb)
+{
+   struct pvr_bo *first_bo;
+
+   if (pvr_csb_is_empty(csb))
+      return PVR_DEV_ADDR_INVALID;
+
+   first_bo = list_first_entry(&csb->pvr_bo_list, struct pvr_bo, link);
+
+   return first_bo->vma->dev_addr;
+}
+
+void pvr_csb_init(struct pvr_device *device,
+                  enum pvr_cmd_stream_type stream_type,
+                  struct pvr_csb *csb);
+void pvr_csb_finish(struct pvr_csb *csb);
+void *pvr_csb_alloc_dwords(struct pvr_csb *csb, uint32_t num_dwords);
+VkResult pvr_csb_emit_return(struct pvr_csb *csb);
+VkResult pvr_csb_emit_terminate(struct pvr_csb *csb);
+
+#define PVRX(x) ROGUE_##x
+#define pvr_cmd_struct(x) PVRX(x)
+#define pvr_cmd_length(x) PVRX(x##_length)
+#define pvr_cmd_header(x) PVRX(x##_header)
+#define pvr_cmd_pack(x) PVRX(x##_pack)
+
+/**
+ * \brief Packs a command/state into one or more dwords and stores them in the
+ * memory pointed to by _dst.
+ *
+ * The for-loop construct lets callers write
+ *    pvr_csb_pack (&dw, CMD, info) { info.field = ...; }
+ * The struct is header-initialized up front, the body runs exactly once, and
+ * the loop's increment expression performs the pack into _dst before
+ * terminating the loop via the sentinel pointer.
+ *
+ * \param[out] _dst    Pointer to store the packed command/state.
+ * \param[in] cmd      Command/state type.
+ * \param[in,out] name Name to give to the command/state structure variable,
+ *                     which contains the information to be packed and emitted.
+ *                     This can be used by the caller to modify the command or
+ *                     state information before it's packed.
+ */
+#define pvr_csb_pack(_dst, cmd, name)                              \
+   for (struct pvr_cmd_struct(cmd) name = { pvr_cmd_header(cmd) }, \
+                                   *_loop_terminate = &name;       \
+        __builtin_expect(_loop_terminate != NULL, 1);              \
+        ({                                                         \
+           pvr_cmd_pack(cmd)((_dst), &name);                       \
+           _loop_terminate = NULL;                                 \
+        }))
+
+/**
+ * \brief Merges dwords0 and dwords1 arrays and stores the result into the
+ * control stream pointed by the csb object.
+ *
+ * \param[in] csb     Control Stream Builder object.
+ * \param[in] dwords0 Dwords0 array.
+ * \param[in] dwords1 Dwords1 array.
+ */
+#define pvr_csb_emit_merge(csb, dwords0, dwords1)                \
+   do {                                                          \
+      uint32_t *dw;                                              \
+      STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1)); \
+      dw = pvr_csb_alloc_dwords(csb, ARRAY_SIZE(dwords0));       \
+      if (!dw)                                                   \
+         break;                                                  \
+      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)         \
+         dw[i] = (dwords0)[i] | (dwords1)[i];                    \
+   } while (0)
+
+/**
+ * \brief Packs a command/state into one or more dwords and stores them into
+ * the control stream pointed by the csb object.
+ *
+ * Works like pvr_csb_pack() but allocates the destination dwords from the
+ * control stream. If the allocation fails, _dst is NULL, the loop condition
+ * is immediately false and the caller's body never runs — nothing is packed.
+ *
+ * \param[in] csb      Control Stream Builder object.
+ * \param[in] cmd      Command/state type.
+ * \param[in,out] name Name to give to the command/state structure variable,
+ *                     which contains the information to be packed. This can be
+ *                     used by the caller to modify the command or state
+ *                     information before it's packed.
+ */
+#define pvr_csb_emit(csb, cmd, name)                               \
+   for (struct pvr_cmd_struct(cmd)                                 \
+           name = { pvr_cmd_header(cmd) },                         \
+           *_dst = pvr_csb_alloc_dwords(csb, pvr_cmd_length(cmd)); \
+        __builtin_expect(_dst != NULL, 1);                         \
+        ({                                                         \
+           pvr_cmd_pack(cmd)(_dst, &name);                         \
+           _dst = NULL;                                            \
+        }))
+
+/**
+ * \brief Stores dword into the control stream pointed by the csb object.
+ *
+ * \param[in] csb   Control Stream Builder object.
+ * \param[in] dword Dword to store into control stream.
+ */
+#define pvr_csb_emit_dword(csb, dword)                  \
+   do {                                                 \
+      uint32_t *dw;                                     \
+      STATIC_ASSERT(sizeof(dword) == sizeof(uint32_t)); \
+      dw = pvr_csb_alloc_dwords(csb, 1U);               \
+      if (!dw)                                          \
+         break;                                         \
+      *dw = dword;                                      \
+   } while (0)
+
+#endif /* PVR_CSB_H */
diff --git a/src/imagination/vulkan/pvr_descriptor_set.c b/src/imagination/vulkan/pvr_descriptor_set.c
new file mode 100644 (file)
index 0000000..8575663
--- /dev/null
@@ -0,0 +1,1454 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_private.h"
+#include "util/compiler.h"
+#include "util/list.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
+#if defined(DEBUG)
+/* Human-readable labels for the layout dump below, indexed by the same
+ * per-stage index used for memory_layout_in_dwords_per_stage (vertex,
+ * fragment, compute — in that order).
+ */
+static const struct {
+   const char *raw;
+   const char *primary;
+   const char *secondary;
+   const char *primary_dynamic;
+   const char *secondary_dynamic;
+} stage_names[] = {
+   { "Vertex",
+     "Vertex Primary",
+     "Vertex Secondary",
+     "Vertex Dynamic Primary",
+     "Vertex Dynamic Secondary" },
+   { "Fragment",
+     "Fragment Primary",
+     "Fragment Secondary",
+     "Fragment Dynamic Primary",
+     "Fragment Dynamic Secondary" },
+   { "Compute",
+     "Compute Primary",
+     "Compute Secondary",
+     "Compute Dynamic Primary",
+     "Compute Dynamic Secondary" },
+};
+
+/* Indexed directly by the VkDescriptorType enum value (SAMPLER == 0 through
+ * INPUT_ATTACHMENT == 10).
+ */
+static const char *descriptor_names[] = { "VK SAMPLER",
+                                          "VK COMBINED_IMAGE_SAMPLER",
+                                          "VK SAMPLED_IMAGE",
+                                          "VK STORAGE_IMAGE",
+                                          "VK UNIFORM_TEXEL_BUFFER",
+                                          "VK STORAGE_TEXEL_BUFFER",
+                                          "VK UNIFORM_BUFFER",
+                                          "VK STORAGE_BUFFER",
+                                          "VK UNIFORM_BUFFER_DYNAMIC",
+                                          "VK STORAGE_BUFFER_DYNAMIC",
+                                          "VK INPUT_ATTACHMENT" };
+#endif
+
+/* Fills *size_info_out with the primary/secondary dword sizes and alignment
+ * for the given descriptor type on this device.
+ */
+static void pvr_descriptor_size_info_init(
+   const struct pvr_device *device,
+   VkDescriptorType type,
+   struct pvr_descriptor_size_info *const size_info_out)
+{
+   /* UINT_MAX is a place holder. These values will be filled by calling the
+    * init function, and set appropriately based on device features.
+    */
+   static const struct pvr_descriptor_size_info template_size_infos[] = {
+      /* VK_DESCRIPTOR_TYPE_SAMPLER */
+      { 4, 0, 4 },
+      /* VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER */
+      { 8, UINT_MAX, 4 },
+      /* VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE */
+      { 4, UINT_MAX, 4 },
+      /* VK_DESCRIPTOR_TYPE_STORAGE_IMAGE */
+      { 4, UINT_MAX, 4 },
+      /* VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER */
+      { 4, UINT_MAX, 4 },
+      /* VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER */
+      { 4, UINT_MAX, 4 },
+      /* VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER */
+      { 2, UINT_MAX, 2 },
+      /* VK_DESCRIPTOR_TYPE_STORAGE_BUFFER */
+      { 2, 1, 2 },
+      /* VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC */
+      { 2, UINT_MAX, 2 },
+      /* VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC */
+      { 2, 1, 2 },
+      /* VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT */
+      { 8, UINT_MAX, 4 }
+   };
+
+   /* The table is indexed directly by the descriptor type value, and that
+    * indexing happens before the switch below can reject unknown types, so
+    * guard against an out-of-bounds read here.
+    */
+   assert((uint32_t)type < ARRAY_SIZE(template_size_infos));
+
+   *size_info_out = template_size_infos[type];
+
+   switch (type) {
+   case VK_DESCRIPTOR_TYPE_SAMPLER:
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+      break;
+
+   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: {
+      /* Secondary size for image-like descriptors is the sum of the sizes of
+       * the words laid out below (array base/stride/max-index and the
+       * width/height/depth words), with the array max-index omitted when the
+       * hardware handles array textures in the TPU.
+       */
+      const uint32_t image_secondary_offset_arraybase = 0;
+      const uint32_t image_secondary_size_arraybase = 2;
+      const uint32_t image_secondary_size_arraystride = 1;
+
+      const uint32_t image_secondary_offset_arraystride =
+         image_secondary_offset_arraybase + image_secondary_size_arraybase;
+
+      const uint32_t image_secondary_offset_arraymaxindex =
+         (PVR_HAS_FEATURE(&device->pdevice->dev_info, tpu_array_textures))
+            ? 0
+            : image_secondary_offset_arraystride +
+                 image_secondary_size_arraystride;
+
+      const uint32_t image_secondary_size_arraymaxindex = 1;
+
+      const uint32_t image_secondary_size_width = 1;
+      const uint32_t image_secondary_size_height = 1;
+      const uint32_t image_secondary_size_depth = 1;
+
+      const uint32_t image_secondary_offset_width =
+         image_secondary_offset_arraymaxindex +
+         image_secondary_size_arraymaxindex;
+      const uint32_t image_secondary_offset_height =
+         image_secondary_offset_width + image_secondary_size_width;
+      const uint32_t image_secondary_offset_depth =
+         image_secondary_offset_height + image_secondary_size_height;
+      const uint32_t image_secondary_total_size =
+         image_secondary_offset_depth + image_secondary_size_depth;
+
+      size_info_out->secondary = image_secondary_total_size;
+      break;
+   }
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      /* One extra secondary word only when robust buffer access is enabled. */
+      size_info_out->secondary = (uint32_t)device->features.robustBufferAccess;
+      break;
+
+   default:
+      unreachable("Unknown descriptor type");
+   }
+}
+
+/* Check whether a pvr stage allocation overlaps any of the given Vulkan
+ * shader stage flags.
+ */
+static bool pvr_stage_matches_vk_flags(enum pvr_stage_allocation pvr_stage,
+                                       VkShaderStageFlags flags)
+{
+   VkShaderStageFlags stage_mask;
+
+   switch (pvr_stage) {
+   case PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY:
+      stage_mask = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT;
+      break;
+   case PVR_STAGE_ALLOCATION_FRAGMENT:
+      stage_mask = VK_SHADER_STAGE_FRAGMENT_BIT;
+      break;
+   case PVR_STAGE_ALLOCATION_COMPUTE:
+      stage_mask = VK_SHADER_STAGE_COMPUTE_BIT;
+      break;
+   default:
+      unreachable("Unrecognized allocation stage.");
+   }
+
+   return (stage_mask & flags) != 0U;
+}
+
+/* If allocator == NULL, the internal one will be used. */
+/* Allocates a zeroed descriptor set layout together with all of its trailing
+ * arrays (bindings, immutable sampler pointers and one per-stage descriptor
+ * count array per stage) in a single allocation, then initializes the Vulkan
+ * object base. Returns NULL on allocation failure.
+ */
+static struct pvr_descriptor_set_layout *
+pvr_descriptor_set_layout_allocate(struct pvr_device *device,
+                                   const VkAllocationCallbacks *allocator,
+                                   uint32_t binding_count,
+                                   uint32_t immutable_sampler_count,
+                                   uint32_t supported_descriptors_count)
+{
+   struct pvr_descriptor_set_layout_binding *bindings;
+   struct pvr_descriptor_set_layout *layout;
+   __typeof__(layout->per_stage_descriptor_count) counts;
+   struct pvr_sampler **immutable_samplers;
+
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &layout, __typeof__(*layout), 1);
+   vk_multialloc_add(&ma, &bindings, __typeof__(*bindings), binding_count);
+   vk_multialloc_add(&ma,
+                     &immutable_samplers,
+                     __typeof__(*immutable_samplers),
+                     immutable_sampler_count);
+
+   /* One counts array per shader stage, each sized for every supported
+    * descriptor type.
+    */
+   for (uint32_t stage = 0; stage < ARRAY_SIZE(counts); stage++) {
+      vk_multialloc_add(&ma,
+                        &counts[stage],
+                        __typeof__(*counts[0]),
+                        supported_descriptors_count);
+   }
+
+   /* pvr_CreateDescriptorSetLayout() relies on this being zero allocated. */
+   if (!vk_multialloc_zalloc2(&ma,
+                              &device->vk.alloc,
+                              allocator,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT)) {
+      return NULL;
+   }
+
+   layout->bindings = bindings;
+   layout->immutable_samplers = immutable_samplers;
+
+   /* The per-stage pointers were filled into the local array by
+    * vk_multialloc_zalloc2(); copy them into the layout.
+    */
+   memcpy(&layout->per_stage_descriptor_count, &counts, sizeof(counts));
+
+   vk_object_base_init(&device->vk,
+                       &layout->base,
+                       VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT);
+
+   return layout;
+}
+
+/* If allocator == NULL, the internal one will be used. */
+/* Tears down the Vulkan object base and releases the single allocation made
+ * by pvr_descriptor_set_layout_allocate() (layout plus trailing arrays).
+ */
+static void
+pvr_descriptor_set_layout_free(struct pvr_device *device,
+                               const VkAllocationCallbacks *allocator,
+                               struct pvr_descriptor_set_layout *layout)
+{
+   vk_object_base_finish(&layout->base);
+   vk_free2(&device->vk.alloc, allocator, layout);
+}
+
+/* qsort() comparator ordering VkDescriptorSetLayoutBinding entries by
+ * ascending binding number.
+ */
+static int pvr_binding_compare(const void *a, const void *b)
+{
+   const uint32_t binding_a = ((const VkDescriptorSetLayoutBinding *)a)->binding;
+   const uint32_t binding_b = ((const VkDescriptorSetLayoutBinding *)b)->binding;
+
+   /* Branchless three-way compare: -1, 0 or 1. */
+   return (binding_a > binding_b) - (binding_a < binding_b);
+}
+
+/* If allocator == NULL, the internal one will be used. */
+/* Returns a heap-allocated copy of `bindings` sorted by binding number, or
+ * NULL on allocation failure. The caller owns the result and must release it
+ * with vk_free2().
+ */
+static VkDescriptorSetLayoutBinding *
+pvr_create_sorted_bindings(struct pvr_device *device,
+                           const VkAllocationCallbacks *allocator,
+                           const VkDescriptorSetLayoutBinding *bindings,
+                           uint32_t binding_count)
+{
+   VkDescriptorSetLayoutBinding *sorted_bindings =
+      vk_alloc2(&device->vk.alloc,
+                allocator,
+                binding_count * sizeof(*sorted_bindings),
+                8,
+                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!sorted_bindings)
+      return NULL;
+
+   memcpy(sorted_bindings, bindings, binding_count * sizeof(*sorted_bindings));
+
+   qsort(sorted_bindings,
+         binding_count,
+         sizeof(*sorted_bindings),
+         pvr_binding_compare);
+
+   return sorted_bindings;
+}
+
+/* Per-stage dword usage accumulated while laying out a descriptor set.
+ * Regular descriptors and dynamic buffer descriptors are tracked separately
+ * because dynamics are allocated after all other descriptors.
+ */
+struct pvr_register_usage {
+   uint32_t primary;           /* Primary words for regular descriptors. */
+   uint32_t primary_dynamic;   /* Primary words for dynamic buffers. */
+   uint32_t secondary;         /* Secondary words for regular descriptors. */
+   uint32_t secondary_dynamic; /* Secondary words for dynamic buffers. */
+};
+
+/* Converts the accumulated per-stage register usage into the layout's
+ * in-memory arrangement: for each stage, a 4-dword-aligned primary region
+ * followed by a 4-dword-aligned secondary region, with offsets accumulated
+ * into total_size_in_dwords. Dynamic sizes are recorded but occupy no space
+ * here.
+ */
+static void pvr_setup_in_memory_layout_sizes(
+   struct pvr_descriptor_set_layout *layout,
+   const struct pvr_register_usage reg_usage[PVR_STAGE_ALLOCATION_COUNT])
+{
+   for (uint32_t stage = 0;
+        stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage);
+        stage++) {
+      /* Align the running total before placing this stage's primary region. */
+      layout->total_size_in_dwords = ALIGN_POT(layout->total_size_in_dwords, 4);
+
+      layout->memory_layout_in_dwords_per_stage[stage].primary_offset =
+         layout->total_size_in_dwords;
+      layout->memory_layout_in_dwords_per_stage[stage].primary_size =
+         reg_usage[stage].primary;
+
+      layout->total_size_in_dwords += reg_usage[stage].primary;
+      layout->total_size_in_dwords = ALIGN_POT(layout->total_size_in_dwords, 4);
+
+      layout->memory_layout_in_dwords_per_stage[stage].secondary_offset =
+         layout->total_size_in_dwords;
+      layout->memory_layout_in_dwords_per_stage[stage].secondary_size =
+         reg_usage[stage].secondary;
+
+      layout->total_size_in_dwords += reg_usage[stage].secondary;
+
+      /* Dynamic buffer usage is only recorded as sizes; it is not assigned
+       * space in this layout.
+       */
+      layout->memory_layout_in_dwords_per_stage[stage].primary_dynamic_size =
+         reg_usage[stage].primary_dynamic;
+      layout->memory_layout_in_dwords_per_stage[stage].secondary_dynamic_size =
+         reg_usage[stage].secondary_dynamic;
+   }
+}
+
+#if defined(DEBUG)
+/* Debug-only pretty printer: logs, per stage, the primary then secondary
+ * region offsets and every binding's per-stage offset. Dynamic buffer
+ * bindings are listed separately (prefixed '*'); an 'X' marks bindings not
+ * visible to the stage.
+ */
+static void
+pvr_dump_in_memory_layout_sizes(const struct pvr_descriptor_set_layout *layout)
+{
+   mesa_logd("=== SET LAYOUT ===");
+   mesa_logd("----------------------------------------------");
+   mesa_logd(" in memory:");
+   mesa_logd("----------------------------------------------");
+
+   for (uint32_t stage = 0;
+        stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage);
+        stage++) {
+      mesa_logd(
+         "| %-18s @   %04u                |",
+         stage_names[stage].primary,
+         layout->memory_layout_in_dwords_per_stage[stage].primary_offset);
+      mesa_logd("----------------------------------------------");
+
+      /* Print primaries. */
+      for (uint32_t i = 0; i < layout->binding_count; i++) {
+         const struct pvr_descriptor_set_layout_binding *const binding =
+            &layout->bindings[i];
+
+         if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+             binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+            continue;
+
+         mesa_logd("|   %s %04u | %-26s[%3u] |",
+                   (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
+                   binding->per_stage_offset_in_dwords[stage].primary,
+                   descriptor_names[binding->type],
+                   binding->descriptor_count);
+      }
+
+      /* Print dynamic primaries. */
+      for (uint32_t i = 0; i < layout->binding_count; i++) {
+         const struct pvr_descriptor_set_layout_binding *const binding =
+            &layout->bindings[i];
+
+         if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+             binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+            continue;
+
+         mesa_logd("| * %s %04u | %-26s[%3u] |",
+                   (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
+                   binding->per_stage_offset_in_dwords[stage].primary,
+                   descriptor_names[binding->type],
+                   binding->descriptor_count);
+      }
+
+      mesa_logd("----------------------------------------------");
+      mesa_logd(
+         "| %-18s @   %04u                |",
+         stage_names[stage].secondary,
+         layout->memory_layout_in_dwords_per_stage[stage].secondary_offset);
+      mesa_logd("----------------------------------------------");
+
+      /* Print secondaries. */
+      for (uint32_t i = 0; i < layout->binding_count; i++) {
+         const struct pvr_descriptor_set_layout_binding *const binding =
+            &layout->bindings[i];
+
+         if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+             binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+            continue;
+
+         mesa_logd("|   %s %04u | %-26s[%3u] |",
+                   (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
+                   binding->per_stage_offset_in_dwords[stage].secondary,
+                   descriptor_names[binding->type],
+                   binding->descriptor_count);
+      }
+
+      /* Print dynamic secondaries. */
+      for (uint32_t i = 0; i < layout->binding_count; i++) {
+         const struct pvr_descriptor_set_layout_binding *const binding =
+            &layout->bindings[i];
+
+         if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+             binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+            continue;
+
+         mesa_logd("| * %s %04u | %-26s[%3u] |",
+                   (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
+                   binding->per_stage_offset_in_dwords[stage].secondary,
+                   descriptor_names[binding->type],
+                   binding->descriptor_count);
+      }
+
+      mesa_logd("==============================================");
+   }
+}
+#endif
+
+VkResult pvr_CreateDescriptorSetLayout(
+   VkDevice _device,
+   const VkDescriptorSetLayoutCreateInfo *pCreateInfo,
+   const VkAllocationCallbacks *pAllocator,
+   VkDescriptorSetLayout *pSetLayout)
+{
+   /* Used to accumulate sizes and set each descriptor's offsets per stage. */
+   struct pvr_register_usage reg_usage[PVR_STAGE_ALLOCATION_COUNT] = { 0 };
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_descriptor_set_layout *layout;
+   VkDescriptorSetLayoutBinding *bindings;
+   uint32_t immutable_sampler_count;
+
+   assert(pCreateInfo->sType ==
+          VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
+
+   vk_foreach_struct (ext, pCreateInfo->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+
+   /* TODO: Add support for push descriptors. */
+
+   if (pCreateInfo->bindingCount == 0) {
+      layout = pvr_descriptor_set_layout_allocate(device, pAllocator, 0, 0, 0);
+      if (!layout)
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+      *pSetLayout = pvr_descriptor_set_layout_to_handle(layout);
+      return VK_SUCCESS;
+   }
+
+   /* TODO: Instead of sorting, maybe do what anvil does? */
+   bindings = pvr_create_sorted_bindings(device,
+                                         pAllocator,
+                                         pCreateInfo->pBindings,
+                                         pCreateInfo->bindingCount);
+   if (!bindings)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   immutable_sampler_count = 0;
+   for (uint32_t i = 0; i < pCreateInfo->bindingCount; i++) {
+      /* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding:
+       *
+       *    "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or
+       *    VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then
+       *    pImmutableSamplers can be used to initialize a set of immutable
+       *    samplers. [...]  If descriptorType is not one of these descriptor
+       *    types, then pImmutableSamplers is ignored.
+       *
+       * We need to be careful here and only parse pImmutableSamplers if we
+       * have one of the right descriptor types.
+       */
+      const VkDescriptorType descriptor_type = bindings[i].descriptorType;
+      if ((descriptor_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
+           descriptor_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
+          bindings[i].pImmutableSamplers)
+         immutable_sampler_count += bindings[i].descriptorCount;
+   }
+
+   /* From the Vulkan 1.2.190 spec for VkDescriptorSetLayoutCreateInfo:
+    *
+    *     "The VkDescriptorSetLayoutBinding::binding members of the elements
+    *     of the pBindings array must each have different values."
+    *
+    * So we don't worry about duplicates and just allocate for bindingCount
+    * amount of bindings.
+    */
+   layout = pvr_descriptor_set_layout_allocate(
+      device,
+      pAllocator,
+      pCreateInfo->bindingCount,
+      immutable_sampler_count,
+      PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT);
+   if (!layout) {
+      vk_free2(&device->vk.alloc, pAllocator, bindings);
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   layout->binding_count = pCreateInfo->bindingCount;
+
+   for (uint32_t bind_num = 0; bind_num < layout->binding_count; bind_num++) {
+      const VkDescriptorSetLayoutBinding *const binding = &bindings[bind_num];
+      struct pvr_descriptor_set_layout_binding *const internal_binding =
+         &layout->bindings[bind_num];
+      VkShaderStageFlags shader_stages = 0;
+
+      internal_binding->type = binding->descriptorType;
+      /* The binding_numbers can be non-contiguous so we ignore the user
+       * specified binding numbers and make them contiguous ourselves.
+       */
+      internal_binding->binding_number = bind_num;
+
+      /* From Vulkan spec 1.2.189:
+       *
+       *    "If descriptorCount is zero this binding entry is reserved and the
+       *    resource must not be accessed from any stage via this binding"
+       *
+       * So do not use bindings->stageFlags, use shader_stages instead.
+       */
+      if (binding->descriptorCount) {
+         shader_stages = binding->stageFlags;
+
+         internal_binding->descriptor_count = binding->descriptorCount;
+         internal_binding->descriptor_index = layout->descriptor_count;
+         layout->descriptor_count += binding->descriptorCount;
+      }
+
+      switch (binding->descriptorType) {
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+         if (binding->pImmutableSamplers && binding->descriptorCount > 0) {
+            internal_binding->immutable_samplers_index =
+               layout->immutable_sampler_count;
+
+            for (uint32_t j = 0; j < binding->descriptorCount; j++) {
+               PVR_FROM_HANDLE(pvr_sampler,
+                               sampler,
+                               bindings->pImmutableSamplers[j]);
+               const uint32_t next = j + layout->immutable_sampler_count;
+
+               layout->immutable_samplers[next] = sampler;
+            }
+
+            layout->immutable_sampler_count += binding->descriptorCount;
+         }
+         break;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         layout->dynamic_buffer_count += binding->descriptorCount;
+         break;
+
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+         break;
+
+      default:
+         unreachable("Unknown descriptor type");
+         break;
+      }
+
+      if (!shader_stages)
+         continue;
+
+      internal_binding->shader_stages = shader_stages;
+      layout->shader_stages |= shader_stages;
+
+      for (uint32_t stage = 0;
+           stage < ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords);
+           stage++) {
+         const VkDescriptorType descriptor_type = binding->descriptorType;
+
+         if (!pvr_stage_matches_vk_flags(stage, shader_stages))
+            continue;
+
+         internal_binding->shader_stage_mask |= (1U << stage);
+
+         /* TODO: Do we have to allocate them at the end? We could speed it
+          * by allocating them here if not. */
+         /* We allocate dynamics primary and secondaries at the end. */
+         if (descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+             descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
+            struct pvr_descriptor_size_info size_info;
+
+            pvr_descriptor_size_info_init(device, descriptor_type, &size_info);
+
+            STATIC_ASSERT(
+               ARRAY_SIZE(reg_usage) ==
+               ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords));
+
+            reg_usage[stage].primary =
+               ALIGN_POT(reg_usage[stage].primary, size_info.alignment);
+
+            internal_binding->per_stage_offset_in_dwords[stage].primary =
+               reg_usage[stage].primary;
+            reg_usage[stage].primary +=
+               size_info.primary * internal_binding->descriptor_count;
+
+            internal_binding->per_stage_offset_in_dwords[stage].secondary =
+               reg_usage[stage].secondary;
+            reg_usage[stage].secondary +=
+               size_info.secondary * internal_binding->descriptor_count;
+         }
+
+         STATIC_ASSERT(
+            ARRAY_SIZE(layout->per_stage_descriptor_count) ==
+            ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords));
+
+         layout->per_stage_descriptor_count[stage][descriptor_type] +=
+            internal_binding->descriptor_count;
+      }
+   }
+
+   for (uint32_t bind_num = 0; bind_num < layout->binding_count; bind_num++) {
+      struct pvr_descriptor_set_layout_binding *const internal_binding =
+         &layout->bindings[bind_num];
+      const VkDescriptorType descriptor_type = internal_binding->type;
+
+      if (descriptor_type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+          descriptor_type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+         continue;
+
+      for (uint32_t stage = 0;
+           stage < ARRAY_SIZE(layout->bindings[0].per_stage_offset_in_dwords);
+           stage++) {
+         struct pvr_descriptor_size_info size_info;
+         const VkShaderStageFlags shader_stages =
+            internal_binding->shader_stages;
+
+         if (!pvr_stage_matches_vk_flags(stage, shader_stages))
+            continue;
+
+         pvr_descriptor_size_info_init(device, descriptor_type, &size_info);
+
+         /* TODO: align primary like we did with other descriptors? */
+         internal_binding->per_stage_offset_in_dwords[stage].primary =
+            reg_usage[stage].primary_dynamic;
+         reg_usage[stage].primary_dynamic +=
+            size_info.primary * internal_binding->descriptor_count;
+
+         internal_binding->per_stage_offset_in_dwords[stage].secondary =
+            reg_usage[stage].secondary_dynamic;
+         reg_usage[stage].secondary_dynamic +=
+            size_info.secondary * internal_binding->descriptor_count;
+      }
+   }
+
+   pvr_setup_in_memory_layout_sizes(layout, reg_usage);
+
+#if defined(DEBUG)
+   pvr_dump_in_memory_layout_sizes(layout);
+#endif
+
+   vk_free2(&device->vk.alloc, pAllocator, bindings);
+
+   *pSetLayout = pvr_descriptor_set_layout_to_handle(layout);
+
+   return VK_SUCCESS;
+}
+
+/* vkDestroyDescriptorSetLayout() entry point.
+ *
+ * Frees the layout created by pvr_CreateDescriptorSetLayout().
+ */
+void pvr_DestroyDescriptorSetLayout(VkDevice _device,
+                                    VkDescriptorSetLayout _set_layout,
+                                    const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_descriptor_set_layout, layout, _set_layout);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* The Vulkan spec allows descriptorSetLayout to be VK_NULL_HANDLE, in
+    * which case the call is a no-op. Matches the NULL handling already done
+    * in pvr_DestroyDescriptorPool().
+    */
+   if (!layout)
+      return;
+
+   pvr_descriptor_set_layout_free(device, pAllocator, layout);
+}
+
+#if defined(DEBUG)
+/* Debug-only dump of the per-stage in-register descriptor layout computed by
+ * pvr_CreatePipelineLayout(). For each stage four tables are printed:
+ * dynamic primaries, dynamic secondaries, regular primaries and regular
+ * secondaries. Rows prefixed with "X" mark bindings not visible to the stage
+ * being printed.
+ */
+static void
+pvr_dump_in_register_layout_sizes(const struct pvr_device *device,
+                                  const struct pvr_pipeline_layout *layout)
+{
+   mesa_logd("=== SET LAYOUT ===");
+   mesa_logd("----------------------------------------------------");
+   mesa_logd(" in registers:");
+   mesa_logd("----------------------------------------------------");
+
+   for (uint32_t stage = 0;
+        stage < ARRAY_SIZE(layout->register_layout_in_dwords_per_stage);
+        stage++) {
+      /* Running dword offset for dynamic descriptors. Deliberately not reset
+       * between the primary and secondary dynamic sections: secondary
+       * dynamics are laid out directly after primary dynamics (see how
+       * next_free_reg is seeded in pvr_CreatePipelineLayout()).
+       */
+      uint32_t dynamic_offset = 0;
+
+      mesa_logd("| %-48s |", stage_names[stage].primary_dynamic);
+      mesa_logd("----------------------------------------------------");
+
+      /* Print dynamic primaries. */
+      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+         const struct pvr_descriptor_set_layout *const set_layout =
+            layout->set_layout[set_num];
+
+         for (uint32_t i = 0; i < set_layout->binding_count; i++) {
+            const struct pvr_descriptor_set_layout_binding *const binding =
+               &set_layout->bindings[i];
+            bool valid = !!(binding->shader_stage_mask & (1U << stage));
+
+            /* Only dynamic uniform/storage buffers appear in this table. */
+            if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+                binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+               continue;
+
+            mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |",
+                      (valid) ? " " : "X",
+                      dynamic_offset,
+                      set_num,
+                      i,
+                      descriptor_names[binding->type],
+                      binding->descriptor_count);
+
+            /* Offsets are recomputed here rather than read back from the
+             * binding; only advance for bindings the stage actually uses.
+             */
+            if (valid) {
+               struct pvr_descriptor_size_info size_info;
+
+               pvr_descriptor_size_info_init(device, binding->type, &size_info);
+
+               dynamic_offset += size_info.primary;
+            }
+         }
+      }
+
+      mesa_logd("----------------------------------------------------");
+      mesa_logd("| %-48s |", stage_names[stage].secondary_dynamic);
+      mesa_logd("----------------------------------------------------");
+
+      /* Print dynamic secondaries. */
+      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+         const struct pvr_descriptor_set_layout *const set_layout =
+            layout->set_layout[set_num];
+
+         for (uint32_t i = 0; i < set_layout->binding_count; i++) {
+            const struct pvr_descriptor_set_layout_binding *const binding =
+               &set_layout->bindings[i];
+            bool valid = !!(binding->shader_stage_mask & (1U << stage));
+
+            if (binding->type != VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC &&
+                binding->type != VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+               continue;
+
+            mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |",
+                      (valid) ? " " : "X",
+                      dynamic_offset,
+                      set_num,
+                      i,
+                      descriptor_names[binding->type],
+                      binding->descriptor_count);
+
+            if (valid) {
+               struct pvr_descriptor_size_info size_info;
+
+               pvr_descriptor_size_info_init(device, binding->type, &size_info);
+
+               dynamic_offset += size_info.secondary;
+            }
+         }
+      }
+
+      mesa_logd("----------------------------------------------------");
+      mesa_logd("| %-48s |", stage_names[stage].primary);
+      mesa_logd("----------------------------------------------------");
+
+      /* Print primaries. Non-dynamic offsets come straight from the layout:
+       * the set's register base plus the binding's per-stage offset.
+       */
+      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+         const struct pvr_descriptor_set_layout *const set_layout =
+            layout->set_layout[set_num];
+         const uint32_t base =
+            layout->register_layout_in_dwords_per_stage[stage][set_num]
+               .primary_offset;
+
+         for (uint32_t i = 0; i < set_layout->binding_count; i++) {
+            const struct pvr_descriptor_set_layout_binding *const binding =
+               &set_layout->bindings[i];
+
+            if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+                binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+               continue;
+
+            mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |",
+                      (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
+                      base + binding->per_stage_offset_in_dwords[stage].primary,
+                      set_num,
+                      i,
+                      descriptor_names[binding->type],
+                      binding->descriptor_count);
+         }
+      }
+
+      mesa_logd("----------------------------------------------------");
+      mesa_logd("| %-48s |", stage_names[stage].secondary);
+      mesa_logd("----------------------------------------------------");
+
+      /* Print secondaries. */
+      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+         const struct pvr_descriptor_set_layout *const set_layout =
+            layout->set_layout[set_num];
+         const uint32_t base =
+            layout->register_layout_in_dwords_per_stage[stage][set_num]
+               .secondary_offset;
+
+         for (uint32_t i = 0; i < set_layout->binding_count; i++) {
+            const struct pvr_descriptor_set_layout_binding *const binding =
+               &set_layout->bindings[i];
+
+            if (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
+                binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC)
+               continue;
+
+            mesa_logd("| %s %04u | %u:%03u | %-26s[%3u] |",
+                      (binding->shader_stage_mask & (1U << stage)) ? " " : "X",
+                      base +
+                         binding->per_stage_offset_in_dwords[stage].secondary,
+                      set_num,
+                      i,
+                      descriptor_names[binding->type],
+                      binding->descriptor_count);
+         }
+      }
+
+      mesa_logd("====================================================");
+   }
+}
+#endif
+
+/* Pipeline layouts. These have nothing to do with the pipeline. They are
+ * just multiple descriptor set layouts pasted together.
+ *
+ * Register space is assigned per stage in two passes:
+ *   1. accumulate the space needed by dynamic descriptors and record
+ *      per-set/per-stage/per-type descriptor offsets;
+ *   2. hand out register ranges — dynamic descriptors sit at the start,
+ *      then each set's primaries (4-dword aligned), then all secondaries.
+ */
+VkResult pvr_CreatePipelineLayout(VkDevice _device,
+                                  const VkPipelineLayoutCreateInfo *pCreateInfo,
+                                  const VkAllocationCallbacks *pAllocator,
+                                  VkPipelineLayout *pPipelineLayout)
+{
+   /* Next unassigned register per stage, in dwords. */
+   uint32_t next_free_reg[PVR_STAGE_ALLOCATION_COUNT];
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_pipeline_layout *layout;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO);
+   assert(pCreateInfo->setLayoutCount <= PVR_MAX_DESCRIPTOR_SETS);
+
+   layout = vk_object_alloc(&device->vk,
+                            pAllocator,
+                            sizeof(*layout),
+                            VK_OBJECT_TYPE_PIPELINE_LAYOUT);
+   if (!layout)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   layout->set_count = pCreateInfo->setLayoutCount;
+   layout->shader_stages = 0;
+   /* Pass 1: dynamic descriptor register usage and per-type offsets. */
+   for (uint32_t stage = 0; stage < PVR_STAGE_ALLOCATION_COUNT; stage++) {
+      /* Running count per descriptor type, used to fill in
+       * layout->descriptor_offsets for each set.
+       */
+      uint32_t descriptor_counts
+         [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT] = { 0 };
+      struct pvr_pipeline_layout_reg_info *const reg_info =
+         &layout->per_stage_reg_info[stage];
+
+      *reg_info = (struct pvr_pipeline_layout_reg_info){ 0 };
+
+      layout->per_stage_descriptor_masks[stage] = 0;
+
+      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+         /* So we don't write these again and again. Just do it once. */
+         if (stage == 0) {
+            PVR_FROM_HANDLE(pvr_descriptor_set_layout,
+                            set_layout,
+                            pCreateInfo->pSetLayouts[set_num]);
+
+            layout->set_layout[set_num] = set_layout;
+            layout->shader_stages |= set_layout->shader_stages;
+         }
+
+         const struct pvr_descriptor_set_layout_mem_layout *const mem_layout =
+            &layout->set_layout[set_num]
+                ->memory_layout_in_dwords_per_stage[stage];
+
+         /* Allocate registers counts for dynamic descriptors. */
+         reg_info->primary_dynamic_size_in_dwords +=
+            mem_layout->primary_dynamic_size;
+         reg_info->secondary_dynamic_size_in_dwords +=
+            mem_layout->secondary_dynamic_size;
+
+         for (VkDescriptorType type = 0;
+              type < PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT;
+              type++) {
+            uint32_t descriptor_count;
+
+            /* Offset of this set's descriptors of `type` within the whole
+             * pipeline layout, for this stage.
+             */
+            layout->descriptor_offsets[set_num][stage][type] =
+               descriptor_counts[type];
+
+            descriptor_count = layout->set_layout[set_num]
+                                  ->per_stage_descriptor_count[stage][type];
+
+            if (!descriptor_count)
+               continue;
+
+            switch (type) {
+            case VK_DESCRIPTOR_TYPE_SAMPLER:
+            case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+            case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+            case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+            case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+               layout->per_stage_descriptor_masks[stage] |= 1U << set_num;
+               descriptor_counts[type] += descriptor_count;
+               break;
+
+            /* We don't need to keep track of the counts or masks for other
+             * descriptor types so there is no assert() here since other
+             * types are not invalid or unsupported.
+             */
+            /* TODO: Improve the comment above to specify why, when we find
+             * out.
+             */
+            default:
+               break;
+            }
+         }
+      }
+
+      /* Dynamic primaries and secondaries occupy the start of the register
+       * space; everything else is allocated after them below.
+       */
+      next_free_reg[stage] = reg_info->primary_dynamic_size_in_dwords +
+                             reg_info->secondary_dynamic_size_in_dwords;
+   }
+
+   /* Allocate registers counts for primary and secondary descriptors. */
+   for (uint32_t stage = 0; stage < PVR_STAGE_ALLOCATION_COUNT; stage++) {
+      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+         const struct pvr_descriptor_set_layout_mem_layout *const mem_layout =
+            &layout->set_layout[set_num]
+                ->memory_layout_in_dwords_per_stage[stage];
+         struct pvr_descriptor_set_layout_mem_layout *const reg_layout =
+            &layout->register_layout_in_dwords_per_stage[stage][set_num];
+
+         /* Each set's primary range starts on a 4-register boundary. */
+         next_free_reg[stage] = ALIGN_POT(next_free_reg[stage], 4);
+
+         reg_layout->primary_offset = next_free_reg[stage];
+         reg_layout->primary_size = mem_layout->primary_size;
+
+         next_free_reg[stage] += reg_layout->primary_size;
+      }
+
+      /* To optimize the total shared layout allocation used by the shader,
+       * secondary descriptors come last since they're less likely to be used.
+       */
+      for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+         const struct pvr_descriptor_set_layout_mem_layout *const mem_layout =
+            &layout->set_layout[set_num]
+                ->memory_layout_in_dwords_per_stage[stage];
+         struct pvr_descriptor_set_layout_mem_layout *const reg_layout =
+            &layout->register_layout_in_dwords_per_stage[stage][set_num];
+
+         /* Should we be aligning next_free_reg like it's done with the
+          * primary descriptors?
+          */
+
+         reg_layout->secondary_offset = next_free_reg[stage];
+         reg_layout->secondary_size = mem_layout->secondary_size;
+
+         next_free_reg[stage] += reg_layout->secondary_size;
+      }
+   }
+
+   /* Record which shader stages use push constants. */
+   layout->push_constants_shader_stages = 0;
+   for (uint32_t i = 0; i < pCreateInfo->pushConstantRangeCount; ++i) {
+      const VkPushConstantRange *range = &pCreateInfo->pPushConstantRanges[i];
+
+      layout->push_constants_shader_stages |= range->stageFlags;
+   }
+
+#if defined(DEBUG)
+   pvr_dump_in_register_layout_sizes(device, layout);
+#endif
+
+   *pPipelineLayout = pvr_pipeline_layout_to_handle(layout);
+
+   return VK_SUCCESS;
+}
+
+/* vkDestroyPipelineLayout() entry point. */
+void pvr_DestroyPipelineLayout(VkDevice _device,
+                               VkPipelineLayout _pipelineLayout,
+                               const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_pipeline_layout, layout, _pipelineLayout);
+
+   /* The Vulkan spec allows pipelineLayout to be VK_NULL_HANDLE, in which
+    * case the call is a no-op; vk_object_free() must not be handed NULL.
+    */
+   if (!layout)
+      return;
+
+   vk_object_free(&device->vk, pAllocator, layout);
+}
+
+/* vkCreateDescriptorPool() entry point.
+ *
+ * The pool currently only records a worst-case upper bound, in dwords, of
+ * the descriptor memory it could need; the actual per-set device memory is
+ * allocated later in pvr_descriptor_set_create().
+ */
+VkResult pvr_CreateDescriptorPool(VkDevice _device,
+                                  const VkDescriptorPoolCreateInfo *pCreateInfo,
+                                  const VkAllocationCallbacks *pAllocator,
+                                  VkDescriptorPool *pDescriptorPool)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_descriptor_pool *pool;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO);
+
+   pool = vk_object_alloc(&device->vk,
+                          pAllocator,
+                          sizeof(*pool),
+                          VK_OBJECT_TYPE_DESCRIPTOR_POOL);
+   if (!pool)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Remember the allocator so descriptor sets can be freed with the same
+    * callbacks the pool was created with.
+    */
+   if (pAllocator)
+      pool->alloc = *pAllocator;
+   else
+      pool->alloc = device->vk.alloc;
+
+   pool->max_sets = pCreateInfo->maxSets;
+   list_inithead(&pool->descriptor_sets);
+
+   /* Worst-case size: per descriptor, primary and secondary sizes are each
+    * rounded up to 4 dwords before being multiplied by the pool count.
+    */
+   pool->total_size_in_dwords = 0;
+   for (uint32_t i = 0; i < pCreateInfo->poolSizeCount; i++) {
+      struct pvr_descriptor_size_info size_info;
+      const uint32_t descriptor_count =
+         pCreateInfo->pPoolSizes[i].descriptorCount;
+
+      pvr_descriptor_size_info_init(device,
+                                    pCreateInfo->pPoolSizes[i].type,
+                                    &size_info);
+
+      const uint32_t secondary = ALIGN_POT(size_info.secondary, 4);
+      const uint32_t primary = ALIGN_POT(size_info.primary, 4);
+
+      pool->total_size_in_dwords += descriptor_count * (primary + secondary);
+   }
+   /* Descriptor data is laid out per stage allocation, so scale the bound
+    * by the number of stages.
+    */
+   pool->total_size_in_dwords *= PVR_STAGE_ALLOCATION_COUNT;
+   pool->current_size_in_dwords = 0;
+
+   pvr_finishme("Entry tracker for allocations?");
+
+   *pDescriptorPool = pvr_descriptor_pool_to_handle(pool);
+
+   return VK_SUCCESS;
+}
+
+/* Unlinks `set` from its pool and frees both its device memory and the host
+ * allocation.
+ * NOTE(review): set->pvr_bo is left NULL by pvr_descriptor_set_create() when
+ * the layout has no bindings — this assumes pvr_bo_free() tolerates a NULL
+ * buffer; confirm.
+ */
+static void pvr_free_descriptor_set(struct pvr_device *device,
+                                    struct pvr_descriptor_pool *pool,
+                                    struct pvr_descriptor_set *set)
+{
+   list_del(&set->link);
+   pvr_bo_free(device, set->pvr_bo);
+   vk_object_free(&device->vk, &pool->alloc, set);
+}
+
+/* vkDestroyDescriptorPool() entry point.
+ *
+ * Any descriptor sets still owned by the pool are released before the pool
+ * object itself is freed.
+ */
+void pvr_DestroyDescriptorPool(VkDevice _device,
+                               VkDescriptorPool _pool,
+                               const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_descriptor_pool, pool, _pool);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* Destroying VK_NULL_HANDLE is a no-op. */
+   if (pool == NULL)
+      return;
+
+   list_for_each_entry_safe (struct pvr_descriptor_set,
+                             desc_set,
+                             &pool->descriptor_sets,
+                             link) {
+      pvr_free_descriptor_set(device, pool, desc_set);
+   }
+
+   vk_object_free(&device->vk, pAllocator, pool);
+}
+
+/* vkResetDescriptorPool() entry point — not implemented yet. Asserts in
+ * debug builds; in release builds it reports success without doing anything.
+ */
+VkResult pvr_ResetDescriptorPool(VkDevice _device,
+                                 VkDescriptorPool descriptorPool,
+                                 VkDescriptorPoolResetFlags flags)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* Returns the dword offset, within the set's descriptor memory, of the
+ * primary part of descriptor `desc_idx` of `binding` for `stage`:
+ * the stage's primary base, plus the binding's per-stage offset, plus the
+ * index scaled by the descriptor's primary size.
+ */
+static uint16_t pvr_get_descriptor_primary_offset(
+   const struct pvr_device *device,
+   const struct pvr_descriptor_set_layout *layout,
+   const struct pvr_descriptor_set_layout_binding *binding,
+   const uint32_t stage,
+   const uint32_t desc_idx)
+{
+   struct pvr_descriptor_size_info size_info;
+   uint32_t offset_in_dwords;
+
+   assert(stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage));
+   assert(desc_idx < binding->descriptor_count);
+
+   pvr_descriptor_size_info_init(device, binding->type, &size_info);
+
+   offset_in_dwords =
+      layout->memory_layout_in_dwords_per_stage[stage].primary_offset +
+      binding->per_stage_offset_in_dwords[stage].primary +
+      desc_idx * size_info.primary;
+
+   /* Offset must be less than 16bits. */
+   assert(offset_in_dwords < UINT16_MAX);
+
+   return (uint16_t)offset_in_dwords;
+}
+
+/* Secondary-part counterpart of pvr_get_descriptor_primary_offset():
+ * stage secondary base + binding per-stage secondary offset + index scaled
+ * by the descriptor's secondary size, all in dwords.
+ */
+static uint16_t pvr_get_descriptor_secondary_offset(
+   const struct pvr_device *device,
+   const struct pvr_descriptor_set_layout *layout,
+   const struct pvr_descriptor_set_layout_binding *binding,
+   const uint32_t stage,
+   const uint32_t desc_idx)
+{
+   struct pvr_descriptor_size_info size_info;
+   uint32_t offset_in_dwords;
+
+   assert(stage < ARRAY_SIZE(layout->memory_layout_in_dwords_per_stage));
+   assert(desc_idx < binding->descriptor_count);
+
+   pvr_descriptor_size_info_init(device, binding->type, &size_info);
+
+   offset_in_dwords =
+      layout->memory_layout_in_dwords_per_stage[stage].secondary_offset +
+      binding->per_stage_offset_in_dwords[stage].secondary +
+      desc_idx * size_info.secondary;
+
+   /* Offset must be less than 16bits. */
+   assert(offset_in_dwords < UINT16_MAX);
+
+   return (uint16_t)offset_in_dwords;
+}
+
+/* Writes a sampler's hardware state into the descriptor memory at `primary`.
+ * Currently a stub; callers (e.g. pvr_descriptor_set_create()) already pass
+ * the correct destination address.
+ */
+static void pvr_write_sampler_descriptor(uint32_t *primary,
+                                         const struct pvr_sampler *sampler)
+{
+   /* TODO: Implement based on WriteSamplerDescriptor. */
+   pvr_finishme("Implement after vkCreateSampler API.");
+}
+
+/* Upper bound, in dwords, on the descriptor memory backing one set. */
+#define PVR_MAX_DESCRIPTOR_MEM_SIZE_IN_DWORDS (4 * 1024)
+
+/* Allocates a descriptor set from `pool` with the given layout and
+ * pre-writes any immutable sampler descriptors into the set's device
+ * memory. Device memory is only allocated when the layout actually has
+ * bindings; an empty layout yields a set with set->pvr_bo == NULL
+ * (zeroed by vk_object_zalloc()).
+ */
+static VkResult
+pvr_descriptor_set_create(struct pvr_device *device,
+                          struct pvr_descriptor_pool *pool,
+                          const struct pvr_descriptor_set_layout *layout,
+                          struct pvr_descriptor_set **const descriptor_set_out)
+{
+   struct pvr_descriptor_set *set;
+   VkResult result;
+   size_t size;
+
+   size = sizeof(*set) + sizeof(set->descriptors[0]) * layout->descriptor_count;
+
+   /* TODO: Add support to allocate descriptors from descriptor pool, also
+    * check the required descriptors must not exceed max allowed descriptors.
+    */
+   set = vk_object_zalloc(&device->vk,
+                          &pool->alloc,
+                          size,
+                          VK_OBJECT_TYPE_DESCRIPTOR_SET);
+   if (!set)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   set->layout = layout;
+   set->pool = pool;
+
+   /* TODO: Add support to allocate device memory from a common pool. Look at
+    * something like anv. Also we can allocate a whole chunk of device memory
+    * for max descriptors supported by pool as done by v3dv. Also check the
+    * possibility if this can be removed from here and done on need basis.
+    */
+   if (layout->binding_count > 0) {
+      const uint32_t cache_line_size =
+         rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+      uint64_t bo_size = MIN2(pool->total_size_in_dwords,
+                              PVR_MAX_DESCRIPTOR_MEM_SIZE_IN_DWORDS) *
+                         sizeof(uint32_t);
+      void *map;
+
+      result = pvr_bo_alloc(device,
+                            device->heaps.general_heap,
+                            bo_size,
+                            cache_line_size,
+                            PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                            &set->pvr_bo);
+      if (result != VK_SUCCESS)
+         goto err_free_descriptor_set;
+
+      /* Fix: only dereference the buffer when one was actually allocated.
+       * Previously set->pvr_bo->bo->map was read unconditionally, which is
+       * a NULL pointer dereference for layouts with binding_count == 0.
+       */
+      map = set->pvr_bo->bo->map;
+
+      /* Pre-write immutable samplers into every stage's copy of the
+       * descriptor memory for sampler/combined-image-sampler bindings.
+       */
+      for (uint32_t i = 0; i < layout->binding_count; i++) {
+         const struct pvr_descriptor_set_layout_binding *binding =
+            &layout->bindings[i];
+
+         if (binding->descriptor_count == 0 ||
+             (binding->type != VK_DESCRIPTOR_TYPE_SAMPLER &&
+              binding->type != VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
+            continue;
+
+         for (uint32_t stage = 0;
+              stage < ARRAY_SIZE(binding->per_stage_offset_in_dwords);
+              stage++) {
+            if (!(binding->shader_stage_mask & (1U << stage)))
+               continue;
+
+            for (uint32_t j = 0; j < binding->descriptor_count; j++) {
+               uint32_t idx = binding->immutable_samplers_index + j;
+               struct pvr_sampler *sampler = layout->immutable_samplers[idx];
+               unsigned int offset_in_dwords =
+                  pvr_get_descriptor_primary_offset(device,
+                                                    layout,
+                                                    binding,
+                                                    stage,
+                                                    j);
+
+               /* For combined image samplers the sampler part follows the
+                * image part of the descriptor.
+                * NOTE(review): the 4-dword image size is implied here —
+                * confirm against pvr_descriptor_size_info_init().
+                */
+               if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
+                  offset_in_dwords += 4;
+
+               pvr_write_sampler_descriptor(
+                  map + offset_in_dwords * sizeof(uint32_t),
+                  sampler);
+            }
+         }
+      }
+   }
+
+   list_addtail(&set->link, &pool->descriptor_sets);
+
+   *descriptor_set_out = set;
+
+   return VK_SUCCESS;
+
+err_free_descriptor_set:
+   vk_object_free(&device->vk, &pool->alloc, set);
+
+   return result;
+}
+
+/* vkAllocateDescriptorSets() entry point.
+ *
+ * Creates one set per entry in pAllocateInfo->pSetLayouts. On failure the
+ * already-created sets are freed and the whole output array is set to
+ * VK_NULL_HANDLE, as the Vulkan spec requires.
+ */
+VkResult
+pvr_AllocateDescriptorSets(VkDevice _device,
+                           const VkDescriptorSetAllocateInfo *pAllocateInfo,
+                           VkDescriptorSet *pDescriptorSets)
+{
+   PVR_FROM_HANDLE(pvr_descriptor_pool, pool, pAllocateInfo->descriptorPool);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   VkResult result;
+   uint32_t i;
+
+   vk_foreach_struct (ext, pAllocateInfo->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+
+   for (i = 0; i < pAllocateInfo->descriptorSetCount; i++) {
+      PVR_FROM_HANDLE(pvr_descriptor_set_layout,
+                      layout,
+                      pAllocateInfo->pSetLayouts[i]);
+      struct pvr_descriptor_set *set = NULL;
+
+      result = pvr_descriptor_set_create(device, pool, layout, &set);
+      if (result != VK_SUCCESS)
+         goto err_free_descriptor_sets;
+
+      pDescriptorSets[i] = pvr_descriptor_set_to_handle(set);
+   }
+
+   return VK_SUCCESS;
+
+err_free_descriptor_sets:
+   /* Free only the first `i` sets — those are the ones that were written
+    * to pDescriptorSets before the failure.
+    */
+   pvr_FreeDescriptorSets(_device,
+                          pAllocateInfo->descriptorPool,
+                          i,
+                          pDescriptorSets);
+
+   for (i = 0; i < pAllocateInfo->descriptorSetCount; i++)
+      pDescriptorSets[i] = VK_NULL_HANDLE;
+
+   return result;
+}
+
+/* vkFreeDescriptorSets() entry point.
+ *
+ * VK_NULL_HANDLE entries in pDescriptorSets are legal and simply skipped.
+ */
+VkResult pvr_FreeDescriptorSets(VkDevice _device,
+                                VkDescriptorPool descriptorPool,
+                                uint32_t count,
+                                const VkDescriptorSet *pDescriptorSets)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_descriptor_pool, pool, descriptorPool);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct pvr_descriptor_set *set =
+         pvr_descriptor_set_from_handle(pDescriptorSets[i]);
+
+      if (set)
+         pvr_free_descriptor_set(device, pool, set);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* qsort()/bsearch() comparator ordering layout bindings by their
+ * binding_number, ascending.
+ */
+static int pvr_compare_layout_binding(const void *a, const void *b)
+{
+   const struct pvr_descriptor_set_layout_binding *const binding_a = a;
+   const struct pvr_descriptor_set_layout_binding *const binding_b = b;
+
+   if (binding_a->binding_number == binding_b->binding_number)
+      return 0;
+
+   return (binding_a->binding_number < binding_b->binding_number) ? -1 : 1;
+}
+
+/* This function does not assume that the binding will always exist for a
+ * particular binding_num. Caller should check before using the return pointer.
+ *
+ * Looks the binding up with bsearch(); layout->bindings must therefore be
+ * sorted by binding_number (see pvr_compare_layout_binding()).
+ */
+static struct pvr_descriptor_set_layout_binding *
+pvr_get_descriptor_binding(const struct pvr_descriptor_set_layout *layout,
+                           const uint32_t binding_num)
+{
+   /* Only binding_number is read by the comparator, so a mostly-zeroed key
+    * is sufficient.
+    */
+   const struct pvr_descriptor_set_layout_binding key = {
+      .binding_number = binding_num,
+   };
+
+   return bsearch(&key,
+                  layout->bindings,
+                  layout->binding_count,
+                  sizeof(key),
+                  pvr_compare_layout_binding);
+}
+
+/* Applies a buffer-type VkWriteDescriptorSet to `set`: the CPU-side
+ * descriptor records are always updated and, for non-dynamic buffers, the
+ * device address (primary) and range (secondary) are also copied into the
+ * set's mapped descriptor memory (`mem_ptr`, in dwords) for every stage in
+ * [start_stage, end_stage) that uses the binding.
+ */
+static void
+pvr_descriptor_update_buffer_info(const struct pvr_device *device,
+                                  const VkWriteDescriptorSet *write_set,
+                                  struct pvr_descriptor_set *set,
+                                  uint32_t *mem_ptr,
+                                  uint32_t start_stage,
+                                  uint32_t end_stage)
+{
+   const struct pvr_descriptor_set_layout_binding *binding;
+   struct pvr_descriptor_size_info size_info;
+   bool is_dynamic;
+
+   binding = pvr_get_descriptor_binding(set->layout, write_set->dstBinding);
+   /* Binding should not be NULL. */
+   assert(binding);
+
+   is_dynamic = (binding->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC) ||
+                (binding->type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC);
+
+   pvr_descriptor_size_info_init(device, binding->type, &size_info);
+
+   /* Only need to update the buffer if it is actually being used. If it was
+    * not present in any stage, then the shader_stage_mask would be 0 and we
+    * can skip this update.
+    */
+   if (binding->shader_stage_mask == 0)
+      return;
+
+   for (uint32_t i = 0; i < write_set->descriptorCount; i++) {
+      const VkDescriptorBufferInfo *buffer_info = &write_set->pBufferInfo[i];
+      PVR_FROM_HANDLE(pvr_buffer, buffer, buffer_info->buffer);
+      const uint32_t desc_idx =
+         binding->descriptor_index + write_set->dstArrayElement + i;
+      uint64_t addr = buffer->dev_addr.addr + buffer_info->offset;
+      /* VK_WHOLE_SIZE resolves to the rest of the buffer past the offset. */
+      uint32_t range = (buffer_info->range == VK_WHOLE_SIZE)
+                          ? (buffer->size - buffer_info->offset)
+                          : (buffer_info->range);
+
+      set->descriptors[desc_idx].type = write_set->descriptorType;
+      set->descriptors[desc_idx].buffer_dev_addr.addr = addr;
+      set->descriptors[desc_idx].buffer_create_info_size = buffer->size;
+      set->descriptors[desc_idx].buffer_desc_range = range;
+
+      /* Dynamic buffers keep only the CPU-side record above; nothing is
+       * written into descriptor memory here. NOTE(review): presumably the
+       * dynamic data is emitted at bind time elsewhere — confirm.
+       */
+      if (is_dynamic)
+         continue;
+
+      /* Update the entries in the descriptor memory for static buffer. */
+      for (uint32_t j = start_stage; j < end_stage; j++) {
+         uint32_t primary_offset;
+         uint32_t secondary_offset;
+
+         if (!(binding->shader_stage_mask & (1U << j)))
+            continue;
+
+         /* Offset calculation functions expect descriptor_index to be
+          * binding relative not layout relative, so we have used
+          * write_set->dstArrayElement + i rather than desc_idx.
+          */
+         primary_offset =
+            pvr_get_descriptor_primary_offset(device,
+                                              set->layout,
+                                              binding,
+                                              j,
+                                              write_set->dstArrayElement + i);
+         secondary_offset =
+            pvr_get_descriptor_secondary_offset(device,
+                                                set->layout,
+                                                binding,
+                                                j,
+                                                write_set->dstArrayElement + i);
+
+         /* size_info sizes are in dwords; << 2 converts them to the byte
+          * counts memcpy() expects.
+          */
+         memcpy(mem_ptr + primary_offset, &addr, size_info.primary << 2);
+         memcpy(mem_ptr + secondary_offset, &range, size_info.secondary << 2);
+      }
+   }
+}
+
+/* vkUpdateDescriptorSets() entry point.
+ *
+ * Only uniform/storage buffer descriptors (static and dynamic) are currently
+ * implemented; other descriptor types and descriptor copies are still TODO.
+ */
+void pvr_UpdateDescriptorSets(VkDevice _device,
+                              uint32_t descriptorWriteCount,
+                              const VkWriteDescriptorSet *pDescriptorWrites,
+                              uint32_t descriptorCopyCount,
+                              const VkCopyDescriptorSet *pDescriptorCopies)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   for (uint32_t i = 0; i < descriptorWriteCount; i++) {
+      const VkWriteDescriptorSet *write_set = &pDescriptorWrites[i];
+      PVR_FROM_HANDLE(pvr_descriptor_set, set, write_set->dstSet);
+      /* CPU mapping of the set's descriptor memory.
+       * NOTE(review): set->pvr_bo is NULL for sets with no bindings (see
+       * pvr_descriptor_set_create()); such sets have nothing to write, but
+       * this dereference assumes they never reach here — confirm.
+       */
+      uint32_t *map = set->pvr_bo->bo->map;
+
+      vk_foreach_struct (ext, write_set->pNext) {
+         pvr_debug_ignored_stype(ext->sType);
+      }
+
+      switch (write_set->descriptorType) {
+      case VK_DESCRIPTOR_TYPE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+         pvr_finishme("Update support missing for %d descriptor type\n",
+                      write_set->descriptorType);
+         break;
+
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+         /* Buffer writes cover every stage allocation. */
+         pvr_descriptor_update_buffer_info(device,
+                                           write_set,
+                                           set,
+                                           map,
+                                           0,
+                                           PVR_STAGE_ALLOCATION_COUNT);
+         break;
+
+      default:
+         unreachable("Unknown descriptor type");
+         break;
+      }
+   }
+
+   if (descriptorCopyCount > 0)
+      pvr_finishme("Descriptor copying support missing\n");
+}
diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c
new file mode 100644 (file)
index 0000000..ece61f9
--- /dev/null
@@ -0,0 +1,2034 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on v3dv driver which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+#include <xf86drm.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue_compiler.h"
+#include "util/build_id.h"
+#include "util/log.h"
+#include "util/mesa-sha1.h"
+#include "util/os_misc.h"
+#include "util/u_math.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+/* Sizing parameters for the device's global parameter-buffer free list:
+ * start at 2MiB, grow in 1MiB steps, never exceed 256MiB.
+ */
+#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
+#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
+#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)
+
+/* The grow threshold is a percentage. This is intended to be 12.5%, but has
+ * been rounded up since the percentage is treated as an integer.
+ */
+#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
+
+#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
+#   define PVR_USE_WSI_PLATFORM
+#endif
+
+/* Vulkan API version advertised by this driver (1.0.x). */
+#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
+
+/* Instance-level extensions unconditionally advertised by the driver.
+ * WSI-related entries are compiled in only when the corresponding
+ * platform support is enabled at build time.
+ */
+static const struct vk_instance_extension_table pvr_instance_extensions = {
+#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
+   .KHR_display = true,
+#endif
+   .KHR_external_memory_capabilities = true,
+   .KHR_get_physical_device_properties2 = true,
+#if defined(PVR_USE_WSI_PLATFORM)
+   .KHR_surface = true,
+#endif
+   .EXT_debug_report = true,
+   .EXT_debug_utils = true,
+};
+
+/* Fill *extensions with the device-level extensions this driver supports.
+ *
+ * Note: pdevice is currently unused — every listed extension is supported
+ * unconditionally (WSI entries depend only on build-time configuration).
+ */
+static void pvr_physical_device_get_supported_extensions(
+   const struct pvr_physical_device *pdevice,
+   struct vk_device_extension_table *extensions)
+{
+   struct vk_device_extension_table supported = { 0 };
+
+   supported.KHR_external_memory = true;
+   supported.KHR_external_memory_fd = true;
+#if defined(PVR_USE_WSI_PLATFORM)
+   supported.KHR_swapchain = true;
+#endif
+   supported.EXT_external_memory_dma_buf = true;
+   supported.EXT_private_data = true;
+
+   *extensions = supported;
+}
+
+/* Report the instance-level Vulkan API version implemented by the driver. */
+VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
+{
+   *pApiVersion = PVR_API_VERSION;
+   return VK_SUCCESS;
+}
+
+/* Enumerate instance extensions. This driver exposes no layers, so any
+ * layer name is rejected per the Vulkan spec.
+ */
+VkResult
+pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
+                                         uint32_t *pPropertyCount,
+                                         VkExtensionProperties *pProperties)
+{
+   if (pLayerName)
+      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+
+   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
+                                                     pPropertyCount,
+                                                     pProperties);
+}
+
+/* Create a VkInstance: allocate the pvr_instance wrapper, build the
+ * instance dispatch table (driver entrypoints first, WSI as fallback)
+ * and initialize the common vk_instance. Physical device enumeration is
+ * deferred until vkEnumeratePhysicalDevices().
+ */
+VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkInstance *pInstance)
+{
+   struct vk_instance_dispatch_table dispatch_table;
+   struct pvr_instance *instance;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
+
+   if (!pAllocator)
+      pAllocator = vk_default_allocator();
+
+   instance = vk_alloc(pAllocator,
+                       sizeof(*instance),
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!instance)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Driver entrypoints take precedence (overwrite == true)... */
+   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+                                               &pvr_instance_entrypoints,
+                                               true);
+
+   /* ...with WSI entrypoints only filling gaps (overwrite == false). */
+   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
+                                               &wsi_instance_entrypoints,
+                                               false);
+
+   result = vk_instance_init(&instance->vk,
+                             &pvr_instance_extensions,
+                             &dispatch_table,
+                             pCreateInfo,
+                             pAllocator);
+   if (result != VK_SUCCESS) {
+      vk_free(pAllocator, instance);
+      return vk_error(NULL, result);
+   }
+
+   /* -1 marks "not yet enumerated"; see pvr_EnumeratePhysicalDevices(). */
+   instance->physical_devices_count = -1;
+
+   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+
+   *pInstance = pvr_instance_to_handle(instance);
+
+   return VK_SUCCESS;
+}
+
+/* Tear down a pvr_physical_device, releasing resources in reverse order
+ * of creation in pvr_physical_device_init().
+ */
+static void pvr_physical_device_finish(struct pvr_physical_device *pdevice)
+{
+   /* Be careful here. The device might not have been initialized. This can
+    * happen since initialization is done in vkEnumeratePhysicalDevices() but
+    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
+    * before freeing or that the freeing functions accept NULL pointers.
+    */
+
+   if (pdevice->compiler)
+      rogue_compiler_destroy(pdevice->compiler);
+
+   pvr_wsi_finish(pdevice);
+
+   /* name was allocated with asprintf(), so plain free() is correct. */
+   free(pdevice->name);
+
+   if (pdevice->ws)
+      pvr_winsys_destroy(pdevice->ws);
+
+   if (pdevice->master_fd >= 0) {
+      vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);
+      close(pdevice->master_fd);
+   }
+
+   if (pdevice->render_fd >= 0) {
+      vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
+      close(pdevice->render_fd);
+   }
+   vk_physical_device_finish(&pdevice->vk);
+}
+
+/* Destroy a VkInstance: finish the (single, possibly uninitialized)
+ * physical device first, then the common vk_instance, then free the
+ * wrapper itself.
+ */
+void pvr_DestroyInstance(VkInstance _instance,
+                         const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
+
+   /* Destroying VK_NULL_HANDLE is a no-op per the Vulkan spec. */
+   if (!instance)
+      return;
+
+   pvr_physical_device_finish(&instance->physical_device);
+
+   VG(VALGRIND_DESTROY_MEMPOOL(instance));
+
+   vk_instance_finish(&instance->vk);
+   vk_free(&instance->vk.alloc, instance);
+}
+
+/* Derive the pipeline cache UUID for this physical device.
+ *
+ * The UUID is the first VK_UUID_SIZE bytes of a SHA-1 over the driver's
+ * ELF build-id and the packed BVNC, so pipeline caches are invalidated
+ * whenever either the driver binary or the GPU core changes.
+ */
+static VkResult
+pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice)
+{
+   const struct build_id_note *note =
+      build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids);
+   struct mesa_sha1 ctx;
+   uint8_t digest[20];
+   uint64_t packed_bvnc;
+
+   if (!note) {
+      return vk_errorf(pdevice,
+                       VK_ERROR_INITIALIZATION_FAILED,
+                       "Failed to find build-id");
+   }
+
+   /* A full SHA-1 build-id (20 bytes) is required for a stable UUID. */
+   if (build_id_length(note) < 20) {
+      return vk_errorf(pdevice,
+                       VK_ERROR_INITIALIZATION_FAILED,
+                       "Build-id too short. It needs to be a SHA");
+   }
+
+   packed_bvnc = pvr_get_packed_bvnc(&pdevice->dev_info);
+
+   _mesa_sha1_init(&ctx);
+   _mesa_sha1_update(&ctx, build_id_data(note), build_id_length(note));
+   _mesa_sha1_update(&ctx, &packed_bvnc, sizeof(packed_bvnc));
+   _mesa_sha1_final(&ctx, digest);
+
+   memcpy(pdevice->pipeline_cache_uuid, digest, VK_UUID_SIZE);
+
+   return VK_SUCCESS;
+}
+
+/* Return the size in bytes to advertise for the device-local memory heap.
+ *
+ * The heap is capped at a fraction of system RAM so the GPU does not
+ * starve the rest of the system: half of RAM on machines with 4GiB or
+ * less, three quarters otherwise. Returns 0 if total RAM cannot be
+ * queried.
+ */
+static uint64_t pvr_compute_heap_size(void)
+{
+   const uint64_t four_gib = 4ULL * 1024ULL * 1024ULL * 1024ULL;
+   uint64_t total_ram;
+
+   if (!os_get_total_physical_memory(&total_ram))
+      return 0;
+
+   /* Use at most 1/2 of RAM on small systems, 3/4 on larger ones. */
+   return (total_ram <= four_gib) ? total_ram / 2U : total_ram * 3U / 4U;
+}
+
+/* Initialize a pvr_physical_device from the probed DRM render/primary
+ * nodes: open the device node(s), create the winsys, query device info,
+ * derive UUIDs, set up the single memory heap/type, and bring up WSI and
+ * the Rogue compiler. On failure, resources acquired so far are released
+ * via the goto-chain in reverse acquisition order.
+ */
+static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
+                                         struct pvr_instance *instance,
+                                         drmDevicePtr drm_render_device,
+                                         drmDevicePtr drm_primary_device)
+{
+   const char *path = drm_render_device->nodes[DRM_NODE_RENDER];
+   struct vk_device_extension_table supported_extensions;
+   struct vk_physical_device_dispatch_table dispatch_table;
+   const char *primary_path;
+   VkResult result;
+   int ret;
+
+   /* Opt-in gate: the driver is non-conformant, so refuse to initialize
+    * unless the user explicitly acknowledges that via the environment.
+    */
+   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
+      return vk_errorf(instance,
+                       VK_ERROR_INCOMPATIBLE_DRIVER,
+                       "WARNING: powervr is not a conformant Vulkan "
+                       "implementation. Pass "
+                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
+                       "what you're doing.");
+   }
+
+   pvr_physical_device_get_supported_extensions(pdevice, &supported_extensions);
+
+   /* Driver entrypoints take precedence; WSI ones only fill gaps. */
+   vk_physical_device_dispatch_table_from_entrypoints(
+      &dispatch_table,
+      &pvr_physical_device_entrypoints,
+      true);
+
+   vk_physical_device_dispatch_table_from_entrypoints(
+      &dispatch_table,
+      &wsi_physical_device_entrypoints,
+      false);
+
+   result = vk_physical_device_init(&pdevice->vk,
+                                    &instance->vk,
+                                    &supported_extensions,
+                                    &dispatch_table);
+   if (result != VK_SUCCESS)
+      return result;
+
+   pdevice->instance = instance;
+
+   pdevice->render_fd = open(path, O_RDWR | O_CLOEXEC);
+   if (pdevice->render_fd < 0) {
+      result = vk_errorf(instance,
+                         VK_ERROR_INCOMPATIBLE_DRIVER,
+                         "Failed to open device %s",
+                         path);
+      goto err_vk_physical_device_finish;
+   }
+
+   pdevice->render_path = vk_strdup(&pdevice->vk.instance->alloc,
+                                    path,
+                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+   if (!pdevice->render_path) {
+      result = VK_ERROR_OUT_OF_HOST_MEMORY;
+      goto err_close_render_fd;
+   }
+
+   /* The primary (master) node is only needed for VK_KHR_display. */
+   if (instance->vk.enabled_extensions.KHR_display) {
+      primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];
+
+      pdevice->master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
+   } else {
+      pdevice->master_fd = -1;
+   }
+
+   if (pdevice->master_fd >= 0) {
+      pdevice->master_path = vk_strdup(&pdevice->vk.instance->alloc,
+                                       primary_path,
+                                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+      if (!pdevice->master_path) {
+         result = VK_ERROR_OUT_OF_HOST_MEMORY;
+         goto err_close_master_fd;
+      }
+   } else {
+      pdevice->master_path = NULL;
+   }
+
+   pdevice->ws = pvr_winsys_create(pdevice->master_fd,
+                                   pdevice->render_fd,
+                                   &pdevice->vk.instance->alloc);
+   if (!pdevice->ws) {
+      result = VK_ERROR_INITIALIZATION_FAILED;
+      goto err_vk_free_master_path;
+   }
+
+   ret = pdevice->ws->ops->device_info_init(pdevice->ws, &pdevice->dev_info);
+   if (ret) {
+      result = VK_ERROR_INITIALIZATION_FAILED;
+      goto err_pvr_winsys_destroy;
+   }
+
+   result = pvr_physical_device_init_uuids(pdevice);
+   if (result != VK_SUCCESS)
+      goto err_pvr_winsys_destroy;
+
+   /* Freed with free() in pvr_physical_device_finish(). */
+   if (asprintf(&pdevice->name,
+                "Imagination PowerVR %s %s",
+                pdevice->dev_info.ident.series_name,
+                pdevice->dev_info.ident.public_name) < 0) {
+      result = vk_errorf(instance,
+                         VK_ERROR_OUT_OF_HOST_MEMORY,
+                         "Unable to allocate memory to store device name");
+      goto err_pvr_winsys_destroy;
+   }
+
+   /* Setup available memory heaps and types */
+   pdevice->memory.memoryHeapCount = 1;
+   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
+   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
+
+   pdevice->memory.memoryTypeCount = 1;
+   pdevice->memory.memoryTypes[0].propertyFlags =
+      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
+      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+   pdevice->memory.memoryTypes[0].heapIndex = 0;
+
+   result = pvr_wsi_init(pdevice);
+   if (result != VK_SUCCESS) {
+      vk_error(instance, result);
+      goto err_free_name;
+   }
+
+   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
+   if (!pdevice->compiler) {
+      result = vk_errorf(instance,
+                         VK_ERROR_INITIALIZATION_FAILED,
+                         "Failed to initialize Rogue compiler");
+      goto err_wsi_finish;
+   }
+
+   return VK_SUCCESS;
+
+/* Error labels fall through so each releases its resource then the ones
+ * acquired before it.
+ */
+err_wsi_finish:
+   pvr_wsi_finish(pdevice);
+
+err_free_name:
+   free(pdevice->name);
+
+err_pvr_winsys_destroy:
+   pvr_winsys_destroy(pdevice->ws);
+
+err_vk_free_master_path:
+   vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);
+
+err_close_master_fd:
+   if (pdevice->master_fd >= 0)
+      close(pdevice->master_fd);
+
+   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
+
+err_close_render_fd:
+   close(pdevice->render_fd);
+
+err_vk_physical_device_finish:
+   vk_physical_device_finish(&pdevice->vk);
+
+   return result;
+}
+
+/* Probe DRM platform devices and, if both a compatible render node
+ * (mediatek,mt8173-gpu) and a compatible primary/display node
+ * (mediatek-drm prefix) are found, initialize the single physical
+ * device. Finding no suitable device is not an error: the instance
+ * simply reports zero physical devices.
+ */
+static VkResult pvr_enumerate_devices(struct pvr_instance *instance)
+{
+   /* FIXME: It should be possible to query the number of devices via
+    * drmGetDevices2 by passing in NULL for the 'devices' parameter. However,
+    * this was broken by libdrm commit
+    * 8cb12a2528d795c45bba5f03b3486b4040fb0f45, so, until this is fixed in
+    * upstream, hard-code the maximum number of devices.
+    */
+   drmDevicePtr drm_primary_device = NULL;
+   drmDevicePtr drm_render_device = NULL;
+   drmDevicePtr drm_devices[8];
+   int max_drm_devices;
+   VkResult result;
+
+   instance->physical_devices_count = 0;
+
+   max_drm_devices = drmGetDevices2(0, drm_devices, ARRAY_SIZE(drm_devices));
+   if (max_drm_devices < 1)
+      return VK_SUCCESS;
+
+   for (unsigned i = 0; i < (unsigned)max_drm_devices; i++) {
+      if (drm_devices[i]->bustype != DRM_BUS_PLATFORM)
+         continue;
+
+      if (drm_devices[i]->available_nodes & (1 << DRM_NODE_RENDER)) {
+         char **compat;
+
+         /* Walk the NUL-terminated devicetree compatible-string list. */
+         compat = drm_devices[i]->deviceinfo.platform->compatible;
+         while (*compat) {
+            if (strncmp(*compat, "mediatek,mt8173-gpu", 19) == 0) {
+               drm_render_device = drm_devices[i];
+
+               mesa_logd("Found compatible render device '%s'.",
+                         drm_render_device->nodes[DRM_NODE_RENDER]);
+               break;
+            }
+            compat++;
+         }
+      } else if (drm_devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY) {
+         char **compat;
+
+         compat = drm_devices[i]->deviceinfo.platform->compatible;
+         while (*compat) {
+            if (strncmp(*compat, "mediatek-drm", 12) == 0) {
+               drm_primary_device = drm_devices[i];
+
+               mesa_logd("Found compatible primary device '%s'.",
+                         drm_primary_device->nodes[DRM_NODE_PRIMARY]);
+               break;
+            }
+            compat++;
+         }
+      }
+   }
+
+   if (drm_render_device && drm_primary_device) {
+      result = pvr_physical_device_init(&instance->physical_device,
+                                        instance,
+                                        drm_render_device,
+                                        drm_primary_device);
+      if (result == VK_SUCCESS)
+         instance->physical_devices_count = 1;
+      else if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
+         /* Incompatible driver (e.g. env gate not set) is not fatal to
+          * instance creation; just report no devices.
+          */
+         result = VK_SUCCESS;
+   } else {
+      result = VK_SUCCESS;
+   }
+
+   drmFreeDevices(drm_devices, max_drm_devices);
+
+   return result;
+}
+
+/* Enumerate physical devices, performing the (one-time, lazy) DRM probe
+ * on first call. At most one physical device is ever reported.
+ */
+VkResult pvr_EnumeratePhysicalDevices(VkInstance _instance,
+                                      uint32_t *pPhysicalDeviceCount,
+                                      VkPhysicalDevice *pPhysicalDevices)
+{
+   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
+   VK_OUTARRAY_MAKE(out, pPhysicalDevices, pPhysicalDeviceCount);
+   VkResult result;
+
+   /* -1 means enumeration has not happened yet (set in CreateInstance). */
+   if (instance->physical_devices_count < 0) {
+      result = pvr_enumerate_devices(instance);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   if (instance->physical_devices_count == 0)
+      return VK_SUCCESS;
+
+   assert(instance->physical_devices_count == 1);
+   vk_outarray_append (&out, p) {
+      *p = pvr_physical_device_to_handle(&instance->physical_device);
+   }
+
+   return vk_outarray_status(&out);
+}
+
+/* Report core Vulkan 1.0 feature support. Most entries are fixed for
+ * the hardware generation; robustBufferAccess and ASTC support are
+ * gated on per-core feature bits. Extension feature structs chained via
+ * pNext are not yet handled and are logged as ignored.
+ */
+void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
+                                    VkPhysicalDeviceFeatures2 *pFeatures)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+
+   pFeatures->features = (VkPhysicalDeviceFeatures){
+      .robustBufferAccess =
+         PVR_HAS_FEATURE(&pdevice->dev_info, robust_buffer_access),
+      .fullDrawIndexUint32 = true,
+      .imageCubeArray = true,
+      .independentBlend = true,
+      .geometryShader = false,
+      .tessellationShader = false,
+      .sampleRateShading = true,
+      .dualSrcBlend = false,
+      .logicOp = true,
+      .multiDrawIndirect = true,
+      .drawIndirectFirstInstance = true,
+      .depthClamp = true,
+      .depthBiasClamp = true,
+      .fillModeNonSolid = false,
+      .depthBounds = false,
+      .wideLines = true,
+      .largePoints = true,
+      .alphaToOne = true,
+      .multiViewport = false,
+      .samplerAnisotropy = true,
+      .textureCompressionETC2 = true,
+      .textureCompressionASTC_LDR = PVR_HAS_FEATURE(&pdevice->dev_info, astc),
+      .textureCompressionBC = false,
+      .occlusionQueryPrecise = true,
+      .pipelineStatisticsQuery = false,
+      .vertexPipelineStoresAndAtomics = true,
+      .fragmentStoresAndAtomics = true,
+      .shaderTessellationAndGeometryPointSize = false,
+      .shaderImageGatherExtended = false,
+      .shaderStorageImageExtendedFormats = true,
+      .shaderStorageImageMultisample = false,
+      .shaderStorageImageReadWithoutFormat = true,
+      .shaderStorageImageWriteWithoutFormat = false,
+      .shaderUniformBufferArrayDynamicIndexing = true,
+      .shaderSampledImageArrayDynamicIndexing = true,
+      .shaderStorageBufferArrayDynamicIndexing = true,
+      .shaderStorageImageArrayDynamicIndexing = true,
+      .shaderClipDistance = true,
+      .shaderCullDistance = true,
+      .shaderFloat64 = false,
+      .shaderInt64 = true,
+      .shaderInt16 = true,
+      .shaderResourceResidency = false,
+      .shaderResourceMinLod = false,
+      .sparseBinding = false,
+      .sparseResidencyBuffer = false,
+      .sparseResidencyImage2D = false,
+      .sparseResidencyImage3D = false,
+      .sparseResidency2Samples = false,
+      .sparseResidency4Samples = false,
+      .sparseResidency8Samples = false,
+      .sparseResidency16Samples = false,
+      .sparseResidencyAliased = false,
+      .variableMultisampleRate = false,
+      .inheritedQueries = false,
+   };
+
+   vk_foreach_struct (ext, pFeatures->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
+/* clang-format off */
+/* FIXME: Clang-format places multiple initializers on the same line, fix this
+ * and remove clang-format on/off comments.
+ */
+/* Per-stage descriptor limits for BVNC 4.V.2.51 cores (Series 6XT
+ * GX6x50); selected in pvr_get_physical_device_descriptor_limits().
+ */
+static const struct pvr_descriptor_limits bvnc_4_V_2_51_descriptor_limits = {
+   .max_per_stage_resources = 456U,
+   .max_per_stage_samplers = 64U,
+   .max_per_stage_uniform_buffers = 96U,
+   .max_per_stage_storage_buffers = 96U,
+   .max_per_stage_sampled_images = 128U,
+   .max_per_stage_storage_images = 64U,
+   .max_per_stage_input_attachments = 8U,
+};
+/* clang-format on */
+
+/* Look up the descriptor limits table for the current GPU core.
+ *
+ * Only BVNC 4.V.2.51 (Series 6XT - GX6x50 - "Clyde") is currently known.
+ * Any other core logs an error and trips the debug assert.
+ * NOTE(review): in release builds this returns NULL and the caller in
+ * pvr_GetPhysicalDeviceProperties2() dereferences the result — confirm
+ * unknown cores cannot reach this point.
+ */
+static const struct pvr_descriptor_limits *
+pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice)
+{
+   const bool is_series_6xt_gx6x50 =
+      pdevice->dev_info.ident.b == 4 && pdevice->dev_info.ident.n == 2;
+
+   if (is_series_6xt_gx6x50)
+      return &bvnc_4_V_2_51_descriptor_limits;
+
+   vk_errorf(pdevice,
+             VK_ERROR_INCOMPATIBLE_DRIVER,
+             "No device ID found for BVNC %d.%d.%d.%d",
+             pdevice->dev_info.ident.b,
+             pdevice->dev_info.ident.v,
+             pdevice->dev_info.ident.n,
+             pdevice->dev_info.ident.c);
+
+   assert(false);
+
+   return NULL;
+}
+
+/* Report core Vulkan 1.0 physical device properties and limits.
+ * Hardware-dependent limits are derived from per-core feature values
+ * (with documented minima as fall-backs); the rest are fixed constants
+ * for this hardware generation. Extension property structs chained via
+ * pNext are not yet handled and are logged as ignored.
+ */
+void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
+                                      VkPhysicalDeviceProperties2 *pProperties)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+   const struct pvr_descriptor_limits *descriptor_limits =
+      pvr_get_physical_device_descriptor_limits(pdevice);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t max_multisample =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, max_multisample, 4);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t uvs_banks =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_banks, 2);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t uvs_pba_entries =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_pba_entries, 160);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t num_user_clip_planes =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, num_user_clip_planes, 8);
+
+   const uint32_t sub_pixel_precision =
+      PVR_HAS_FEATURE(&pdevice->dev_info, simple_internal_parameter_format)
+         ? 4U
+         : 8U;
+
+   const uint32_t max_render_size =
+      rogue_get_render_size_max(&pdevice->dev_info);
+
+   /* All VkSampleCountFlagBits up to and including max_multisample,
+    * e.g. 4 -> 0b111 (1x | 2x | 4x).
+    */
+   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
+
+   const uint32_t max_user_vertex_components =
+      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
+
+   VkPhysicalDeviceLimits limits = {
+      .maxImageDimension1D = max_render_size,
+      .maxImageDimension2D = max_render_size,
+      .maxImageDimension3D = 2U * 1024U,
+      .maxImageDimensionCube = max_render_size,
+      .maxImageArrayLayers = 2U * 1024U,
+      .maxTexelBufferElements = 64U * 1024U,
+      .maxUniformBufferRange = 128U * 1024U * 1024U,
+      .maxStorageBufferRange = 128U * 1024U * 1024U,
+      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
+      .maxMemoryAllocationCount = UINT32_MAX,
+      .maxSamplerAllocationCount = UINT32_MAX,
+      .bufferImageGranularity = 1U,
+      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
+
+      /* Maximum number of descriptor sets that can be bound at the same time.
+       */
+      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
+
+      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
+      .maxPerStageDescriptorSamplers =
+         descriptor_limits->max_per_stage_samplers,
+      .maxPerStageDescriptorUniformBuffers =
+         descriptor_limits->max_per_stage_uniform_buffers,
+      .maxPerStageDescriptorStorageBuffers =
+         descriptor_limits->max_per_stage_storage_buffers,
+      .maxPerStageDescriptorSampledImages =
+         descriptor_limits->max_per_stage_sampled_images,
+      .maxPerStageDescriptorStorageImages =
+         descriptor_limits->max_per_stage_storage_images,
+      .maxPerStageDescriptorInputAttachments =
+         descriptor_limits->max_per_stage_input_attachments,
+
+      .maxDescriptorSetSamplers = 256U,
+      .maxDescriptorSetUniformBuffers = 256U,
+      .maxDescriptorSetUniformBuffersDynamic = 8U,
+      .maxDescriptorSetStorageBuffers = 256U,
+      .maxDescriptorSetStorageBuffersDynamic = 8U,
+      .maxDescriptorSetSampledImages = 256U,
+      .maxDescriptorSetStorageImages = 256U,
+      .maxDescriptorSetInputAttachments = 256U,
+
+      /* Vertex Shader Limits */
+      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
+      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
+      .maxVertexInputAttributeOffset = 0xFFFF,
+      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
+      .maxVertexOutputComponents = max_user_vertex_components,
+
+      /* Tessellation Limits */
+      .maxTessellationGenerationLevel = 0,
+      .maxTessellationPatchSize = 0,
+      .maxTessellationControlPerVertexInputComponents = 0,
+      .maxTessellationControlPerVertexOutputComponents = 0,
+      .maxTessellationControlPerPatchOutputComponents = 0,
+      .maxTessellationControlTotalOutputComponents = 0,
+      .maxTessellationEvaluationInputComponents = 0,
+      .maxTessellationEvaluationOutputComponents = 0,
+
+      /* Geometry Shader Limits */
+      .maxGeometryShaderInvocations = 32U,
+      .maxGeometryInputComponents = max_user_vertex_components,
+      .maxGeometryOutputComponents = max_user_vertex_components,
+      .maxGeometryOutputVertices = 256U,
+      .maxGeometryTotalOutputComponents = 1024U,
+
+      /* Fragment Shader Limits */
+      .maxFragmentInputComponents = max_user_vertex_components,
+      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
+      .maxFragmentDualSrcAttachments = 0,
+      .maxFragmentCombinedOutputResources = 8U,
+
+      /* Compute Shader Limits */
+      .maxComputeSharedMemorySize = 16U * 1024U,
+      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
+      .maxComputeWorkGroupInvocations = 512U,
+      .maxComputeWorkGroupSize = { 512U, 512U, 64U },
+
+      /* Rasterization Limits */
+      .subPixelPrecisionBits = sub_pixel_precision,
+      .subTexelPrecisionBits = 8U,
+      .mipmapPrecisionBits = 4U,
+
+      .maxDrawIndexedIndexValue = UINT32_MAX,
+      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
+      .maxSamplerLodBias = 15.0f,
+      .maxSamplerAnisotropy = 16.0f,
+      .maxViewports = PVR_MAX_VIEWPORTS,
+
+      .maxViewportDimensions[0] = max_render_size,
+      .maxViewportDimensions[1] = max_render_size,
+      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
+      .viewportBoundsRange[1] = 2U * max_render_size,
+
+      .viewportSubPixelBits = 0,
+      .minMemoryMapAlignment = 64U,
+      .minTexelBufferOffsetAlignment = 16U,
+      .minUniformBufferOffsetAlignment = 4U,
+      .minStorageBufferOffsetAlignment = 4U,
+
+      .minTexelOffset = -8,
+      .maxTexelOffset = 7U,
+      .minTexelGatherOffset = 0,
+      .maxTexelGatherOffset = 0,
+      .minInterpolationOffset = -0.5,
+      .maxInterpolationOffset = 0.5,
+      .subPixelInterpolationOffsetBits = 4U,
+
+      .maxFramebufferWidth = max_render_size,
+      .maxFramebufferHeight = max_render_size,
+      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,
+
+      .framebufferColorSampleCounts = max_sample_bits,
+      .framebufferDepthSampleCounts = max_sample_bits,
+      .framebufferStencilSampleCounts = max_sample_bits,
+      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
+      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
+      .sampledImageColorSampleCounts = max_sample_bits,
+      .sampledImageIntegerSampleCounts = max_sample_bits,
+      .sampledImageDepthSampleCounts = max_sample_bits,
+      .sampledImageStencilSampleCounts = max_sample_bits,
+      .storageImageSampleCounts = max_sample_bits,
+      .maxSampleMaskWords = 1U,
+      .timestampComputeAndGraphics = false,
+      .timestampPeriod = 0.0f,
+      .maxClipDistances = num_user_clip_planes,
+      .maxCullDistances = num_user_clip_planes,
+      .maxCombinedClipAndCullDistances = num_user_clip_planes,
+      .discreteQueuePriorities = 2U,
+      .pointSizeRange[0] = 1.0f,
+      .pointSizeRange[1] = 511.0f,
+      .pointSizeGranularity = 0.0625f,
+      .lineWidthRange[0] = 1.0f / 16.0f,
+      .lineWidthRange[1] = 16.0f,
+      .lineWidthGranularity = 1.0f / 16.0f,
+      .strictLines = false,
+      .standardSampleLocations = true,
+      .optimalBufferCopyOffsetAlignment = 4U,
+      .optimalBufferCopyRowPitchAlignment = 4U,
+      .nonCoherentAtomSize = 1U,
+   };
+
+   pProperties->properties = (VkPhysicalDeviceProperties){
+      .apiVersion = PVR_API_VERSION,
+      .driverVersion = vk_get_driver_version(),
+      .vendorID = VK_VENDOR_ID_IMAGINATION,
+      .deviceID = pdevice->dev_info.ident.device_id,
+      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
+      .limits = limits,
+      .sparseProperties = { 0 },
+   };
+
+   snprintf(pProperties->properties.deviceName,
+            sizeof(pProperties->properties.deviceName),
+            "%s",
+            pdevice->name);
+
+   memcpy(pProperties->properties.pipelineCacheUUID,
+          pdevice->pipeline_cache_uuid,
+          VK_UUID_SIZE);
+
+   vk_foreach_struct (ext, pProperties->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
+/* Properties of the single queue family exposed by this driver.
+ *
+ * Every queue supports graphics, compute and transfer work.
+ * timestampValidBits is 0 because timestamp queries are not supported
+ * (timestampComputeAndGraphics is false in the device properties).
+ *
+ * Note: "static const" rather than "const static" — a storage-class
+ * specifier anywhere but first is an obsolescent feature (C11 §6.11.5)
+ * and the rest of this file uses "static const".
+ */
+static const VkQueueFamilyProperties pvr_queue_family_properties = {
+   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
+                 VK_QUEUE_TRANSFER_BIT,
+   .queueCount = PVR_MAX_QUEUES,
+   .timestampValidBits = 0,
+   .minImageTransferGranularity = { 1, 1, 1 },
+};
+
+/* Report the single queue family (see pvr_queue_family_properties). */
+void pvr_GetPhysicalDeviceQueueFamilyProperties(
+   VkPhysicalDevice physicalDevice,
+   uint32_t *pCount,
+   VkQueueFamilyProperties *pQueueFamilyProperties)
+{
+   VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pCount);
+
+   vk_outarray_append (&out, p) {
+      *p = pvr_queue_family_properties;
+   }
+}
+
+/* VkQueueFamilyProperties2 variant of the query above. Extension
+ * structs chained via pNext are not yet handled and are logged as
+ * ignored.
+ */
+void pvr_GetPhysicalDeviceQueueFamilyProperties2(
+   VkPhysicalDevice physicalDevice,
+   uint32_t *pQueueFamilyPropertyCount,
+   VkQueueFamilyProperties2 *pQueueFamilyProperties)
+{
+   VK_OUTARRAY_MAKE(out, pQueueFamilyProperties, pQueueFamilyPropertyCount);
+
+   vk_outarray_append (&out, p) {
+      p->queueFamilyProperties = pvr_queue_family_properties;
+
+      vk_foreach_struct (ext, p->pNext) {
+         pvr_debug_ignored_stype(ext->sType);
+      }
+   }
+}
+
+/* Report the memory heaps/types cached on the physical device during
+ * pvr_physical_device_init(). pNext extension structs are not yet
+ * handled and are logged as ignored.
+ */
+void pvr_GetPhysicalDeviceMemoryProperties2(
+   VkPhysicalDevice physicalDevice,
+   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+
+   pMemoryProperties->memoryProperties = pdevice->memory;
+
+   vk_foreach_struct (ext, pMemoryProperties->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
+/* Resolve an instance-level entrypoint by name via the common runtime. */
+PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
+                                           const char *pName)
+{
+   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
+   return vk_instance_get_proc_addr(&instance->vk,
+                                    &pvr_instance_entrypoints,
+                                    pName);
+}
+
+/* With version 1+ of the loader interface the ICD should expose
+ * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
+ * apps.
+ */
+PUBLIC
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
+{
+   /* Simply forward to the driver's regular GetInstanceProcAddr. */
+   return pvr_GetInstanceProcAddr(instance, pName);
+}
+
+/* With version 4+ of the loader interface the ICD should expose
+ * vk_icdGetPhysicalDeviceProcAddr().
+ */
+PUBLIC
+VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
+vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
+{
+   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
+   /* Resolution is delegated to the common vk_instance runtime. */
+   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
+}
+
+/* Build and upload the PDS compute fence program, storing the result in
+ * device->pds_compute_fence_program. Returns VK_SUCCESS or an allocation/
+ * upload error.
+ */
+static VkResult pvr_device_init_compute_pds_program(struct pvr_device *device)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   struct pvr_pds_compute_shader_program program = { 0U };
+   size_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   uint32_t *data_buffer;
+   uint32_t *code_buffer;
+   VkResult result;
+
+   /* The init loop below indexes all three register arrays with the same
+    * bound, so they must all have the same element count.
+    */
+   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
+                 ARRAY_SIZE(program.work_group_input_regs));
+   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
+                 ARRAY_SIZE(program.global_input_regs));
+
+   /* Initialize PDS structure. */
+   for (uint32_t i = 0U; i < ARRAY_SIZE(program.local_input_regs); i++) {
+      program.local_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+      program.work_group_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+      program.global_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+   }
+
+   program.barrier_coefficient = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+
+   /* Fence kernel. */
+   program.fence = true;
+   program.clear_pds_barrier = true;
+
+   /* Calculate how much space we'll need for the compute shader PDS program.
+    */
+   pvr_pds_set_sizes_compute_shader(&program, dev_info);
+
+   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
+    * data size being in dwords.
+    */
+   /* Code size is in bytes, data size in dwords. */
+   staging_buffer_size =
+      program.data_size * sizeof(uint32_t) + program.code_size;
+
+   /* One host staging buffer holds the data segment followed by the code
+    * segment.
+    */
+   staging_buffer = vk_alloc(&device->vk.alloc,
+                             staging_buffer_size,
+                             8U,
+                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* The data-segment generator returns a pointer past what it wrote, which
+    * is used here as the start of the code segment.
+    */
+   data_buffer = staging_buffer;
+   code_buffer = pvr_pds_generate_compute_shader_data_segment(&program,
+                                                              data_buffer,
+                                                              dev_info);
+   pvr_pds_generate_compute_shader_code_segment(&program,
+                                                code_buffer,
+                                                dev_info);
+   result = pvr_gpu_upload_pds(device,
+                               data_buffer,
+                               program.data_size,
+                               PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
+                               code_buffer,
+                               program.code_size / sizeof(uint32_t),
+                               PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
+                               cache_line_size,
+                               &device->pds_compute_fence_program);
+
+   /* The staging copy is no longer needed after the upload (success or
+    * failure).
+    */
+   vk_free(&device->vk.alloc, staging_buffer);
+
+   return result;
+}
+
+/* FIXME: We should be calculating the size when we upload the code in
+ * pvr_srv_setup_static_pixel_event_program().
+ */
+static void pvr_device_get_pixel_event_pds_program_data_size(
+   uint32_t *const data_size_in_dwords_out)
+{
+   struct pvr_pds_event_program program = { 0 };
+
+   /* No data to DMA, just a DOUTU needed. */
+   program.num_emit_word_pairs = 0;
+
+   /* Only the data size of the sized program is of interest here. */
+   pvr_pds_set_sizes_pixel_event(&program);
+
+   *data_size_in_dwords_out = program.data_size;
+}
+
+/* Create the logical device: open the DRM nodes, create the winsys, global
+ * free list, queues and the compute fence PDS program. Every step has a
+ * matching unwind label below.
+ */
+VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
+                          const VkDeviceCreateInfo *pCreateInfo,
+                          const VkAllocationCallbacks *pAllocator,
+                          VkDevice *pDevice)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+   struct pvr_instance *instance = pdevice->instance;
+   struct vk_device_dispatch_table dispatch_table;
+   struct pvr_device *device;
+   VkResult result;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
+
+   device = vk_alloc2(&pdevice->vk.instance->alloc,
+                      pAllocator,
+                      sizeof(*device),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!device)
+      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Populate the dispatch table from the driver entrypoints first, then
+    * from the WSI entrypoints.
+    */
+   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+                                             &pvr_device_entrypoints,
+                                             true);
+
+   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
+                                             &wsi_device_entrypoints,
+                                             false);
+
+   result = vk_device_init(&device->vk,
+                           &pdevice->vk,
+                           &dispatch_table,
+                           pCreateInfo,
+                           pAllocator);
+   if (result != VK_SUCCESS)
+      goto err_free_device;
+
+   /* Render node fd; failure to open it is fatal for device creation. */
+   device->render_fd = open(pdevice->render_path, O_RDWR | O_CLOEXEC);
+   if (device->render_fd < 0) {
+      result = vk_errorf(instance,
+                         VK_ERROR_INITIALIZATION_FAILED,
+                         "Failed to open device %s",
+                         pdevice->render_path);
+      goto err_vk_device_finish;
+   }
+
+   /* The master node is optional; -1 marks it as absent. Note an open()
+    * failure also leaves -1 here and is not treated as an error.
+    */
+   if (pdevice->master_path)
+      device->master_fd = open(pdevice->master_path, O_RDWR | O_CLOEXEC);
+   else
+      device->master_fd = -1;
+
+   device->instance = instance;
+   device->pdevice = pdevice;
+
+   if (pAllocator)
+      device->vk.alloc = *pAllocator;
+   else
+      device->vk.alloc = pdevice->vk.instance->alloc;
+
+   device->ws = pvr_winsys_create(device->master_fd,
+                                  device->render_fd,
+                                  &device->vk.alloc);
+   if (!device->ws) {
+      result = VK_ERROR_INITIALIZATION_FAILED;
+      goto err_close_master_fd;
+   }
+
+   /* Cache the winsys heap layout on the device for later allocations. */
+   device->ws->ops->get_heaps_info(device->ws, &device->heaps);
+
+   result = pvr_free_list_create(device,
+                                 PVR_GLOBAL_FREE_LIST_INITIAL_SIZE,
+                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
+                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
+                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
+                                 NULL /* parent_free_list */,
+                                 &device->global_free_list);
+   if (result != VK_SUCCESS)
+      goto err_pvr_winsys_destroy;
+
+   result = pvr_queues_create(device, pCreateInfo);
+   if (result != VK_SUCCESS)
+      goto err_pvr_free_list_destroy;
+
+   result = pvr_device_init_compute_pds_program(device);
+   if (result != VK_SUCCESS)
+      goto err_pvr_queues_destroy;
+
+   /* pEnabledFeatures is optional; when absent device->features keeps its
+    * vk_alloc2() (uninitialized) contents — NOTE(review): confirm nothing
+    * reads device->features in that case.
+    */
+   if (pCreateInfo->pEnabledFeatures)
+      memcpy(&device->features,
+             pCreateInfo->pEnabledFeatures,
+             sizeof(device->features));
+
+   /* FIXME: Move this to a later stage and possibly somewhere other than
+    * pvr_device. The purpose of this is so that we don't have to get the size
+    * on each kick.
+    */
+   pvr_device_get_pixel_event_pds_program_data_size(
+      &device->pixel_event_data_size_in_dwords);
+
+   device->global_queue_job_count = 0;
+   device->global_queue_present_count = 0;
+
+   *pDevice = pvr_device_to_handle(device);
+
+   return VK_SUCCESS;
+
+   /* Unwind in the reverse order of the setup above. */
+err_pvr_queues_destroy:
+   pvr_queues_destroy(device);
+
+err_pvr_free_list_destroy:
+   pvr_free_list_destroy(device->global_free_list);
+
+err_pvr_winsys_destroy:
+   pvr_winsys_destroy(device->ws);
+
+err_close_master_fd:
+   if (device->master_fd >= 0)
+      close(device->master_fd);
+
+   close(device->render_fd);
+
+err_vk_device_finish:
+   vk_device_finish(&device->vk);
+
+err_free_device:
+   vk_free(&device->vk.alloc, device);
+
+   return result;
+}
+
+/* Destroy the logical device, releasing resources in the reverse order of
+ * pvr_CreateDevice().
+ */
+void pvr_DestroyDevice(VkDevice _device,
+                       const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo);
+   pvr_queues_destroy(device);
+   pvr_free_list_destroy(device->global_free_list);
+   pvr_winsys_destroy(device->ws);
+
+   /* pvr_CreateDevice() only opens the master fd when the physical device
+    * has a master path; it was previously leaked on device destruction.
+    */
+   if (device->master_fd >= 0)
+      close(device->master_fd);
+
+   close(device->render_fd);
+   vk_device_finish(&device->vk);
+   vk_free(&device->vk.alloc, device);
+}
+
+VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
+                                              VkLayerProperties *pProperties)
+{
+   /* The driver exposes no layers: the count query reports zero; asking for
+    * actual properties is an error.
+    */
+   if (pProperties != NULL)
+      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
+
+   *pPropertyCount = 0;
+
+   return VK_SUCCESS;
+}
+
+/* Allocate device memory, either by importing an external fd (opaque fd or
+ * dma-buf) or by creating a fresh winsys buffer object.
+ */
+VkResult pvr_AllocateMemory(VkDevice _device,
+                            const VkMemoryAllocateInfo *pAllocateInfo,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkDeviceMemory *pMem)
+{
+   const VkImportMemoryFdInfoKHR *fd_info = NULL;
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
+   struct pvr_device_memory *mem;
+   VkResult result;
+
+   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
+   assert(pAllocateInfo->allocationSize > 0);
+
+   mem = vk_object_alloc(&device->vk,
+                         pAllocator,
+                         sizeof(*mem),
+                         VK_OBJECT_TYPE_DEVICE_MEMORY);
+   if (!mem)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Scan the pNext chain for WSI allocations and fd imports. */
+   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
+      switch ((unsigned)ext->sType) {
+      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
+         type = PVR_WINSYS_BO_TYPE_DISPLAY;
+         break;
+      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
+         fd_info = (void *)ext;
+         break;
+      default:
+         pvr_debug_ignored_stype(ext->sType);
+         break;
+      }
+   }
+
+   if (fd_info && fd_info->handleType) {
+      /* Import path: wrap the incoming fd in a winsys buffer object. */
+      VkDeviceSize aligned_alloc_size =
+         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);
+
+      assert(
+         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+      result = device->ws->ops->buffer_create_from_fd(device->ws,
+                                                      fd_info->fd,
+                                                      &mem->bo);
+      if (result != VK_SUCCESS)
+         goto err_vk_object_free_mem;
+
+      /* For security purposes, we reject importing the bo if it's smaller
+       * than the requested allocation size. This prevents a malicious client
+       * from passing a buffer to a trusted client, lying about the size, and
+       * telling the trusted client to try and texture from an image that goes
+       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
+       * in the trusted client. The trusted client can protect itself against
+       * this sort of attack but only if it can trust the buffer size.
+       */
+      if (aligned_alloc_size > mem->bo->size) {
+         result = vk_errorf(device,
+                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                            "Aligned requested size too large for the given fd "
+                            "%" PRIu64 "B > %" PRIu64 "B",
+                            pAllocateInfo->allocationSize,
+                            mem->bo->size);
+         device->ws->ops->buffer_destroy(mem->bo);
+         goto err_vk_object_free_mem;
+      }
+
+      /* From the Vulkan spec:
+       *
+       *    "Importing memory from a file descriptor transfers ownership of
+       *    the file descriptor from the application to the Vulkan
+       *    implementation. The application must not perform any operations on
+       *    the file descriptor after a successful import."
+       *
+       * If the import fails, we leave the file descriptor open.
+       */
+      close(fd_info->fd);
+   } else {
+      /* Align physical allocations to the page size of the heap that will be
+       * used when binding device memory (see pvr_bind_memory()) to ensure the
+       * entire allocation can be mapped.
+       */
+      const uint64_t alignment = device->heaps.general_heap->page_size;
+
+      /* FIXME: Need to determine the flags based on
+       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
+       *
+       * The alternative would be to store the flags alongside the memory
+       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
+       * that they can be looked up.
+       */
+      result = device->ws->ops->buffer_create(device->ws,
+                                              pAllocateInfo->allocationSize,
+                                              alignment,
+                                              type,
+                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
+                                              &mem->bo);
+      if (result != VK_SUCCESS)
+         goto err_vk_object_free_mem;
+   }
+
+   *pMem = pvr_device_memory_to_handle(mem);
+
+   return VK_SUCCESS;
+
+err_vk_object_free_mem:
+   vk_object_free(&device->vk, pAllocator, mem);
+
+   return result;
+}
+
+VkResult pvr_GetMemoryFdKHR(VkDevice _device,
+                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
+                            int *pFd)
+{
+   PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
+
+   /* Only opaque fd and dma-buf handle types are supported for export. */
+   assert(
+      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
+      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
+
+   /* Hand the export off to the winsys, which owns the underlying bo. */
+   return device->ws->ops->buffer_get_fd(mem->bo, pFd);
+}
+
+VkResult
+pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
+                             VkExternalMemoryHandleTypeFlagBits handleType,
+                             int fd,
+                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* dma-buf is the only handle type whose properties can be queried. */
+   if (handleType != VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)
+      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
+
+   /* FIXME: This should only allow memory types having
+    * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
+    * dma-buf should be imported using cacheable memory types,
+    * given exporter's mmap will always map it as cacheable.
+    * Ref:
+    * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
+    */
+   /* For now report every memory type as compatible. */
+   pMemoryFdProperties->memoryTypeBits =
+      (1 << device->pdevice->memory.memoryTypeCount) - 1;
+
+   return VK_SUCCESS;
+}
+
+void pvr_FreeMemory(VkDevice _device,
+                    VkDeviceMemory _mem,
+                    const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* Freeing VK_NULL_HANDLE is a no-op. */
+   if (mem == NULL)
+      return;
+
+   device->ws->ops->buffer_destroy(mem->bo);
+   vk_object_free(&device->vk, pAllocator, mem);
+}
+
+VkResult pvr_MapMemory(VkDevice _device,
+                       VkDeviceVize offset_unused_placeholder,
+                       void **ppData)
+
+void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
+
+   /* Nothing to do for a NULL handle or an allocation with no CPU mapping. */
+   if (mem == NULL || mem->bo->map == NULL)
+      return;
+
+   device->ws->ops->buffer_unmap(mem->bo);
+}
+
+VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
+                                     uint32_t memoryRangeCount,
+                                     const VkMappedMemoryRange *pMemoryRanges)
+{
+   /* Currently a no-op; presumably host mappings are coherent so no explicit
+    * flush is needed — TODO confirm.
+    */
+   return VK_SUCCESS;
+}
+
+VkResult
+pvr_InvalidateMappedMemoryRanges(VkDevice _device,
+                                 uint32_t memoryRangeCount,
+                                 const VkMappedMemoryRange *pMemoryRanges)
+{
+   /* Currently a no-op; presumably host mappings are coherent so no explicit
+    * invalidate is needed — TODO confirm.
+    */
+   return VK_SUCCESS;
+}
+
+void pvr_GetImageSparseMemoryRequirements2(
+   VkDevice device,
+   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
+   uint32_t *pSparseMemoryRequirementCount,
+   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
+{
+   /* Sparse images are not supported: report zero requirement entries. */
+   *pSparseMemoryRequirementCount = 0;
+}
+
+void pvr_GetDeviceMemoryCommitment(VkDevice device,
+                                   VkDeviceMemory memory,
+                                   VkDeviceSize *pCommittedMemoryInBytes)
+{
+   /* Commitment tracking is not implemented; always report zero bytes. */
+   *pCommittedMemoryInBytes = 0;
+}
+
+/* Map a range of a device memory allocation into the general heap's device-
+ * virtual address space.
+ *
+ * On success *vma_out holds the reserved VMA and *dev_addr_out the device-
+ * virtual address of the mapping; release both with pvr_unbind_memory().
+ */
+VkResult pvr_bind_memory(struct pvr_device *device,
+                         struct pvr_device_memory *mem,
+                         VkDeviceSize offset,
+                         VkDeviceSize size,
+                         VkDeviceSize alignment,
+                         struct pvr_winsys_vma **const vma_out,
+                         pvr_dev_addr_t *const dev_addr_out)
+{
+   /* The reservation is grown by the sub-page part of offset — NOTE(review):
+    * presumably so the mapping can start on a page boundary while still
+    * covering offset+size; confirm against heap_alloc()/vma_map() semantics.
+    */
+   VkDeviceSize virt_size =
+      size + (offset & (device->heaps.general_heap->page_size - 1));
+   struct pvr_winsys_vma *vma;
+   pvr_dev_addr_t dev_addr;
+
+   /* Valid usage:
+    *
+    *   "memoryOffset must be an integer multiple of the alignment member of
+    *    the VkMemoryRequirements structure returned from a call to
+    *    vkGetBufferMemoryRequirements with buffer"
+    *
+    *   "memoryOffset must be an integer multiple of the alignment member of
+    *    the VkMemoryRequirements structure returned from a call to
+    *    vkGetImageMemoryRequirements with image"
+    */
+   assert(offset % alignment == 0);
+   assert(offset < mem->bo->size);
+
+   /* First reserve device-virtual space... */
+   vma = device->ws->ops->heap_alloc(device->heaps.general_heap,
+                                     virt_size,
+                                     alignment);
+   if (!vma)
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   /* ...then map the buffer object range into it. A zero address signals
+    * failure, in which case the reservation is returned to the heap.
+    */
+   dev_addr = device->ws->ops->vma_map(vma, mem->bo, offset, size);
+   if (!dev_addr.addr) {
+      device->ws->ops->heap_free(vma);
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+   }
+
+   *dev_addr_out = dev_addr;
+   *vma_out = vma;
+
+   return VK_SUCCESS;
+}
+
+/* Release a mapping made by pvr_bind_memory(): unmap the VMA, then return
+ * the reservation to the heap.
+ */
+void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
+{
+   device->ws->ops->vma_unmap(vma);
+   device->ws->ops->heap_free(vma);
+}
+
+VkResult pvr_BindBufferMemory2(VkDevice _device,
+                               uint32_t bindInfoCount,
+                               const VkBindBufferMemoryInfo *pBindInfos)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   for (uint32_t i = 0; i < bindInfoCount; i++) {
+      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
+      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
+      VkResult result;
+
+      result = pvr_bind_memory(device,
+                               mem,
+                               pBindInfos[i].memoryOffset,
+                               buffer->size,
+                               buffer->alignment,
+                               &buffer->vma,
+                               &buffer->dev_addr);
+      if (result != VK_SUCCESS) {
+         /* Unwind: unbind everything bound by earlier iterations. */
+         for (uint32_t j = 0; j < i; j++) {
+            PVR_FROM_HANDLE(pvr_buffer, bound_buffer, pBindInfos[j].buffer);
+            pvr_unbind_memory(device, bound_buffer->vma);
+         }
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+VkResult pvr_QueueBindSparse(VkQueue _queue,
+                             uint32_t bindInfoCount,
+                             const VkBindSparseInfo *pBindInfo,
+                             VkFence fence)
+{
+   /* NOTE(review): returns success without performing any binding — sparse
+    * binding appears unimplemented; confirm sparse features are not
+    * advertised.
+    */
+   return VK_SUCCESS;
+}
+
+/* Event functions. */
+
+/* TODO: Unimplemented stub — asserts in debug builds, returns VK_SUCCESS in
+ * release builds without creating anything.
+ */
+VkResult pvr_CreateEvent(VkDevice _device,
+                         const VkEventCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkEvent *pEvent)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* TODO: Unimplemented stub — asserts in debug builds, no-op in release. */
+void pvr_DestroyEvent(VkDevice _device,
+                      VkEvent _event,
+                      const VkAllocationCallbacks *pAllocator)
+{
+   assert(!"Unimplemented");
+}
+
+/* TODO: Unimplemented stub — asserts in debug builds, returns VK_SUCCESS. */
+VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* TODO: Unimplemented stub — asserts in debug builds, returns VK_SUCCESS. */
+VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* TODO: Unimplemented stub — asserts in debug builds, returns VK_SUCCESS. */
+VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* Buffer functions. */
+
+VkResult pvr_CreateBuffer(VkDevice _device,
+                          const VkBufferCreateInfo *pCreateInfo,
+                          const VkAllocationCallbacks *pAllocator,
+                          VkBuffer *pBuffer)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   const uint32_t alignment = 4096;
+   struct pvr_buffer *buffer;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
+   assert(pCreateInfo->usage != 0);
+
+   /* We check against (ULONG_MAX - alignment) to prevent overflow issues */
+   if (pCreateInfo->size >= ULONG_MAX - alignment)
+      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   /* Zero-allocated so buffer->vma stays NULL until the buffer is bound. */
+   buffer = vk_object_zalloc(&device->vk,
+                             pAllocator,
+                             sizeof(*buffer),
+                             VK_OBJECT_TYPE_BUFFER);
+   if (!buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   buffer->size = pCreateInfo->size;
+   buffer->alignment = alignment;
+
+   *pBuffer = pvr_buffer_to_handle(buffer);
+
+   return VK_SUCCESS;
+}
+
+void pvr_DestroyBuffer(VkDevice _device,
+                       VkBuffer _buffer,
+                       const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);
+
+   /* Destroying VK_NULL_HANDLE is a no-op. */
+   if (!buffer)
+      return;
+
+   /* A buffer that was never bound has no VMA (vk_object_zalloc() in
+    * pvr_CreateBuffer() leaves buffer->vma NULL), and destroying an unbound
+    * buffer is legal; unbinding unconditionally would pass NULL into the
+    * winsys.
+    */
+   if (buffer->vma)
+      pvr_unbind_memory(device, buffer->vma);
+
+   vk_object_free(&device->vk, pAllocator, buffer);
+}
+
+/* TODO: Unimplemented stub — asserts in debug builds, no-op in release. */
+void pvr_DestroySampler(VkDevice _device,
+                        VkSampler _sampler,
+                        const VkAllocationCallbacks *pAllocator)
+{
+   assert(!"Unimplemented");
+}
+
+/* Copy a block of host memory into a newly allocated buffer in the given
+ * heap. On success *pvr_bo_out owns the new allocation.
+ */
+VkResult pvr_gpu_upload(struct pvr_device *device,
+                        struct pvr_winsys_heap *heap,
+                        const void *data,
+                        size_t size,
+                        uint64_t alignment,
+                        struct pvr_bo **const pvr_bo_out)
+{
+   struct pvr_bo *upload_bo = NULL;
+   VkResult result;
+
+   assert(size > 0);
+
+   /* Allocate CPU-mapped so the payload can be copied in directly. */
+   result = pvr_bo_alloc(device,
+                         heap,
+                         size,
+                         alignment,
+                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                         &upload_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   memcpy(upload_bo->bo->map, data, size);
+   pvr_bo_cpu_unmap(device, upload_bo);
+
+   *pvr_bo_out = upload_bo;
+
+   return VK_SUCCESS;
+}
+
+/* Upload USC shader code into the USC heap. On success *pvr_bo_out owns the
+ * new allocation.
+ */
+VkResult pvr_gpu_upload_usc(struct pvr_device *device,
+                            const void *code,
+                            size_t code_size,
+                            uint64_t code_alignment,
+                            struct pvr_bo **const pvr_bo_out)
+{
+   struct pvr_bo *usc_bo = NULL;
+   VkResult result;
+
+   assert(code_size > 0);
+
+   /* The USC will prefetch the next instruction, so over allocate by 1
+    * instruction to prevent reading off the end of a page into a potentially
+    * unallocated page.
+    */
+   result = pvr_bo_alloc(device,
+                         device->heaps.usc_heap,
+                         code_size + ROGUE_MAX_INSTR_BYTES,
+                         code_alignment,
+                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
+                         &usc_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   memcpy(usc_bo->bo->map, code, code_size);
+   pvr_bo_cpu_unmap(device, usc_bo);
+
+   *pvr_bo_out = usc_bo;
+
+   return VK_SUCCESS;
+}
+
+/**
+ * \brief Upload PDS program data and code segments from host memory to device
+ * memory.
+ *
+ * \param[in] device            Logical device pointer.
+ * \param[in] data              Pointer to PDS data segment to upload.
+ * \param[in] data_size_dwords  Size of PDS data segment in dwords.
+ * \param[in] data_alignment    Required alignment of the PDS data segment in
+ *                              bytes. Must be a power of two.
+ * \param[in] code              Pointer to PDS code segment to upload.
+ * \param[in] code_size_dwords  Size of PDS code segment in dwords.
+ * \param[in] code_alignment    Required alignment of the PDS code segment in
+ *                              bytes. Must be a power of two.
+ * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
+ *                              program in bytes.
+ * \param[out] pds_upload_out   On success will be initialized based on the
+ *                              uploaded PDS program.
+ * \return VK_SUCCESS on success, or error code otherwise.
+ */
+VkResult pvr_gpu_upload_pds(struct pvr_device *device,
+                            const uint32_t *data,
+                            uint32_t data_size_dwords,
+                            uint32_t data_alignment,
+                            const uint32_t *code,
+                            uint32_t code_size_dwords,
+                            uint32_t code_alignment,
+                            uint64_t min_alignment,
+                            struct pvr_pds_upload *const pds_upload_out)
+{
+   /* All alignment and sizes below are in bytes. */
+   const size_t data_size = data_size_dwords * sizeof(*data);
+   const size_t code_size = code_size_dwords * sizeof(*code);
+   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
+   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
+   /* Data segment first; the code segment starts at the next
+    * code_alignment boundary within the same bo.
+    */
+   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
+   const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
+   const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
+                                     : data_aligned_size;
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_CPU_MAPPED |
+                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
+   VkResult result;
+
+   /* At least one segment must be provided; a provided segment must have a
+    * non-zero size and alignment.
+    */
+   assert(code || data);
+   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
+   assert(!data || (data_size_dwords != 0 && data_alignment != 0));
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.pds_heap,
+                         bo_size,
+                         bo_alignment,
+                         bo_flags,
+                         &pds_upload_out->pvr_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (data) {
+      memcpy(pds_upload_out->pvr_bo->bo->map, data, data_size);
+
+      /* Offsets are relative to the PDS heap base address. */
+      pds_upload_out->data_offset = pds_upload_out->pvr_bo->vma->dev_addr.addr -
+                                    device->heaps.pds_heap->base_addr.addr;
+
+      /* Store data size in dwords. */
+      assert(data_aligned_size % 4 == 0);
+      pds_upload_out->data_size = data_aligned_size / 4;
+   } else {
+      pds_upload_out->data_offset = 0;
+      pds_upload_out->data_size = 0;
+   }
+
+   if (code) {
+      memcpy((uint8_t *)pds_upload_out->pvr_bo->bo->map + code_offset,
+             code,
+             code_size);
+
+      /* Offsets are relative to the PDS heap base address. */
+      pds_upload_out->code_offset =
+         (pds_upload_out->pvr_bo->vma->dev_addr.addr + code_offset) -
+         device->heaps.pds_heap->base_addr.addr;
+
+      /* Store code size in dwords. */
+      assert(code_aligned_size % 4 == 0);
+      pds_upload_out->code_size = code_aligned_size / 4;
+   } else {
+      pds_upload_out->code_offset = 0;
+      pds_upload_out->code_size = 0;
+   }
+
+   pvr_bo_cpu_unmap(device, pds_upload_out->pvr_bo);
+
+   return VK_SUCCESS;
+}
+
+/* Build and upload the framebuffer's PPP terminate state: a header word
+ * flagging a terminate, followed by a clip rectangle derived from the
+ * framebuffer dimensions. Sets framebuffer->ppp_state_bo and
+ * framebuffer->ppp_state_size (in dwords) on success.
+ */
+static VkResult
+pvr_framebuffer_create_ppp_state(struct pvr_device *device,
+                                 struct pvr_framebuffer *framebuffer)
+{
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   uint32_t ppp_state[3];
+   VkResult result;
+
+   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
+      header.pres_terminate = true;
+   }
+
+   /* Clip extents are in hardware block units, hence the divide; the
+    * hardware field is inclusive, hence the -1.
+    */
+   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
+      term0.clip_right =
+         DIV_ROUND_UP(
+            framebuffer->width,
+            PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
+         1;
+      term0.clip_bottom =
+         DIV_ROUND_UP(
+            framebuffer->height,
+            PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
+         1;
+   }
+
+   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
+      term1.render_target = 0;
+      term1.clip_left = 0;
+   }
+
+   result = pvr_gpu_upload(device,
+                           device->heaps.general_heap,
+                           ppp_state,
+                           sizeof(ppp_state),
+                           cache_line_size,
+                           &framebuffer->ppp_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Calculate the size of PPP state in dwords. */
+   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);
+
+   return VK_SUCCESS;
+}
+
+/* Initialize one mutex per render target. Returns false (with any already-
+ * initialized mutexes destroyed) if a mutex fails to initialize.
+ */
+static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
+                                    uint32_t render_targets_count)
+{
+   for (uint32_t i = 0; i < render_targets_count; i++) {
+      if (pthread_mutex_init(&render_targets[i].mutex, NULL) != 0) {
+         /* Roll back the mutexes created so far. */
+         for (uint32_t j = 0; j < i; j++)
+            pthread_mutex_destroy(&render_targets[j].mutex);
+
+         return false;
+      }
+   }
+
+   return true;
+}
+
+/* Destroy render target datasets (for valid entries) and their mutexes. */
+static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
+                                    uint32_t render_targets_count)
+{
+   for (uint32_t i = 0U; i < render_targets_count; i++) {
+      struct pvr_render_target *render_target = &render_targets[i];
+
+      if (render_target->valid) {
+         pvr_render_target_dataset_destroy(render_target->rt_dataset);
+         render_target->valid = false;
+      }
+
+      pthread_mutex_destroy(&render_target->mutex);
+   }
+}
+
VkResult pvr_CreateFramebuffer(VkDevice _device,
                               const VkFramebufferCreateInfo *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkFramebuffer *pFramebuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_render_target *render_targets;
   struct pvr_framebuffer *framebuffer;
   struct pvr_image_view **attachments;
   uint32_t render_targets_count;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   render_targets_count =
      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);

   /* The framebuffer, its attachment array and its render target array are
    * carved out of a single zeroed allocation, so a single vk_free2() below
    * (and in pvr_DestroyFramebuffer()) releases all three.
    */
   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &render_targets,
                     __typeof__(*render_targets),
                     render_targets_count);

   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              pAllocator,
                              VK_OBJECT_TYPE_FRAMEBUFFER))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk,
                       &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   /* Resolve the attachment handles once so later users can index the
    * pvr_image_view pointers directly.
    */
   framebuffer->attachments = attachments;
   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
      framebuffer->attachments[i] =
         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
   }

   /* Uploads PPP state to GPU memory; on success this owns ppp_state_bo,
    * which must be freed on any later failure path.
    */
   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
   if (result != VK_SUCCESS)
      goto err_free_framebuffer;

   framebuffer->render_targets = render_targets;
   framebuffer->render_targets_count = render_targets_count;
   if (!pvr_render_targets_init(framebuffer->render_targets,
                                render_targets_count)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_ppp_state_bo;
   }

   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;

err_free_ppp_state_bo:
   pvr_bo_free(device, framebuffer->ppp_state_bo);

err_free_framebuffer:
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);

   return result;
}
+
+void pvr_DestroyFramebuffer(VkDevice _device,
+                            VkFramebuffer _fb,
+                            const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
+
+   if (!framebuffer)
+      return;
+
+   pvr_render_targets_fini(framebuffer->render_targets,
+                           framebuffer->render_targets_count);
+   pvr_bo_free(device, framebuffer->ppp_state_bo);
+   vk_object_base_finish(&framebuffer->base);
+   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
+}
+
+PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
+vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
+{
+   /* For the full details on loader interface versioning, see
+    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
+    * What follows is a condensed summary, to help you navigate the large and
+    * confusing official doc.
+    *
+    *   - Loader interface v0 is incompatible with later versions. We don't
+    *     support it.
+    *
+    *   - In loader interface v1:
+    *       - The first ICD entrypoint called by the loader is
+    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
+    *         entrypoint.
+    *       - The ICD must statically expose no other Vulkan symbol unless it
+    *         is linked with -Bsymbolic.
+    *       - Each dispatchable Vulkan handle created by the ICD must be
+    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
+    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
+    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
+    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
+    *         such loader-managed surfaces.
+    *
+    *    - Loader interface v2 differs from v1 in:
+    *       - The first ICD entrypoint called by the loader is
+    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
+    *         statically expose this entrypoint.
+    *
+    *    - Loader interface v3 differs from v2 in:
+    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
+    *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
+    *          because the loader no longer does so.
+    *
+    *    - Loader interface v4 differs from v3 in:
+    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
+    */
+   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
+   return VK_SUCCESS;
+}
+
/* Placeholder: sampler creation is not implemented yet. The assert fires on
 * debug builds; on release builds this returns VK_SUCCESS without writing
 * *pSampler, so the handle must not be used.
 */
VkResult pvr_CreateSampler(VkDevice _device,
                           const VkSamplerCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkSampler *pSampler)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}
+
+void pvr_GetBufferMemoryRequirements2(
+   VkDevice _device,
+   const VkBufferMemoryRequirementsInfo2 *pInfo,
+   VkMemoryRequirements2 *pMemoryRequirements)
+{
+   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   /* The Vulkan 1.0.166 spec says:
+    *
+    *    memoryTypeBits is a bitmask and contains one bit set for every
+    *    supported memory type for the resource. Bit 'i' is set if and only
+    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
+    *    structure for the physical device is supported for the resource.
+    *
+    * All types are currently supported for buffers.
+    */
+   pMemoryRequirements->memoryRequirements.memoryTypeBits =
+      (1ul << device->pdevice->memory.memoryTypeCount) - 1;
+
+   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
+   pMemoryRequirements->memoryRequirements.size =
+      ALIGN_POT(buffer->size, buffer->alignment);
+}
+
void pvr_GetDeviceQueue(VkDevice _device,
                        uint32_t queueFamilyIndex,
                        uint32_t queueIndex,
                        VkQueue *pQueue)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   /* Only a single queue family is exposed. */
   assert(queueFamilyIndex == 0);

   /* No bounds check on queueIndex: per the spec it must be less than the
    * queue count requested at device creation.
    */
   *pQueue = pvr_queue_to_handle(&device->queues[queueIndex]);
}
+
+void pvr_GetImageMemoryRequirements2(VkDevice _device,
+                                     const VkImageMemoryRequirementsInfo2 *pInfo,
+                                     VkMemoryRequirements2 *pMemoryRequirements)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);
+
+   /* The Vulkan 1.0.166 spec says:
+    *
+    *    memoryTypeBits is a bitmask and contains one bit set for every
+    *    supported memory type for the resource. Bit 'i' is set if and only
+    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
+    *    structure for the physical device is supported for the resource.
+    *
+    * All types are currently supported for images.
+    */
+   const uint32_t memory_types =
+      (1ul << device->pdevice->memory.memoryTypeCount) - 1;
+
+   /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
+    * in GetImageMemoryRequirements()), but this should be known at image
+    * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
+    * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
+    * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
+    * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
+    *
+    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
+    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
+    * render and compute jobs.
+    */
+   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
+   pMemoryRequirements->memoryRequirements.size =
+      ALIGN(image->size, image->alignment);
+   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
+}
diff --git a/src/imagination/vulkan/pvr_formats.c b/src/imagination/vulkan/pvr_formats.c
new file mode 100644 (file)
index 0000000..33a0a34
--- /dev/null
@@ -0,0 +1,348 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include "pvr_formats.h"
+#include "pvr_private.h"
+#include "vk_format.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
/* Table-entry helper: indexes the table by VkFormat and marks the entry as
 * supported, pairing the Vulkan format with the hardware texture state
 * format and PBE pack mode.
 */
#define FORMAT(vk, tex_fmt, pack_mode)                     \
   [VK_FORMAT_##vk] = {                                    \
      .vk_format = VK_FORMAT_##vk,                         \
      .tex_format = ROGUE_TEXSTATE_FORMAT_##tex_fmt,       \
      .pbe_packmode = ROGUE_PBESTATE_PACKMODE_##pack_mode, \
      .supported = true,                                   \
   }

struct pvr_format {
   /* The Vulkan format this entry describes (same as the table index). */
   VkFormat vk_format;
   /* Hardware texture state format (ROGUE_TEXSTATE_FORMAT_*). */
   uint32_t tex_format;
   /* Pixel backend pack mode (ROGUE_PBESTATE_PACKMODE_*). */
   uint32_t pbe_packmode;
   /* False for the zero-initialized gaps between FORMAT() entries. */
   bool supported;
};

/* TODO: add all supported core formats */
static const struct pvr_format pvr_format_table[] = {
   FORMAT(B8G8R8A8_UNORM, U8U8U8U8, U8U8U8U8),
   FORMAT(D32_SFLOAT, F32, F32),
};

#undef FORMAT
+
+static inline const struct pvr_format *pvr_get_format(VkFormat vk_format)
+{
+   if (vk_format < ARRAY_SIZE(pvr_format_table) &&
+       pvr_format_table[vk_format].supported) {
+      return &pvr_format_table[vk_format];
+   }
+
+   return NULL;
+}
+
+uint32_t pvr_get_tex_format(VkFormat vk_format)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(vk_format);
+   if (pvr_format) {
+      return pvr_format->tex_format;
+   }
+
+   return ROGUE_TEXSTATE_FORMAT_INVALID;
+}
+
+uint32_t pvr_get_pbe_packmode(VkFormat vk_format)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(vk_format);
+   if (pvr_format)
+      return pvr_format->pbe_packmode;
+
+   return ROGUE_PBESTATE_PACKMODE_INVALID;
+}
+
+static VkFormatFeatureFlags
+pvr_get_image_format_features(const struct pvr_format *pvr_format,
+                              VkImageTiling vk_tiling)
+{
+   VkFormatFeatureFlags flags = 0;
+   VkImageAspectFlags aspects;
+
+   if (!pvr_format)
+      return 0;
+
+   aspects = vk_format_aspects(pvr_format->vk_format);
+   if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
+      flags |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT |
+               VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
+               VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+   }
+
+   return flags;
+}
+
+const uint8_t *pvr_get_format_swizzle(VkFormat vk_format)
+{
+   const struct util_format_description *vf = vk_format_description(vk_format);
+   static const uint8_t fallback[] = { PIPE_SWIZZLE_X,
+                                       PIPE_SWIZZLE_Y,
+                                       PIPE_SWIZZLE_Z,
+                                       PIPE_SWIZZLE_W };
+
+   if (vf)
+      return vf->swizzle;
+
+   assert(!"Unsupported format");
+   return fallback;
+}
+
+static VkFormatFeatureFlags
+pvr_get_buffer_format_features(const struct pvr_format *pvr_format)
+{
+   VkFormatFeatureFlags flags = 0;
+
+   if (!pvr_format)
+      return 0;
+
+   return flags;
+}
+
+void pvr_GetPhysicalDeviceFormatProperties2(
+   VkPhysicalDevice physicalDevice,
+   VkFormat format,
+   VkFormatProperties2 *pFormatProperties)
+{
+   const struct pvr_format *pvr_format = pvr_get_format(format);
+
+   pFormatProperties->formatProperties = (VkFormatProperties){
+      .linearTilingFeatures =
+         pvr_get_image_format_features(pvr_format, VK_IMAGE_TILING_LINEAR),
+      .optimalTilingFeatures =
+         pvr_get_image_format_features(pvr_format, VK_IMAGE_TILING_OPTIMAL),
+      .bufferFeatures = pvr_get_buffer_format_features(pvr_format),
+   };
+
+   vk_foreach_struct (ext, pFormatProperties->pNext) {
+      pvr_debug_ignored_stype(ext->sType);
+   }
+}
+
/* Placeholder: image format capability queries are not implemented yet.
 * NOTE(review): *pImageFormatProperties is left unwritten while VK_SUCCESS
 * is returned — callers currently receive whatever was in the struct.
 */
static VkResult
pvr_get_image_format_properties(struct pvr_physical_device *pdevice,
                                const VkPhysicalDeviceImageFormatInfo2 *info,
                                VkImageFormatProperties *pImageFormatProperties)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}
+
VkResult pvr_GetPhysicalDeviceImageFormatProperties2(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceImageFormatInfo2 *pImageFormatInfo,
   VkImageFormatProperties2 *pImageFormatProperties)
{
   const VkPhysicalDeviceExternalImageFormatInfo *external_info = NULL;
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   VkExternalImageFormatProperties *external_props = NULL;
   VkResult result;

   result = pvr_get_image_format_properties(
      pdevice,
      pImageFormatInfo,
      &pImageFormatProperties->imageFormatProperties);
   if (result != VK_SUCCESS)
      return result;

   /* Extract input structs */
   vk_foreach_struct_const (ext, pImageFormatInfo->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO:
         external_info = (const void *)ext;
         break;
      default:
         pvr_debug_ignored_stype(ext->sType);
         break;
      }
   }

   /* Extract output structs */
   vk_foreach_struct (ext, pImageFormatProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES:
         external_props = (void *)ext;
         break;
      default:
         pvr_debug_ignored_stype(ext->sType);
         break;
      }
   }

   /* From the Vulkan 1.0.42 spec:
    *
    *    If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2 will
    *    behave as if VkPhysicalDeviceExternalImageFormatInfo was not
    *    present and VkExternalImageFormatProperties will be ignored.
    */
   if (external_info && external_info->handleType != 0) {
      switch (external_info->handleType) {
      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
         /* The caller asked about a supported handle type but did not chain
          * a properties output struct; nothing to fill in.
          */
         if (!external_props)
            break;

         /* fd-based handles are both importable and exportable, and the two
          * fd handle types are mutually compatible.
          */
         external_props->externalMemoryProperties.externalMemoryFeatures =
            VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
            VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
         external_props->externalMemoryProperties.compatibleHandleTypes =
            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
            VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
         external_props->externalMemoryProperties.exportFromImportedHandleTypes =
            VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
            VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
         break;
      default:
         return vk_error(pdevice, VK_ERROR_FORMAT_NOT_SUPPORTED);
      }
   }

   return VK_SUCCESS;
}
+
void pvr_GetPhysicalDeviceSparseImageFormatProperties(
   VkPhysicalDevice physicalDevice,
   VkFormat format,
   VkImageType type,
   uint32_t samples,
   VkImageUsageFlags usage,
   VkImageTiling tiling,
   uint32_t *pNumProperties,
   VkSparseImageFormatProperties *pProperties)
{
   /* Sparse images are not yet supported: report zero property entries. */
   *pNumProperties = 0;
}
+
void pvr_GetPhysicalDeviceSparseImageFormatProperties2(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceSparseImageFormatInfo2 *pFormatInfo,
   uint32_t *pPropertyCount,
   VkSparseImageFormatProperties2 *pProperties)
{
   /* Sparse images are not yet supported: report zero property entries. */
   *pPropertyCount = 0;
}
+
+void pvr_GetPhysicalDeviceExternalBufferProperties(
+   VkPhysicalDevice physicalDevice,
+   const VkPhysicalDeviceExternalBufferInfo *pExternalBufferInfo,
+   VkExternalBufferProperties *pExternalBufferProperties)
+{
+   /* The Vulkan 1.0.42 spec says "handleType must be a valid
+    * VkExternalMemoryHandleTypeFlagBits value" in
+    * VkPhysicalDeviceExternalBufferInfo. This differs from
+    * VkPhysicalDeviceExternalImageFormatInfo, which surprisingly permits
+    * handleType == 0.
+    */
+   assert(pExternalBufferInfo->handleType != 0);
+
+   /* All of the current flags are for sparse which we don't support. */
+   if (pExternalBufferInfo->flags)
+      goto unsupported;
+
+   switch (pExternalBufferInfo->handleType) {
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
+   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT:
+      /* clang-format off */
+      pExternalBufferProperties->externalMemoryProperties.externalMemoryFeatures =
+         VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT |
+         VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT;
+      pExternalBufferProperties->externalMemoryProperties.exportFromImportedHandleTypes =
+         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
+         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+      pExternalBufferProperties->externalMemoryProperties.compatibleHandleTypes =
+         VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
+         VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT;
+      /* clang-format on */
+      return;
+   default:
+      break;
+   }
+
+unsupported:
+   /* From the Vulkan 1.1.113 spec:
+    *
+    *    compatibleHandleTypes must include at least handleType.
+    */
+   pExternalBufferProperties->externalMemoryProperties =
+      (VkExternalMemoryProperties){
+         .compatibleHandleTypes = pExternalBufferInfo->handleType,
+      };
+}
+
+bool pvr_format_is_pbe_downscalable(VkFormat vk_format)
+{
+   if (vk_format_is_pure_integer(vk_format)) {
+      /* PBE downscale behavior for integer formats does not match Vulkan
+       * spec. Vulkan requires a single sample to be chosen instead of
+       * taking the average sample color.
+       */
+      return false;
+   }
+
+   switch (pvr_get_pbe_packmode(vk_format)) {
+   default:
+      return true;
+
+   case ROGUE_PBESTATE_PACKMODE_U16U16U16U16:
+   case ROGUE_PBESTATE_PACKMODE_S16S16S16S16:
+   case ROGUE_PBESTATE_PACKMODE_U32U32U32U32:
+   case ROGUE_PBESTATE_PACKMODE_S32S32S32S32:
+   case ROGUE_PBESTATE_PACKMODE_F32F32F32F32:
+   case ROGUE_PBESTATE_PACKMODE_U16U16U16:
+   case ROGUE_PBESTATE_PACKMODE_S16S16S16:
+   case ROGUE_PBESTATE_PACKMODE_U32U32U32:
+   case ROGUE_PBESTATE_PACKMODE_S32S32S32:
+   case ROGUE_PBESTATE_PACKMODE_F32F32F32:
+   case ROGUE_PBESTATE_PACKMODE_U16U16:
+   case ROGUE_PBESTATE_PACKMODE_S16S16:
+   case ROGUE_PBESTATE_PACKMODE_U32U32:
+   case ROGUE_PBESTATE_PACKMODE_S32S32:
+   case ROGUE_PBESTATE_PACKMODE_F32F32:
+   case ROGUE_PBESTATE_PACKMODE_U24ST8:
+   case ROGUE_PBESTATE_PACKMODE_ST8U24:
+   case ROGUE_PBESTATE_PACKMODE_U16:
+   case ROGUE_PBESTATE_PACKMODE_S16:
+   case ROGUE_PBESTATE_PACKMODE_U32:
+   case ROGUE_PBESTATE_PACKMODE_S32:
+   case ROGUE_PBESTATE_PACKMODE_F32:
+   case ROGUE_PBESTATE_PACKMODE_X24U8F32:
+   case ROGUE_PBESTATE_PACKMODE_X24X8F32:
+   case ROGUE_PBESTATE_PACKMODE_X24G8X32:
+   case ROGUE_PBESTATE_PACKMODE_X8U24:
+   case ROGUE_PBESTATE_PACKMODE_U8X24:
+   case ROGUE_PBESTATE_PACKMODE_PBYTE:
+   case ROGUE_PBESTATE_PACKMODE_PWORD:
+   case ROGUE_PBESTATE_PACKMODE_INVALID:
+      return false;
+   }
+}
diff --git a/src/imagination/vulkan/pvr_formats.h b/src/imagination/vulkan/pvr_formats.h
new file mode 100644 (file)
index 0000000..c038be7
--- /dev/null
@@ -0,0 +1,36 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#ifndef PVR_FORMATS_H
#define PVR_FORMATS_H

#include <stdbool.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

/* Returns the component swizzle for 'vk_format' (identity on unsupported). */
const uint8_t *pvr_get_format_swizzle(VkFormat vk_format);
/* Returns ROGUE_TEXSTATE_FORMAT_* for 'vk_format', or the INVALID value. */
uint32_t pvr_get_tex_format(VkFormat vk_format);
/* Returns ROGUE_PBESTATE_PACKMODE_* for 'vk_format', or the INVALID value. */
uint32_t pvr_get_pbe_packmode(VkFormat vk_format);
/* Whether the pixel backend can downscale (resolve) this format. */
bool pvr_format_is_pbe_downscalable(VkFormat vk_format);

#endif /* PVR_FORMATS_H */
diff --git a/src/imagination/vulkan/pvr_hw_pass.c b/src/imagination/vulkan/pvr_hw_pass.c
new file mode 100644 (file)
index 0000000..653f927
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+
+#include "pvr_hw_pass.h"
+#include "pvr_private.h"
+#include "vk_alloc.h"
+
/* Frees a hwsetup created by pvr_create_renderpass_hwsetup(). All of its
 * sub-structures live in the same single allocation as 'hw_setup' itself,
 * so one vk_free() releases everything. NULL is a no-op.
 */
void pvr_destroy_renderpass_hwsetup(struct pvr_device *device,
                                    struct pvr_renderpass_hwsetup *hw_setup)
{
   vk_free(&device->vk.alloc, hw_setup);
}
+
/* Builds a hardcoded single-render, single-subpass hwsetup for 'pass'.
 * Every sub-structure is carved out of one zeroed multialloc so that
 * pvr_destroy_renderpass_hwsetup() can free the whole thing at once.
 * Returns NULL on allocation failure.
 *
 * NOTE(review): 'disable_merge' is currently unused and only
 * pass->subpasses[0].color_count is read from 'pass' — presumably both will
 * be used once the hardcoding is removed (see FIXME below).
 */
struct pvr_renderpass_hwsetup *
pvr_create_renderpass_hwsetup(struct pvr_device *device,
                              struct pvr_render_pass *pass,
                              bool disable_merge)
{
   struct pvr_renderpass_hwsetup_eot_surface *eot_surface;
   enum pvr_renderpass_surface_initop *color_initops;
   struct pvr_renderpass_hwsetup_subpass *subpasses;
   struct pvr_renderpass_hwsetup_render *renders;
   struct pvr_renderpass_colorinit *color_inits;
   struct pvr_renderpass_hwsetup *hw_setup;
   struct pvr_renderpass_hw_map *subpass_map;
   struct usc_mrt_resource *mrt_resources;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &hw_setup, __typeof__(*hw_setup), 1);
   vk_multialloc_add(&ma, &renders, __typeof__(*renders), 1);
   vk_multialloc_add(&ma, &color_inits, __typeof__(*color_inits), 1);
   vk_multialloc_add(&ma, &subpass_map, __typeof__(*subpass_map), 1);
   vk_multialloc_add(&ma, &mrt_resources, __typeof__(*mrt_resources), 2);
   vk_multialloc_add(&ma, &subpasses, __typeof__(*subpasses), 1);
   vk_multialloc_add(&ma, &eot_surface, __typeof__(*eot_surface), 1);
   vk_multialloc_add(&ma,
                     &color_initops,
                     __typeof__(*color_initops),
                     pass->subpasses[0].color_count);
   /* Note, no more multialloc slots available (maximum supported is 8). */

   if (!vk_multialloc_zalloc(&ma,
                             &device->vk.alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) {
      return NULL;
   }

   /* FIXME: Remove hardcoding of hw_setup structure. */
   /* Single subpass: clear depth at the start, no z replicate, no stencil
    * clear, color attachment 0 left uninitialized at the subpass level.
    */
   subpasses[0].z_replicate = -1;
   subpasses[0].depth_initop = RENDERPASS_SURFACE_INITOP_CLEAR;
   subpasses[0].stencil_clear = false;
   subpasses[0].driver_id = 0;
   color_initops[0] = RENDERPASS_SURFACE_INITOP_NOP;
   subpasses[0].color_initops = color_initops;
   subpasses[0].client_data = NULL;
   renders[0].subpass_count = 1;
   renders[0].subpasses = subpasses;

   renders[0].sample_count = 1;
   renders[0].ds_surface_id = 1;
   renders[0].depth_init = RENDERPASS_SURFACE_INITOP_CLEAR;
   renders[0].stencil_init = RENDERPASS_SURFACE_INITOP_NOP;

   /* mrt_resources[0]: render-start init goes through output register 0. */
   mrt_resources[0].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER;
   mrt_resources[0].u.reg.out_reg = 0;
   mrt_resources[0].u.reg.offset = 0;
   renders[0].init_setup.render_targets_count = 1;
   renders[0].init_setup.mrt_resources = &mrt_resources[0];

   color_inits[0].op = RENDERPASS_SURFACE_INITOP_CLEAR;
   color_inits[0].driver_id = 0;
   renders[0].color_init_count = 1;
   renders[0].color_init = color_inits;

   /* mrt_resources[1]: end-of-tile store also reads output register 0. */
   mrt_resources[1].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER;
   mrt_resources[1].u.reg.out_reg = 0;
   mrt_resources[1].u.reg.offset = 0;
   renders[0].eot_setup.render_targets_count = 1;
   renders[0].eot_setup.mrt_resources = &mrt_resources[1];

   /* One end-of-tile store: MRT 0 into attachment 0, no resolve. */
   eot_surface->mrt_index = 0;
   eot_surface->attachment_index = 0;
   eot_surface->need_resolve = false;
   eot_surface->resolve_type = PVR_RESOLVE_TYPE_INVALID;
   eot_surface->src_attachment_index = 0;
   renders[0].eot_surfaces = eot_surface;
   renders[0].eot_surface_count = 1;

   renders[0].output_regs_count = 1;
   renders[0].tile_buffers_count = 0;
   renders[0].client_data = NULL;
   hw_setup->render_count = 1;
   hw_setup->renders = renders;

   /* Subpass 0 maps to render 0 / hw subpass 0. */
   subpass_map->render = 0;
   subpass_map->subpass = 0;
   hw_setup->subpass_map = subpass_map;

   return hw_setup;
}
diff --git a/src/imagination/vulkan/pvr_hw_pass.h b/src/imagination/vulkan/pvr_hw_pass.h
new file mode 100644 (file)
index 0000000..520ea79
--- /dev/null
@@ -0,0 +1,223 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_HW_PASS_H
+#define PVR_HW_PASS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
/* Forward declarations to avoid pulling in the full driver headers. */
struct pvr_device;
struct pvr_render_pass;

/* How a surface's on-chip contents are established at the start of a render
 * or subpass.
 */
enum pvr_renderpass_surface_initop {
   /* Clear the surface. */
   RENDERPASS_SURFACE_INITOP_CLEAR,
   /* Load the existing surface contents from memory. */
   RENDERPASS_SURFACE_INITOP_LOAD,
   /* Leave the surface contents as-is (no operation). */
   RENDERPASS_SURFACE_INITOP_NOP,
};

struct pvr_renderpass_hwsetup_subpass {
   /* If >=0 then copy the depth into this pixel output for all fragment
    * programs in the subpass.
    */
   int32_t z_replicate;

   /* The operation to perform on the depth at the start of the subpass. Loads
    * are deferred to subpasses when depth has been replicated
    */
   enum pvr_renderpass_surface_initop depth_initop;

   /* If true then clear the stencil at the start of the subpass. */
   bool stencil_clear;

   /* Driver Id from the input pvr_render_subpass structure. */
   uint32_t driver_id;

   /* For each color attachment to the subpass: the operation to perform at
    * the start of the subpass.
    */
   enum pvr_renderpass_surface_initop *color_initops;

   /* Opaque pointer for the consumer of the hwsetup; set to NULL by
    * pvr_create_renderpass_hwsetup().
    */
   void *client_data;
};

/* One init operation on the on-chip color storage at render start. */
struct pvr_renderpass_colorinit {
   /* Source surface for the operation. */
   uint32_t driver_id;

   /* Type of operation: either clear or load. */
   enum pvr_renderpass_surface_initop op;
};
+
/* FIXME: Adding these USC enums and structures here for now to avoid adding
 * usc.h header. Needs to be moved to compiler specific header.
 */
/* Specifies the location of render target writes. */
enum usc_mrt_resource_type {
   USC_MRT_RESOURCE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   /* Render target lives in a pixel output register. */
   USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER,
   /* Render target lives in a tile buffer in memory. */
   USC_MRT_RESOURCE_TYPE_MEMORY,
};

struct usc_mrt_resource {
   /* Resource type allocated for render target. */
   enum usc_mrt_resource_type type;

   /* Location details; which member is valid is selected by 'type'. */
   union {
      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER. */
      struct {
         /* The output register to use. */
         uint32_t out_reg;

         /* The offset in bytes into the output register. */
         uint32_t offset;
      } reg;

      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
      struct {
         /* The number of the tile buffer to use. */
         uint32_t tile_buffer;

         /* The offset in dwords within the tile buffer. */
         uint32_t offset_in_dwords;
      } mem;
   } u;
};

struct usc_mrt_setup {
   /* Number of render targets present. */
   uint32_t render_targets_count;

   /* Array of MRT resources allocated for each render target. The number of
    * elements is determined by usc_mrt_setup::render_targets_count.
    */
   struct usc_mrt_resource *mrt_resources;
};

/* How an end-of-tile surface is resolved, if at all. */
enum pvr_resolve_type {
   PVR_RESOLVE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
   /* Resolve performed by the pixel backend. */
   PVR_RESOLVE_TYPE_PBE,
   /* Resolve performed as a separate transfer operation. */
   PVR_RESOLVE_TYPE_TRANSFER,
};
+
/* Describes one surface stored out at the end of a tile. */
struct pvr_renderpass_hwsetup_eot_surface {
   /* MRT index to store from. Also used to index into
    * usc_mrt_setup::mrt_resources.
    */
   uint32_t mrt_index;

   /* Index of pvr_render_pass_info::attachments to store into. */
   uint32_t attachment_index;

   /* True if the surface should be resolved. */
   bool need_resolve;

   /* How the surface should be resolved at the end of a render. Only valid if
    * pvr_renderpass_hwsetup_eot_surface::need_resolve is set to true.
    */
   enum pvr_resolve_type resolve_type;

   /* Index of pvr_render_pass_info::attachments to resolve from. Only valid if
    * pvr_renderpass_hwsetup_eot_surface::need_resolve is set to true.
    */
   uint32_t src_attachment_index;
};

struct pvr_renderpass_hwsetup_render {
   /* Number of pixel output registers to allocate for this render. */
   uint32_t output_regs_count;

   /* Number of tile buffers to allocate for this render. */
   uint32_t tile_buffers_count;

   /* Number of subpasses in this render. */
   uint32_t subpass_count;

   /* Description of each subpass. */
   struct pvr_renderpass_hwsetup_subpass *subpasses;

   /* The sample count of every color attachment (or depth attachment if
    * z-only) in this render
    */
   uint32_t sample_count;

   /* Driver Id for the surface to use for depth/stencil load/store in this
    * render.
    */
   int32_t ds_surface_id;

   /* Operation on the on-chip depth at the start of the render.
    * Either load from 'ds_surface_id', clear using 'ds_surface_id' or leave
    * uninitialized.
    */
   enum pvr_renderpass_surface_initop depth_init;

   /* Operation on the on-chip stencil at the start of the render. */
   enum pvr_renderpass_surface_initop stencil_init;

   /* For each operation: the destination in the on-chip color storage. */
   struct usc_mrt_setup init_setup;

   /* Count of operations on on-chip color storage at the start of the render.
    */
   uint32_t color_init_count;

   /* How to initialize render targets at the start of the render. */
   struct pvr_renderpass_colorinit *color_init;

   /* Describes the location of the source data for each stored surface. */
   struct usc_mrt_setup eot_setup;

   /* End-of-tile surface stores; eot_surface_count elements. */
   struct pvr_renderpass_hwsetup_eot_surface *eot_surfaces;
   uint32_t eot_surface_count;

   /* Opaque pointer for the consumer of the hwsetup. */
   void *client_data;
};

/* Maps one API subpass to its hardware render and hardware subpass. */
struct pvr_renderpass_hw_map {
   /* Index into pvr_renderpass_hwsetup::renders. */
   uint32_t render;

   /* Index into pvr_renderpass_hwsetup_render::subpasses of that render. */
   uint32_t subpass;
};
+
struct pvr_renderpass_hwsetup {
   /* Number of renders. */
   uint32_t render_count;

   /* Description of each render. */
   struct pvr_renderpass_hwsetup_render *renders;

   /* Maps indices from pvr_render_pass::subpasses to the
    * pvr_renderpass_hwsetup_render/pvr_renderpass_hwsetup_subpass relative to
    * that render where the subpass is scheduled.
    */
   struct pvr_renderpass_hw_map *subpass_map;
};

/* Builds a hwsetup for 'pass' as a single allocation; NULL on failure. */
struct pvr_renderpass_hwsetup *
pvr_create_renderpass_hwsetup(struct pvr_device *device,
                              struct pvr_render_pass *pass,
                              bool disable_merge);
/* Frees a hwsetup returned by pvr_create_renderpass_hwsetup(). */
void pvr_destroy_renderpass_hwsetup(struct pvr_device *device,
                                    struct pvr_renderpass_hwsetup *hw_setup);
+
+#endif /* PVR_HW_PASS_H */
diff --git a/src/imagination/vulkan/pvr_image.c b/src/imagination/vulkan/pvr_image.c
new file mode 100644 (file)
index 0000000..4be62a1
--- /dev/null
@@ -0,0 +1,375 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdint.h>
+
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_formats.h"
+#include "pvr_private.h"
+#include "pvr_tex_state.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_format.h"
+#include "vk_image.h"
+#include "vk_log.h"
+#include "vk_util.h"
+#include "wsi_common.h"
+
+/* Choose the device memory layout from the image's tiling mode.
+ *
+ * Optimal tiling maps to a twiddled layout (3D-twiddled for 3D images),
+ * except for legacy WSI scanout images which must remain linear. Linear
+ * tiling always maps to a linear layout.
+ */
+static void pvr_image_init_memlayout(struct pvr_image *image)
+{
+   switch (image->vk.tiling) {
+   default:
+      unreachable("bad VkImageTiling");
+   case VK_IMAGE_TILING_OPTIMAL:
+      if (image->vk.wsi_legacy_scanout)
+         image->memlayout = PVR_MEMLAYOUT_LINEAR;
+      else if (image->vk.image_type == VK_IMAGE_TYPE_3D)
+         image->memlayout = PVR_MEMLAYOUT_3DTWIDDLED;
+      else
+         image->memlayout = PVR_MEMLAYOUT_TWIDDLED;
+      break;
+   case VK_IMAGE_TILING_LINEAR:
+      image->memlayout = PVR_MEMLAYOUT_LINEAR;
+      break;
+   }
+}
+
+/* Compute the allocated (physical) extent from the logical extent.
+ *
+ * Twiddled layouts and mip-mapped images are rounded up to power-of-two
+ * dimensions; plain linear images keep the logical extent as-is.
+ * Must run after pvr_image_init_memlayout() has set image->memlayout.
+ */
+static void pvr_image_init_physical_extent(struct pvr_image *image)
+{
+   assert(image->memlayout != PVR_MEMLAYOUT_UNDEFINED);
+
+   /* clang-format off */
+   if (image->vk.mip_levels > 1 ||
+      image->memlayout == PVR_MEMLAYOUT_TWIDDLED ||
+      image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED) {
+      /* clang-format on */
+      image->physical_extent.width =
+         util_next_power_of_two(image->vk.extent.width);
+      image->physical_extent.height =
+         util_next_power_of_two(image->vk.extent.height);
+      image->physical_extent.depth =
+         util_next_power_of_two(image->vk.extent.depth);
+   } else {
+      assert(image->memlayout == PVR_MEMLAYOUT_LINEAR);
+      image->physical_extent = image->vk.extent;
+   }
+}
+
+/* Lay out all mip levels within a single array layer and derive the total
+ * image size.
+ *
+ * Fills in pitch/height_pitch/size/offset for each entry of
+ * image->mip_levels, then sets image->layer_size (bytes per array layer,
+ * aligned for CEM when there are multiple layers) and image->size
+ * (layer_size * array_layers). Requires physical_extent and alignment to
+ * be initialized first.
+ */
+static void pvr_image_setup_mip_levels(struct pvr_image *image)
+{
+   /* 3D images align each extent dimension to 4; others need no padding. */
+   const uint32_t extent_alignment =
+      image->vk.image_type == VK_IMAGE_TYPE_3D ? 4 : 1;
+   const unsigned int cpp = vk_format_get_blocksize(image->vk.format);
+
+   /* Mip-mapped textures that are non-dword aligned need dword-aligned levels
+    * so they can be TQd from.
+    */
+   const uint32_t level_alignment = image->vk.mip_levels > 1 ? 4 : 1;
+
+   assert(image->vk.mip_levels <= ARRAY_SIZE(image->mip_levels));
+
+   image->layer_size = 0;
+
+   for (uint32_t i = 0; i < image->vk.mip_levels; i++) {
+      const uint32_t height = u_minify(image->physical_extent.height, i);
+      const uint32_t width = u_minify(image->physical_extent.width, i);
+      const uint32_t depth = u_minify(image->physical_extent.depth, i);
+      struct pvr_mip_level *mip_level = &image->mip_levels[i];
+
+      mip_level->pitch = cpp * ALIGN(width, extent_alignment);
+      mip_level->height_pitch = ALIGN(height, extent_alignment);
+      /* Level size covers all samples of the (aligned) 3D extent. */
+      mip_level->size = image->vk.samples * mip_level->pitch *
+                        mip_level->height_pitch *
+                        ALIGN(depth, extent_alignment);
+      mip_level->size = ALIGN(mip_level->size, level_alignment);
+      /* Levels are packed back-to-back within a layer. */
+      mip_level->offset = image->layer_size;
+
+      image->layer_size += mip_level->size;
+   }
+
+   /* TODO: It might be useful to store the alignment in the image so it can be
+    * checked (via an assert?) when setting
+    * RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN, assuming this is where the
+    * requirement comes from.
+    */
+   if (image->vk.array_layers > 1)
+      image->layer_size = ALIGN(image->layer_size, image->alignment);
+
+   image->size = image->layer_size * image->vk.array_layers;
+}
+
+/* vkCreateImage entry point.
+ *
+ * Allocates the pvr_image via the common vk_image_create() helper, then
+ * derives the driver-side layout (memlayout, physical extent, mip level
+ * offsets/sizes). No device memory is allocated here; that happens at
+ * bind time in pvr_BindImageMemory2().
+ */
+VkResult pvr_CreateImage(VkDevice _device,
+                         const VkImageCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkImage *pImage)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_image *image;
+
+   pvr_finishme("Review whether all inputs are handled\n");
+
+   image =
+      vk_image_create(&device->vk, pCreateInfo, pAllocator, sizeof(*image));
+   if (!image)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* All images aligned to 4k, in case of arrays/CEM.
+    * Refer: pvr_GetImageMemoryRequirements for further details.
+    */
+   image->alignment = 4096U;
+
+   /* Initialize the image using the saved information from pCreateInfo */
+   pvr_image_init_memlayout(image);
+   pvr_image_init_physical_extent(image);
+   pvr_image_setup_mip_levels(image);
+
+   *pImage = pvr_image_to_handle(image);
+
+   return VK_SUCCESS;
+}
+
+/* vkDestroyImage entry point.
+ *
+ * Unbinds any bound VMA before freeing the image; NULL handles are a
+ * no-op per the Vulkan spec.
+ */
+void pvr_DestroyImage(VkDevice _device,
+                      VkImage _image,
+                      const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_image, image, _image);
+
+   if (!image)
+      return;
+
+   pvr_unbind_memory(device, image->vma);
+   vk_image_destroy(&device->vk, pAllocator, &image->vk);
+}
+
+/* clang-format off */
+/* Consider a 4 page buffer object.
+ *   _________________________________________
+ *  |         |          |         |          |
+ *  |_________|__________|_________|__________|
+ *                  |
+ *                  \__ offset (0.5 page size)
+ *
+ *                  |___size(2 pages)____|
+ *
+ *            |__VMA size required (3 pages)__|
+ *
+ *                  |
+ *                  \__ returned dev_addr = vma + offset % page_size
+ *
+ *   VMA size = align(size + offset % page_size, page_size);
+ *
+ *   Note: the above handling is currently divided between generic
+ *   driver code and winsys layer. Given are the details of how this is
+ *   being handled.
+ *   * As winsys vma allocation interface does not have offset information,
+ *     it can not calculate the extra size needed to adjust for the unaligned
+ *     offset. So generic code is responsible for allocating a VMA that has
+ *     extra space to deal with the above scenario.
+ *   * Remaining work of mapping the vma to bo is done by vma_map interface,
+ *     as it contains offset information, we don't need to do any adjustments
+ *     in the generic code for this part.
+ *
+ *  TODO: Look into merging heap_alloc and vma_map into single interface.
+ */
+/* clang-format on */
+
+/* vkBindImageMemory2 entry point.
+ *
+ * Binds each image to its device memory via pvr_bind_memory(), which
+ * records the resulting VMA and device address in the image. On failure,
+ * any bindings already made by this call are unwound before returning so
+ * the operation is all-or-nothing.
+ */
+VkResult pvr_BindImageMemory2(VkDevice _device,
+                              uint32_t bindInfoCount,
+                              const VkBindImageMemoryInfo *pBindInfos)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   uint32_t i;
+
+   for (i = 0; i < bindInfoCount; i++) {
+      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
+      PVR_FROM_HANDLE(pvr_image, image, pBindInfos[i].image);
+
+      VkResult result = pvr_bind_memory(device,
+                                        mem,
+                                        pBindInfos[i].memoryOffset,
+                                        image->size,
+                                        image->alignment,
+                                        &image->vma,
+                                        &image->dev_addr);
+      if (result != VK_SUCCESS) {
+         /* Unwind: unbind everything bound in earlier iterations. */
+         while (i--) {
+            PVR_FROM_HANDLE(pvr_image, image, pBindInfos[i].image);
+
+            pvr_unbind_memory(device, image->vma);
+         }
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* vkGetImageSubresourceLayout entry point.
+ *
+ * Reports the offset/pitches/size of one mip level of one array layer,
+ * using the layout computed in pvr_image_setup_mip_levels().
+ */
+void pvr_GetImageSubresourceLayout(VkDevice device,
+                                   VkImage _image,
+                                   const VkImageSubresource *subresource,
+                                   VkSubresourceLayout *layout)
+{
+   PVR_FROM_HANDLE(pvr_image, image, _image);
+   const struct pvr_mip_level *mip_level;
+
+   /* Validate the indices before using mipLevel to address into
+    * image->mip_levels; forming an out-of-bounds element address is
+    * undefined behavior even if never dereferenced.
+    */
+   pvr_assert(subresource->mipLevel < image->vk.mip_levels);
+   pvr_assert(subresource->arrayLayer < image->vk.array_layers);
+
+   mip_level = &image->mip_levels[subresource->mipLevel];
+
+   layout->offset =
+      subresource->arrayLayer * image->layer_size + mip_level->offset;
+   layout->rowPitch = mip_level->pitch;
+   layout->depthPitch = mip_level->pitch * mip_level->height_pitch;
+   layout->arrayPitch = image->layer_size;
+   layout->size = mip_level->size;
+}
+
+/* vkCreateImageView entry point.
+ *
+ * Creates the view and pre-packs up to three hardware texture states:
+ * SAMPLE (always), STORAGE (cube views of storage images only) and
+ * ATTACHMENT (always, packed as if the view had a single mip level).
+ * On any packing failure the view is destroyed and the error returned.
+ */
+VkResult pvr_CreateImageView(VkDevice _device,
+                             const VkImageViewCreateInfo *pCreateInfo,
+                             const VkAllocationCallbacks *pAllocator,
+                             VkImageView *pView)
+{
+   PVR_FROM_HANDLE(pvr_image, image, pCreateInfo->image);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_texture_state_info info;
+   unsigned char input_swizzle[4];
+   const uint8_t *format_swizzle;
+   struct pvr_image_view *iview;
+   VkResult result;
+
+   iview = vk_image_view_create(&device->vk,
+                                pCreateInfo,
+                                pAllocator,
+                                sizeof(*iview));
+   if (!iview)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   iview->image = image;
+
+   info.type = iview->vk.view_type;
+   info.base_level = iview->vk.base_mip_level;
+   info.mip_levels = iview->vk.level_count;
+   info.extent = image->vk.extent;
+   info.is_cube = (info.type == VK_IMAGE_VIEW_TYPE_CUBE ||
+                   info.type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY);
+   info.array_size = iview->vk.layer_count;
+   /* Base the texture address on the first array layer/mip level in view. */
+   info.offset = iview->vk.base_array_layer * image->layer_size +
+                 image->mip_levels[info.base_level].offset;
+   info.mipmaps_present = image->vk.mip_levels > 1;
+   info.stride = image->physical_extent.width - 1;
+   info.tex_state_type = PVR_TEXTURE_STATE_SAMPLE;
+   info.mem_layout = image->memlayout;
+   info.flags = 0;
+   info.sample_count = image->vk.samples;
+   info.addr = image->dev_addr;
+
+   /* TODO: if ERN_46863 is supported, Depth and stencil are sampled separately
+    * from images with combined depth+stencil. Add logic here to handle it.
+    */
+   info.format = iview->vk.format;
+
+   vk_component_mapping_to_pipe_swizzle(iview->vk.swizzle, input_swizzle);
+   format_swizzle = pvr_get_format_swizzle(info.format);
+   util_format_compose_swizzles(format_swizzle, input_swizzle, info.swizzle);
+
+   result = pvr_pack_tex_state(device,
+                               &info,
+                               iview->texture_state[info.tex_state_type]);
+   if (result != VK_SUCCESS)
+      goto err_vk_image_view_destroy;
+
+   /* Create an additional texture state for cube type if storage
+    * usage flag is set.
+    */
+   if (info.is_cube && image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+      info.tex_state_type = PVR_TEXTURE_STATE_STORAGE;
+      result = pvr_pack_tex_state(device,
+                                  &info,
+                                  iview->texture_state[info.tex_state_type]);
+      if (result != VK_SUCCESS)
+         goto err_vk_image_view_destroy;
+   }
+
+   /* Attachment state is created as if the mipmaps are not supported, so the
+    * baselevel is set to zero and num_mip_levels is set to 1. Which gives an
+    * impression that this is the only level in the image. This also requires
+    * that width, height and depth be adjusted as well. Given iview->vk.extent
+    * is already adjusted for base mip map level we use it here.
+    */
+   /* TODO: Investigate and document the reason for above approach. */
+   info.extent = iview->vk.extent;
+
+   info.mip_levels = 1;
+   info.mipmaps_present = false;
+   info.stride = u_minify(image->physical_extent.width, info.base_level) - 1;
+   info.base_level = 0;
+   info.tex_state_type = PVR_TEXTURE_STATE_ATTACHMENT;
+
+   result = pvr_pack_tex_state(device,
+                               &info,
+                               iview->texture_state[info.tex_state_type]);
+   if (result != VK_SUCCESS)
+      goto err_vk_image_view_destroy;
+
+   *pView = pvr_image_view_to_handle(iview);
+
+   return VK_SUCCESS;
+
+err_vk_image_view_destroy:
+   vk_image_view_destroy(&device->vk, pAllocator, &iview->vk);
+
+   return result;
+}
+
+/* vkDestroyImageView entry point. NULL handles are a no-op per spec. */
+void pvr_DestroyImageView(VkDevice _device,
+                          VkImageView _iview,
+                          const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_image_view, iview, _iview);
+
+   if (!iview)
+      return;
+
+   vk_image_view_destroy(&device->vk, pAllocator, &iview->vk);
+}
+
+/* vkCreateBufferView entry point — not yet implemented; traps in debug
+ * builds via the assert.
+ */
+VkResult pvr_CreateBufferView(VkDevice _device,
+                              const VkBufferViewCreateInfo *pCreateInfo,
+                              const VkAllocationCallbacks *pAllocator,
+                              VkBufferView *pView)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* vkDestroyBufferView entry point — not yet implemented; traps in debug
+ * builds via the assert.
+ */
+void pvr_DestroyBufferView(VkDevice _device,
+                           VkBufferView bufferView,
+                           const VkAllocationCallbacks *pAllocator)
+{
+   assert(!"Unimplemented");
+}
diff --git a/src/imagination/vulkan/pvr_job_common.c b/src/imagination/vulkan/pvr_job_common.c
new file mode 100644 (file)
index 0000000..747434e
--- /dev/null
@@ -0,0 +1,487 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_device_info.h"
+#include "pvr_job_common.h"
+#include "pvr_private.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_alloc.h"
+#include "vk_format.h"
+#include "vk_object.h"
+
+/* Translate a gallium pipe_swizzle into the PBE hardware swizzle encoding.
+ *
+ * PIPE_SWIZZLE_NONE is resolved using the Vulkan component identity: the
+ * alpha channel defaults to one, all others to zero.
+ */
+/* clang-format off */
+static enum PVRX(PBESTATE_SWIZ)
+pvr_get_pbe_hw_swizzle(VkComponentSwizzle comp, enum pipe_swizzle swz)
+/* clang-format on */
+{
+   switch (swz) {
+   case PIPE_SWIZZLE_0:
+      return ROGUE_PBESTATE_SWIZ_ZERO;
+   case PIPE_SWIZZLE_1:
+      return ROGUE_PBESTATE_SWIZ_ONE;
+   case PIPE_SWIZZLE_X:
+      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN0;
+   case PIPE_SWIZZLE_Y:
+      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN1;
+   case PIPE_SWIZZLE_Z:
+      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN2;
+   case PIPE_SWIZZLE_W:
+      return ROGUE_PBESTATE_SWIZ_SOURCE_CHAN3;
+   case PIPE_SWIZZLE_NONE:
+      if (comp == VK_COMPONENT_SWIZZLE_A)
+         return ROGUE_PBESTATE_SWIZ_ONE;
+      else
+         return ROGUE_PBESTATE_SWIZ_ZERO;
+   default:
+      unreachable("Unknown enum pipe_swizzle");
+   }
+}
+
+/* Select the PBE source format (and possibly override gamma) for a format.
+ *
+ * The output register source is either 8_PER_CHANNEL or F16_PER_CHANNEL,
+ * chosen from the Vulkan format's properties. sRGB formats force
+ * PVR_PBE_GAMMA_ENABLED (with an F16 source); otherwise *gamma_out keeps
+ * the caller-supplied default.
+ */
+void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
+                                      enum pvr_pbe_gamma default_gamma,
+                                      bool with_packed_usc_channel,
+                                      uint32_t *const src_format_out,
+                                      enum pvr_pbe_gamma *const gamma_out)
+{
+   uint32_t chan_0_width = vk_format_get_channel_width(vk_format, 0);
+
+   *gamma_out = default_gamma;
+
+   if (vk_format_has_32bit_component(vk_format) ||
+       vk_format_is_pure_integer(vk_format)) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   } else if (vk_format_is_float(vk_format)) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+   } else if (vk_format_is_srgb(vk_format)) {
+      *gamma_out = PVR_PBE_GAMMA_ENABLED;
+
+      /* F16 source for gamma'd formats. */
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+   } else if (vk_format_has_depth(vk_format) &&
+              vk_format_get_component_size_in_bits(vk_format,
+                                                   UTIL_FORMAT_COLORSPACE_ZS,
+                                                   0) > 16) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   } else if (vk_format_has_stencil(vk_format) &&
+              vk_format_get_component_size_in_bits(vk_format,
+                                                   UTIL_FORMAT_COLORSPACE_ZS,
+                                                   1) > 0) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   } else if (chan_0_width > 16) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   } else if (chan_0_width > 8) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+   } else if (!with_packed_usc_channel) {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_F16_PER_CHANNEL);
+   } else {
+      *src_format_out = PVRX(PBESTATE_SOURCE_FORMAT_8_PER_CHANNEL);
+   }
+}
+
+/* Translate a driver source-start position into the hardware SOURCE_POS
+ * field plus the 128-bit offset flag.
+ *
+ * Positions at bit 128 and above only exist on cores with the
+ * eight_output_registers feature; they reuse the 0/32/64/96 encodings
+ * with *src_pos_offset_128_out set. Without the feature, anything other
+ * than the explicit 32/64/96 cases falls back to BIT0.
+ */
+static void pvr_pbe_get_src_pos(struct pvr_device *device,
+                                enum pvr_pbe_source_start_pos source_start,
+                                uint32_t *const src_pos_out,
+                                bool *const src_pos_offset_128_out)
+{
+   *src_pos_offset_128_out = false;
+
+   switch (source_start) {
+   case PVR_PBE_STARTPOS_BIT32:
+      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32);
+      break;
+
+   case PVR_PBE_STARTPOS_BIT64:
+      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64);
+      break;
+
+   case PVR_PBE_STARTPOS_BIT96:
+      *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96);
+      break;
+
+   case PVR_PBE_STARTPOS_BIT0:
+   default:
+      if (PVR_HAS_FEATURE(&device->pdevice->dev_info, eight_output_registers)) {
+         switch (source_start) {
+         case PVR_PBE_STARTPOS_BIT128:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
+            *src_pos_offset_128_out = true;
+            break;
+
+         case PVR_PBE_STARTPOS_BIT160:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT32);
+            *src_pos_offset_128_out = true;
+            break;
+
+         case PVR_PBE_STARTPOS_BIT192:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT64);
+            *src_pos_offset_128_out = true;
+            break;
+
+         case PVR_PBE_STARTPOS_BIT224:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT96);
+            *src_pos_offset_128_out = true;
+            break;
+
+         default:
+            *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
+            break;
+         }
+      } else {
+         *src_pos_out = PVRX(PBESTATE_SOURCE_POS_START_BIT0);
+      }
+      break;
+   }
+}
+
+/* Pack PBE state words (command-stream) and register words for a surface.
+ *
+ * For depth-only renders only the "empty tile" bit is set and the rest is
+ * zeroed. Otherwise the surface address, source format/position, MRT
+ * index, memory layout, swizzle, stride and clip rectangle are packed
+ * into the two CS words and first two register words; reg word 2 is
+ * always zero here.
+ */
+void pvr_pbe_pack_state(
+   struct pvr_device *device,
+   const struct pvr_pbe_surf_params *surface_params,
+   const struct pvr_pbe_render_params *render_params,
+   uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
+   uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS])
+{
+   /* This function needs updating if the value of
+    * ROGUE_NUM_PBESTATE_STATE_WORDS changes, so check that it's the expected
+    * value.
+    */
+   STATIC_ASSERT(ROGUE_NUM_PBESTATE_STATE_WORDS == 2);
+
+   /* This function needs updating if the value of ROGUE_NUM_PBESTATE_REG_WORDS
+    * changes, so check that it's the expected value.
+    */
+   STATIC_ASSERT(ROGUE_NUM_PBESTATE_REG_WORDS == 3);
+
+   pbe_reg_words[2] = 0;
+
+   /* Depth-only render: emit only the empty-tile marker. */
+   if (surface_params->z_only_render) {
+      pbe_cs_words[0] = 0;
+
+      pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
+         state.emptytile = true;
+      }
+
+      pbe_reg_words[0] = 0;
+      pbe_reg_words[1] = 0;
+
+      return;
+   }
+
+   pvr_csb_pack (&pbe_cs_words[0], PBESTATE_STATE_WORD0, state) {
+      state.address_low = surface_params->addr;
+   }
+
+   pvr_csb_pack (&pbe_cs_words[1], PBESTATE_STATE_WORD1, state) {
+      state.address_high = surface_params->addr;
+
+      state.source_format = surface_params->source_format;
+
+      pvr_pbe_get_src_pos(device,
+                          render_params->source_start,
+                          &state.source_pos,
+                          &state.source_pos_offset_128);
+
+      /* MRT index (use 0 for a single render target). */
+      state.mrt_index = render_params->mrt_index;
+
+      /* Normalization flag based on output format. */
+      state.norm = surface_params->is_normalized;
+
+      state.packmode = surface_params->pbe_packmode;
+   }
+
+   pvr_csb_pack (&pbe_reg_words[0], PBESTATE_REG_WORD0, reg) {
+      reg.tilerelative = true;
+
+      switch (surface_params->mem_layout) {
+      case PVR_MEMLAYOUT_TWIDDLED:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_2D);
+         break;
+
+      case PVR_MEMLAYOUT_3DTWIDDLED:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_TWIDDLE_3D);
+         break;
+
+      case PVR_MEMLAYOUT_LINEAR:
+      default:
+         reg.memlayout = PVRX(PBESTATE_MEMLAYOUT_LINEAR);
+         break;
+      }
+
+      /* FIXME: Remove rotation and y_flip hardcoding if needed. */
+      reg.rotation = PVRX(PBESTATE_ROTATION_TYPE_0_DEG);
+      reg.y_flip = false;
+
+      /* Note: Due to gamma being overridden above, anything other than
+       * ENABLED/NONE is ignored.
+       */
+      if (surface_params->gamma == PVR_PBE_GAMMA_ENABLED) {
+         reg.gamma = true;
+
+         if (surface_params->nr_components == 2)
+            reg.twocomp_gamma =
+               PVRX(PBESTATE_TWOCOMP_GAMMA_GAMMA_BOTH_CHANNELS);
+      }
+
+      /* Line stride is stored in hardware units, biased by one. */
+      reg.linestride = (surface_params->stride - 1) /
+                       PVRX(PBESTATE_REG_WORD0_LINESTRIDE_UNIT_SIZE);
+      reg.minclip_x = render_params->min_x_clip;
+
+      reg.swiz_chan0 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_R,
+                                              surface_params->swizzle[0]);
+      reg.swiz_chan1 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_G,
+                                              surface_params->swizzle[1]);
+      reg.swiz_chan2 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_B,
+                                              surface_params->swizzle[2]);
+      reg.swiz_chan3 = pvr_get_pbe_hw_swizzle(VK_COMPONENT_SWIZZLE_A,
+                                              surface_params->swizzle[3]);
+
+      if (surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
+         reg.size_z = util_logbase2_ceil(surface_params->depth);
+
+      reg.downscale = surface_params->down_scale;
+   }
+
+   pvr_csb_pack (&pbe_reg_words[1], PBESTATE_REG_WORD1, reg) {
+      /* Twiddled layouts encode dimensions as log2 sizes. */
+      if (surface_params->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
+          surface_params->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
+         reg.size_x = util_logbase2_ceil(surface_params->width);
+         reg.size_y = util_logbase2_ceil(surface_params->height);
+      }
+
+      reg.minclip_y = render_params->min_y_clip;
+      reg.maxclip_x = render_params->max_x_clip;
+      reg.zslice = render_params->slice;
+      reg.maxclip_y = render_params->max_y_clip;
+   }
+}
+
+/* TODO: Split this into smaller functions to make it easier to follow. When
+ * doing this, it would be nice to have a function that returns
+ * total_tiles_in_flight so that CR_ISP_CTL can be fully packed in
+ * pvr_render_job_ws_fragment_state_init().
+ */
+/* Compute and pack CR_ISP_CTL and CR_USC_PIXEL_OUTPUT_CTRL for the given
+ * MSAA mode, per-pixel output register width and shader tile limit.
+ *
+ * Derives the USC partition size from the pixel width, MSAA rate and tile
+ * size, clamps it against the reserved partition store, then walks each
+ * phantom accumulating the total tiles in flight before packing both
+ * registers. NOTE(review): the exact partition-store formulas mirror
+ * hardware-specific limits — confirm against the BVNC feature tables
+ * before changing any of the clamping below.
+ */
+void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
+                               uint32_t msaa_mode,
+                               uint32_t pixel_width,
+                               bool paired_tiles,
+                               uint32_t max_tiles_in_flight,
+                               uint32_t *const isp_ctl_out,
+                               uint32_t *const pixel_ctl_out)
+{
+   uint32_t total_tiles_in_flight = 0;
+   uint32_t usable_partition_size;
+   uint32_t partitions_available;
+   uint32_t usc_min_output_regs;
+   uint32_t max_partitions;
+   uint32_t partition_size;
+   uint32_t max_phantoms;
+   uint32_t tile_size_x;
+   uint32_t tile_size_y;
+   uint32_t isp_samples;
+
+   /* Round up the pixel width to the next allocation granularity. */
+   usc_min_output_regs =
+      PVR_GET_FEATURE_VALUE(dev_info, usc_min_output_registers_per_pix, 0);
+   pixel_width = MAX2(pixel_width, usc_min_output_regs);
+   pixel_width = util_next_power_of_two(pixel_width);
+
+   assert(pixel_width <= rogue_get_max_output_regs_per_pixel(dev_info));
+
+   partition_size = pixel_width;
+
+   /* Scale the partition size by the per-pixel sample storage needed for
+    * the requested MSAA mode.
+    */
+   isp_samples = PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 1);
+   if (isp_samples == 2) {
+      if (msaa_mode != PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE))
+         partition_size *= 2U;
+   } else if (isp_samples == 4) {
+      if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_4X) ||
+          msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_8X))
+         partition_size *= 4U;
+      else if (msaa_mode == PVRX(CR_ISP_AA_MODE_TYPE_AA_2X))
+         partition_size *= 2U;
+   }
+
+   /* Cores with a tile size of 16x16 don't have quadrant affinity. Hence the
+    * partition size is the same as for a 32x32 tile quadrant (with no MSAA).
+    * When MSAA is enabled, the USC has to process half the tile (16x8 pixels).
+    */
+   tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 0);
+   tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 0);
+
+   /* We only support square tiles. */
+   assert(tile_size_x == tile_size_y);
+
+   if (tile_size_x == 16U) {
+      /* Cores with 16x16 tiles does not use tile quadrants. */
+      partition_size *= tile_size_x * tile_size_y;
+   } else {
+      /* Size of a tile quadrant (in dwords). */
+      partition_size *= (tile_size_x * tile_size_y / 4U);
+   }
+
+   /* Maximum available partition space for partitions of this size. */
+   max_partitions = PVR_GET_FEATURE_VALUE(dev_info, max_partitions, 0);
+   usable_partition_size =
+      MIN2(rogue_get_total_reserved_partition_size(dev_info),
+           partition_size * max_partitions);
+
+   if (PVR_GET_FEATURE_VALUE(dev_info, common_store_size_in_dwords, 0) <
+       (1024 * 4 * 4)) {
+      /* Do not apply the limit for cores with 16x16 tile size (no quadrant
+       * affinity).
+       */
+      if (tile_size_x != 16) {
+         /* This is to counter the extremely limited CS size on some cores.
+          */
+         /* Available partition space is limited to 8 tile quadrants. */
+         usable_partition_size =
+            MIN2((tile_size_x * tile_size_y / 4U) * 8U, usable_partition_size);
+      }
+   }
+
+   /* Ensure that maximum number of partitions in use is not greater
+    * than the total number of partitions available.
+    */
+   partitions_available =
+      MIN2(max_partitions, usable_partition_size / partition_size);
+
+   if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
+      max_phantoms = rogue_get_num_phantoms(dev_info);
+   else if (PVR_HAS_FEATURE(dev_info, roguexe))
+      max_phantoms = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0);
+   else
+      max_phantoms = 1;
+
+   /* Accumulate the total tiles in flight over all phantoms. */
+   for (uint32_t i = 0; i < max_phantoms; i++) {
+      uint32_t usc_tiles_in_flight = partitions_available;
+      uint32_t isp_tiles_in_flight;
+
+      /* Cores with tiles size other than 16x16 use tile quadrants. */
+      if (tile_size_x != 16) {
+         uint32_t num_clusters =
+            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 0U);
+         usc_tiles_in_flight =
+            (usc_tiles_in_flight * MIN2(4U, num_clusters - (4U * i))) / 4U;
+      }
+
+      assert(usc_tiles_in_flight > 0);
+
+      isp_tiles_in_flight =
+         PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 0);
+      /* Ensure that maximum number of ISP tiles in flight is not greater
+       * than the maximum number of USC tiles in flight.
+       */
+      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ||
+          PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) !=
+             2) {
+         isp_tiles_in_flight /= rogue_get_num_phantoms(dev_info);
+      }
+
+      isp_tiles_in_flight = MIN2(usc_tiles_in_flight, isp_tiles_in_flight);
+
+      /* Limit the number of tiles in flight if the shaders have
+       * requested a large allocation of local memory.
+       */
+      if (max_tiles_in_flight > 0U) {
+         isp_tiles_in_flight = MIN2(usc_tiles_in_flight, max_tiles_in_flight);
+
+         if (PVR_HAS_FEATURE(dev_info, roguexe)) {
+            if (tile_size_x == 16) {
+               /* The FW infers the tiles in flight value from the
+                * partitions setting.
+                */
+               /* Partitions per tile. */
+               partitions_available = isp_tiles_in_flight;
+            } else {
+               /* Partitions per tile quadrant. */
+               partitions_available = isp_tiles_in_flight * 4U;
+            }
+         }
+      }
+
+      /* Due to limitations of ISP_CTL_PIPE there can only be a difference of
+       * 1 between Phantoms.
+       */
+      if (total_tiles_in_flight > (isp_tiles_in_flight + 1U))
+         total_tiles_in_flight = isp_tiles_in_flight + 1U;
+
+      total_tiles_in_flight += isp_tiles_in_flight;
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) &&
+       PVR_GET_FEATURE_VALUE(dev_info, simple_parameter_format_version, 0) ==
+          2) {
+      /* Limit the ISP tiles in flight to fit into the available USC partition
+       * store.
+       */
+      total_tiles_in_flight = MIN2(total_tiles_in_flight, partitions_available);
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, paired_tiles) && paired_tiles) {
+      total_tiles_in_flight =
+         MIN2(total_tiles_in_flight, partitions_available / 2);
+   }
+
+   pvr_csb_pack (pixel_ctl_out, CR_USC_PIXEL_OUTPUT_CTRL, reg) {
+      if (pixel_width == 1 && usc_min_output_regs == 1) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
+      } else if (pixel_width == 2) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
+      } else if (pixel_width == 4) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_4REGISTERS);
+      } else if (pixel_width == 8 &&
+                 PVR_HAS_FEATURE(dev_info, eight_output_registers)) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_8REGISTERS);
+      } else if (usc_min_output_regs == 1) {
+         reg.width = PVRX(CR_PIXEL_WIDTH_1REGISTER);
+      } else {
+         reg.width = PVRX(CR_PIXEL_WIDTH_2REGISTERS);
+      }
+
+      if (PVR_HAS_FEATURE(dev_info, usc_pixel_partition_mask)) {
+         /* Setup the partition mask based on the maximum number of
+          * partitions available.
+          */
+         reg.partition_mask = (1 << max_partitions) - 1;
+      } else {
+         reg.enable_4th_partition = true;
+
+         /* Setup the partition mask based on the number of partitions
+          * available.
+          */
+         reg.partition_mask = (1U << partitions_available) - 1U;
+      }
+   }
+
+   pvr_csb_pack (isp_ctl_out, CR_ISP_CTL, reg) {
+      if (PVR_HAS_FEATURE(dev_info, xt_top_infrastructure))
+         reg.pipe_enable = (2 * total_tiles_in_flight) - 1;
+      else
+         reg.pipe_enable = total_tiles_in_flight - 1;
+   }
+}
diff --git a/src/imagination/vulkan/pvr_job_common.h b/src/imagination/vulkan/pvr_job_common.h
new file mode 100644 (file)
index 0000000..f197002
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_COMMON_H
+#define PVR_JOB_COMMON_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+
+/** Gamma (sRGB) encoding mode applied by the Pixel Back End when storing a
+ * surface. Which channels get gamma applied depends on the pack format.
+ */
+enum pvr_pbe_gamma {
+   PVR_PBE_GAMMA_NONE,
+   /* For two-channel pack formats. */
+   PVR_PBE_GAMMA_RED,
+   PVR_PBE_GAMMA_REDGREEN,
+   /* For all other pack formats. */
+   PVR_PBE_GAMMA_ENABLED,
+};
+
+/** Bit offset within the USC output buffer at which the PBE starts reading
+ * pixel data (see pvr_pbe_render_params::source_start).
+ */
+enum pvr_pbe_source_start_pos {
+   PVR_PBE_STARTPOS_BIT0,
+   PVR_PBE_STARTPOS_BIT32,
+   PVR_PBE_STARTPOS_BIT64,
+   PVR_PBE_STARTPOS_BIT96,
+   /* The below ones are available if has_eight_output_registers feature is
+    * enabled.
+    */
+   PVR_PBE_STARTPOS_BIT128,
+   PVR_PBE_STARTPOS_BIT160,
+   PVR_PBE_STARTPOS_BIT192,
+   PVR_PBE_STARTPOS_BIT224,
+};
+
+/**
+ * These are parameters specific to the surface being set up and hence can be
+ * typically set up at surface creation time.
+ */
+struct pvr_pbe_surf_params {
+   /* Swizzle for a format can be retrieved using pvr_get_format_swizzle(). */
+   uint8_t swizzle[4];
+   /* is_normalized can be retrieved using vk_format_is_normalized(). */
+   bool is_normalized;
+   /* pbe_packmode can be retrieved using pvr_get_pbe_packmode(). */
+   uint32_t pbe_packmode;
+   /* source_format and gamma can be retrieved using
+    * pvr_pbe_get_src_format_and_gamma().
+    */
+   uint32_t source_format;
+   enum pvr_pbe_gamma gamma;
+   /* nr_components can be retrieved using vk_format_get_nr_components(). */
+   uint32_t nr_components;
+
+   /* When an RT of MRT is packed using less USC outputs, this flag needs to be
+    * setup to true.
+    *
+    * Currently, this flag is only considered when has_usc_f16_sop is enabled.
+    * And it needs to be true when a render target by default should use F16
+    * USC channel but uses U8 channel instead for squeezing into on-chip MRT.
+    *
+    * It is better to make this member with FF_ACCUMFORMAT type or, at least,
+    * describe USC channel size. But for now, only use this flag for
+    * simplicity.
+    */
+   /* NOTE(review): the comment above describes a flag member that is not
+    * present in this struct — confirm whether the member was intentionally
+    * dropped or the comment is stale.
+    */
+
+   /* Device-virtual address of the surface memory. */
+   pvr_dev_addr_t addr;
+   enum pvr_memlayout mem_layout;
+   uint32_t stride;
+
+   /* Depth size for renders */
+   uint32_t depth;
+
+   /* Pre-rotation dimensions of surface */
+   uint32_t width;
+   uint32_t height;
+
+   bool z_only_render;
+   bool down_scale;
+   uint32_t msaa_mode;
+};
+
+/**
+ * These parameters are generally render-specific and need to be set up at the
+ * time #pvr_pbe_pack_state() is called.
+ */
+struct pvr_pbe_render_params {
+   /* Clipping params are in terms of pixels and are inclusive. */
+   uint32_t min_x_clip;
+   uint32_t max_x_clip;
+
+   uint32_t min_y_clip;
+   uint32_t max_y_clip;
+
+   /* Start position of pixels to be read within 128bit USC output buffer. */
+   enum pvr_pbe_source_start_pos source_start;
+
+   /* 9-bit slice number to be used when memlayout is 3D twiddle. */
+   uint32_t slice;
+
+   /* Render target index — presumably the index within the MRT set; confirm
+    * against callers of pvr_pbe_pack_state().
+    */
+   uint32_t mrt_index;
+};
+
+/* Pack the PBE control-stream words and PBE register words for one surface /
+ * render combination.
+ */
+void pvr_pbe_pack_state(
+   struct pvr_device *device,
+   const struct pvr_pbe_surf_params *surface_params,
+   const struct pvr_pbe_render_params *render_params,
+   uint32_t pbe_cs_words[static const ROGUE_NUM_PBESTATE_STATE_WORDS],
+   uint64_t pbe_reg_words[static const ROGUE_NUM_PBESTATE_REG_WORDS]);
+
+/* Helper to calculate pvr_pbe_surf_params::gamma and
+ * pvr_pbe_surf_params::source_format.
+ */
+void pvr_pbe_get_src_format_and_gamma(VkFormat vk_format,
+                                      enum pvr_pbe_gamma default_gamma,
+                                      bool with_packed_usc_channel,
+                                      uint32_t *const src_format_out,
+                                      enum pvr_pbe_gamma *const gamma_out);
+
+/* Compute the ISP and USC pixel-output control register values that limit the
+ * number of tiles in flight for the given MSAA mode and pixel output width.
+ */
+void pvr_setup_tiles_in_flight(const struct pvr_device_info *dev_info,
+                               uint32_t msaa_mode,
+                               uint32_t pixel_width,
+                               bool paired_tiles,
+                               uint32_t max_tiles_in_flight,
+                               uint32_t *const isp_ctl_out,
+                               uint32_t *const pixel_ctl_out);
+
+#endif /* PVR_JOB_COMMON_H */
diff --git a/src/imagination/vulkan/pvr_job_compute.c b/src/imagination/vulkan/pvr_job_compute.c
new file mode 100644 (file)
index 0000000..34b55ca
--- /dev/null
@@ -0,0 +1,101 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_csb.h"
+#include "pvr_job_common.h"
+#include "pvr_job_context.h"
+#include "pvr_job_compute.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/* Fill in the winsys submit info for a compute sub-command.
+ *
+ * Only per-submit fields and the CDM resume PDS1 register are set here; the
+ * remaining registers are initialized earlier (see the comment below
+ * referencing pvr_sub_cmd_compute_job_init()).
+ */
+static void pvr_compute_job_ws_submit_info_init(
+   struct pvr_compute_ctx *ctx,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_compute_submit_info *submit_info)
+{
+   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
+   uint32_t shared_regs = sub_cmd->compute.num_shared_regs;
+
+   submit_info->frame_num = ctx->device->global_queue_present_count;
+   submit_info->job_num = ctx->device->global_queue_job_count;
+
+   submit_info->semaphores = semaphores;
+   submit_info->semaphore_count = semaphore_count;
+   submit_info->stage_flags = stage_flags;
+
+   /* Other registers are initialized in pvr_sub_cmd_compute_job_init(). */
+   pvr_csb_pack (&submit_info->regs.cdm_resume_pds1,
+                 CR_CDM_CONTEXT_PDS1,
+                 state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t load_program_data_size =
+         ctx_switch->sr[0].pds.load_program.data_size * 4U;
+
+      state.pds_seq_dep = false;
+      state.usc_seq_dep = false;
+      state.target = false;
+      state.unified_size = ctx_switch->sr[0].usc.unified_size;
+      state.common_shared = true;
+      /* shared_regs << 2: register count scaled to bytes (4 bytes per
+       * register — presumably; confirm against the CDM_CONTEXT_PDS1 field
+       * definition), then rounded up to the hardware unit size.
+       */
+      state.common_size =
+         DIV_ROUND_UP(shared_regs << 2,
+                      PVRX(CR_CDM_CONTEXT_PDS1_COMMON_SIZE_UNIT_SIZE));
+      state.temp_size = 0;
+
+      /* The PDS data size must be an exact multiple of the hardware unit. */
+      assert(load_program_data_size %
+                PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.data_size =
+         load_program_data_size / PVRX(CR_CDM_CONTEXT_PDS1_DATA_SIZE_UNIT_SIZE);
+      state.fence = false;
+   }
+}
+
+/* Submit one compute sub-command to the winsys.
+ *
+ * Initializes the sub-command's cached winsys submit info and hands it to the
+ * winsys compute-submit hook; the resulting sync object is returned through
+ * syncobj_out.
+ */
+VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
+                                struct pvr_sub_cmd *sub_cmd,
+                                const VkSemaphore *semaphores,
+                                uint32_t semaphore_count,
+                                uint32_t *stage_flags,
+                                struct pvr_winsys_syncobj **const syncobj_out)
+{
+   struct pvr_winsys_compute_submit_info *const submit_info =
+      &sub_cmd->compute.submit_info;
+   struct pvr_device *const device = ctx->device;
+
+   pvr_compute_job_ws_submit_info_init(ctx,
+                                       sub_cmd,
+                                       semaphores,
+                                       semaphore_count,
+                                       stage_flags,
+                                       submit_info);
+
+   return device->ws->ops->compute_submit(ctx->ws_ctx,
+                                          submit_info,
+                                          syncobj_out);
+}
diff --git a/src/imagination/vulkan/pvr_job_compute.h b/src/imagination/vulkan/pvr_job_compute.h
new file mode 100644 (file)
index 0000000..b71add8
--- /dev/null
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_COMPUTE_H
+#define PVR_JOB_COMPUTE_H
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+struct pvr_compute_ctx;
+struct pvr_sub_cmd;
+struct pvr_winsys_syncobj;
+
+/* Submit a compute sub-command to the winsys; returns a sync object for the
+ * submission through syncobj_out. Implemented in pvr_job_compute.c.
+ */
+VkResult pvr_compute_job_submit(struct pvr_compute_ctx *ctx,
+                                struct pvr_sub_cmd *sub_cmd,
+                                const VkSemaphore *semaphores,
+                                uint32_t semaphore_count,
+                                uint32_t *stage_flags,
+                                struct pvr_winsys_syncobj **const syncobj_out);
+
+#endif /* PVR_JOB_COMPUTE_H */
diff --git a/src/imagination/vulkan/pvr_job_context.c b/src/imagination/vulkan/pvr_job_context.c
new file mode 100644 (file)
index 0000000..871a75b
--- /dev/null
@@ -0,0 +1,1183 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_job_context.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_cdm_load_sr.h"
+#include "pvr_vdm_load_sr.h"
+#include "pvr_vdm_store_sr.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+/* TODO: Is there some way to ensure the Vulkan driver doesn't exceed this
+ * value when constructing the control stream?
+ */
+/* The VDM callstack is used by the hardware to implement control stream links
+ * with a return, i.e. sub-control streams/subroutines. This value specifies the
+ * maximum callstack depth.
+ */
+#define PVR_VDM_CALLSTACK_MAX_DEPTH 1U
+
+#define ROGUE_PDS_TASK_PROGRAM_SIZE 256U
+
+/* Initialize the per-context reset command.
+ *
+ * Currently only validates device requirements and flags known unimplemented
+ * quirk workarounds; reset_cmd is left untouched and VK_SUCCESS is always
+ * returned.
+ */
+static VkResult pvr_ctx_reset_cmd_init(struct pvr_device *device,
+                                       struct pvr_reset_cmd *const reset_cmd)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+
+   /* The reset framework depends on compute support in the hw. */
+   assert(PVR_HAS_FEATURE(dev_info, compute));
+
+   if (PVR_HAS_QUIRK(dev_info, 51764))
+      pvr_finishme("Missing reset support for brn51764");
+
+   if (PVR_HAS_QUIRK(dev_info, 58839))
+      pvr_finishme("Missing reset support for brn58839");
+
+   return VK_SUCCESS;
+}
+
+/* Counterpart to pvr_ctx_reset_cmd_init(); currently a no-op since init
+ * allocates nothing.
+ */
+static void pvr_ctx_reset_cmd_fini(struct pvr_device *device,
+                                   struct pvr_reset_cmd *reset_cmd)
+
+{
+   /* TODO: reset command cleanup. */
+}
+
+/* Create and upload the PDS program that stores persistent temporaries to
+ * pt_bo on a render-context store (stream-out terminate).
+ *
+ * The PDS generator is run three times: once to compute segment sizes, then
+ * once each to emit the data and code segments into a host staging buffer,
+ * which is finally uploaded to device memory.
+ *
+ * On success the upload is described in pds_upload_out; the caller owns the
+ * returned pvr_bo (pds_upload_out->pvr_bo) and must free it.
+ */
+static VkResult pvr_pds_pt_store_program_create_and_upload(
+   struct pvr_device *device,
+   struct pvr_bo *pt_bo,
+   uint32_t pt_bo_size,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_stream_out_terminate_program program = { 0 };
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   size_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   uint32_t *data_buffer;
+   uint32_t *code_buffer;
+   VkResult result;
+
+   /* Check the bo size can be converted to dwords without any rounding. */
+   assert(pt_bo_size % 4 == 0);
+
+   program.pds_persistent_temp_size_to_store = pt_bo_size / 4;
+   program.dev_address_for_storing_persistent_temp = pt_bo->vma->dev_addr.addr;
+
+   /* First pass: fill in the data/code segment sizes only. */
+   pvr_pds_generate_stream_out_terminate_program(&program,
+                                                 NULL,
+                                                 PDS_GENERATE_SIZES,
+                                                 dev_info);
+
+   staging_buffer_size = (program.stream_out_terminate_pds_data_size +
+                          program.stream_out_terminate_pds_code_size) *
+                         sizeof(*staging_buffer);
+
+   staging_buffer = vk_zalloc(&device->vk.alloc,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Second/third passes: emit the data segment, then the code segment
+    * immediately after it (the data-segment call returns the code pointer).
+    */
+   data_buffer = staging_buffer;
+   code_buffer =
+      pvr_pds_generate_stream_out_terminate_program(&program,
+                                                    data_buffer,
+                                                    PDS_GENERATE_DATA_SEGMENT,
+                                                    dev_info);
+   pvr_pds_generate_stream_out_terminate_program(&program,
+                                                 code_buffer,
+                                                 PDS_GENERATE_CODE_SEGMENT,
+                                                 dev_info);
+
+   /* This PDS program is passed to the HW via the PPP state words. These only
+    * allow the data segment address to be specified and expect the code
+    * segment to immediately follow. Assume the code alignment is the same as
+    * the data.
+    */
+   result =
+      pvr_gpu_upload_pds(device,
+                         data_buffer,
+                         program.stream_out_terminate_pds_data_size,
+                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
+                         code_buffer,
+                         program.stream_out_terminate_pds_code_size,
+                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
+                         cache_line_size,
+                         pds_upload_out);
+
+   vk_free(&device->vk.alloc, staging_buffer);
+
+   return result;
+}
+
+/* Create and upload the PDS program that reloads persistent temporaries from
+ * pt_bo on a render-context resume (stream-out init).
+ *
+ * Mirrors pvr_pds_pt_store_program_create_and_upload(): a sizes pass, then
+ * data and code emission into a staging buffer, then upload. The caller owns
+ * pds_upload_out->pvr_bo on success.
+ */
+static VkResult pvr_pds_pt_resume_program_create_and_upload(
+   struct pvr_device *device,
+   struct pvr_bo *pt_bo,
+   uint32_t pt_bo_size,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_stream_out_init_program program = { 0 };
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   size_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   uint32_t *data_buffer;
+   uint32_t *code_buffer;
+   VkResult result;
+
+   /* Check the bo size can be converted to dwords without any rounding. */
+   assert(pt_bo_size % 4 == 0);
+
+   /* A single buffer holding the entire persistent-temp state. */
+   program.num_buffers = 1;
+   program.pds_buffer_data_size[0] = pt_bo_size / 4;
+   program.dev_address_for_buffer_data[0] = pt_bo->vma->dev_addr.addr;
+
+   /* First pass: fill in the data/code segment sizes only. */
+   pvr_pds_generate_stream_out_init_program(&program,
+                                            NULL,
+                                            false,
+                                            PDS_GENERATE_SIZES,
+                                            dev_info);
+
+   staging_buffer_size = (program.stream_out_init_pds_data_size +
+                          program.stream_out_init_pds_code_size) *
+                         sizeof(*staging_buffer);
+
+   staging_buffer = vk_zalloc(&device->vk.alloc,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Emit data, then code directly after it in the staging buffer. */
+   data_buffer = staging_buffer;
+   code_buffer =
+      pvr_pds_generate_stream_out_init_program(&program,
+                                               data_buffer,
+                                               false,
+                                               PDS_GENERATE_DATA_SEGMENT,
+                                               dev_info);
+   pvr_pds_generate_stream_out_init_program(&program,
+                                            code_buffer,
+                                            false,
+                                            PDS_GENERATE_CODE_SEGMENT,
+                                            dev_info);
+
+   /* This PDS program is passed to the HW via the PPP state words. These only
+    * allow the data segment address to be specified and expect the code
+    * segment to immediately follow. Assume the code alignment is the same as
+    * the data.
+    */
+   result =
+      pvr_gpu_upload_pds(device,
+                         data_buffer,
+                         program.stream_out_init_pds_data_size,
+                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
+                         code_buffer,
+                         program.stream_out_init_pds_code_size,
+                         PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE),
+                         cache_line_size,
+                         pds_upload_out);
+
+   vk_free(&device->vk.alloc, staging_buffer);
+
+   return result;
+}
+
+/* Allocate the persistent-temp state buffer and create the PDS store/resume
+ * programs that operate on it.
+ *
+ * On failure, everything allocated so far is released via the goto-cleanup
+ * chain and the failing result is returned. On success the caller must
+ * release with pvr_render_job_pt_programs_cleanup().
+ */
+static VkResult
+pvr_render_job_pt_programs_setup(struct pvr_device *device,
+                                 struct rogue_pt_programs *pt_programs)
+{
+   VkResult result;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.pds_heap,
+                         ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
+                         ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_ALIGNMENT,
+                         PVR_BO_ALLOC_FLAG_CPU_ACCESS,
+                         &pt_programs->store_resume_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_pds_pt_store_program_create_and_upload(
+      device,
+      pt_programs->store_resume_state_bo,
+      ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
+      &pt_programs->pds_store_program);
+   if (result != VK_SUCCESS)
+      goto err_free_store_resume_state_bo;
+
+   result = pvr_pds_pt_resume_program_create_and_upload(
+      device,
+      pt_programs->store_resume_state_bo,
+      ROGUE_LLS_PDS_PERSISTENT_TEMPS_BUFFER_SIZE,
+      &pt_programs->pds_resume_program);
+   if (result != VK_SUCCESS)
+      goto err_free_pds_store_program;
+
+   return VK_SUCCESS;
+
+err_free_pds_store_program:
+   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
+
+err_free_store_resume_state_bo:
+   pvr_bo_free(device, pt_programs->store_resume_state_bo);
+
+   return result;
+}
+
+/* Release the BOs created by pvr_render_job_pt_programs_setup(), in reverse
+ * order of creation.
+ */
+static void
+pvr_render_job_pt_programs_cleanup(struct pvr_device *device,
+                                   struct rogue_pt_programs *pt_programs)
+{
+   pvr_bo_free(device, pt_programs->pds_resume_program.pvr_bo);
+   pvr_bo_free(device, pt_programs->pds_store_program.pvr_bo);
+   pvr_bo_free(device, pt_programs->store_resume_state_bo);
+}
+
+/* Build the descriptor for a shared-register store/load PDS program.
+ *
+ * The two DOUTW constants give the device addresses that bracket the shared
+ * register area: its start, and the start of the region after the reserved
+ * block. The USC task control is then pointed at the uploaded USC program.
+ */
+static void pvr_pds_ctx_sr_program_setup(
+   bool cc_enable,
+   uint64_t usc_program_upload_offset,
+   uint8_t usc_temps,
+   pvr_dev_addr_t sr_addr,
+   struct pvr_pds_shared_storing_program *const program_out)
+{
+   /* The PDS task is the same for stores and loads. */
+   *program_out = (struct pvr_pds_shared_storing_program){
+      .cc_enable = cc_enable,
+      .doutw_control = {
+         .dest_store = PDS_UNIFIED_STORE,
+         .num_const64 = 2,
+         .doutw_data = {
+            [0] = sr_addr.addr,
+            [1] = sr_addr.addr + ROGUE_LLS_SHARED_REGS_RESERVE_SIZE,
+         },
+         .last_instruction = false,
+      },
+   };
+
+   pvr_pds_setup_doutu(&program_out->usc_task.usc_task_control,
+                       usc_program_upload_offset,
+                       usc_temps,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+}
+
+/* Note: pvr_pds_compute_ctx_sr_program_create_and_upload() is very similar to
+ * this. If there is a problem here it's likely that the same problem exists
+ * there so don't forget to update the compute function.
+ */
+/* Generate and upload the render-context shared-register store/load PDS
+ * program. The data segment is emitted at the start of a fixed-size staging
+ * buffer and the code segment at the next aligned offset after it.
+ */
+static VkResult pvr_pds_render_ctx_sr_program_create_and_upload(
+   struct pvr_device *device,
+   uint64_t usc_program_upload_offset,
+   uint8_t usc_temps,
+   pvr_dev_addr_t sr_addr,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   const uint32_t pds_data_alignment =
+      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
+
+   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
+    * and code size when using the PDS_GENERATE_SIZES mode.
+    */
+   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
+   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
+   struct pvr_pds_shared_storing_program program;
+   ASSERTED uint32_t *buffer_end;
+   uint32_t code_offset;
+
+   pvr_pds_ctx_sr_program_setup(false,
+                                usc_program_upload_offset,
+                                usc_temps,
+                                sr_addr,
+                                &program);
+
+   /* Emit the data segment; program.data_size is filled in as a side effect.
+    */
+   pvr_pds_generate_shared_storing_program(&program,
+                                           &staging_buffer[0],
+                                           PDS_GENERATE_DATA_SEGMENT,
+                                           dev_info);
+
+   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
+
+   buffer_end =
+      pvr_pds_generate_shared_storing_program(&program,
+                                              &staging_buffer[code_offset],
+                                              PDS_GENERATE_CODE_SEGMENT,
+                                              dev_info);
+
+   /* Guard against the generated program overflowing the staging buffer. */
+   assert((uint32_t)(buffer_end - staging_buffer) * 4 <
+          ROGUE_PDS_TASK_PROGRAM_SIZE);
+
+   return pvr_gpu_upload_pds(device,
+                             &staging_buffer[0],
+                             program.data_size,
+                             PVRX(VDMCTRL_PDS_STATE1_PDS_DATA_ADDR_ALIGNMENT),
+                             &staging_buffer[code_offset],
+                             program.code_size,
+                             PVRX(VDMCTRL_PDS_STATE2_PDS_CODE_ADDR_ALIGNMENT),
+                             cache_line_size,
+                             pds_upload_out);
+}
+
+/* Note: pvr_pds_render_ctx_sr_program_create_and_upload() is very similar to
+ * this. If there is a problem here it's likely that the same problem exists
+ * there so don't forget to update the render_ctx function.
+ */
+/* Generate and upload the compute-context shared-register store/load PDS
+ * program.
+ *
+ * Differences from the render variant: conditional execution may be enabled
+ * by ERN 35421; when building the loading program on devices needing the SW
+ * compute PDS barrier, the shared-loading generator is used and a conditional
+ * barrier stub is emitted at the start of the code segment.
+ */
+static VkResult pvr_pds_compute_ctx_sr_program_create_and_upload(
+   struct pvr_device *device,
+   bool is_loading_program,
+   uint64_t usc_program_upload_offset,
+   uint8_t usc_temps,
+   pvr_dev_addr_t sr_addr,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   const uint32_t pds_data_alignment =
+      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
+
+   /* FIXME: pvr_pds_generate_shared_storing_program() doesn't return the data
+    * and code size when using the PDS_GENERATE_SIZES mode.
+    */
+   STATIC_ASSERT(ROGUE_PDS_TASK_PROGRAM_SIZE % 4 == 0);
+   uint32_t staging_buffer[ROGUE_PDS_TASK_PROGRAM_SIZE / 4U] = { 0 };
+   struct pvr_pds_shared_storing_program program;
+   uint32_t *buffer_ptr;
+   uint32_t code_offset;
+
+   pvr_pds_ctx_sr_program_setup(PVR_HAS_ERN(dev_info, 35421),
+                                usc_program_upload_offset,
+                                usc_temps,
+                                sr_addr,
+                                &program);
+
+   /* Emit the data segment; program.data_size is filled in as a side effect.
+    */
+   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
+      pvr_pds_generate_compute_shared_loading_program(&program,
+                                                      &staging_buffer[0],
+                                                      PDS_GENERATE_DATA_SEGMENT,
+                                                      dev_info);
+   } else {
+      pvr_pds_generate_shared_storing_program(&program,
+                                              &staging_buffer[0],
+                                              PDS_GENERATE_DATA_SEGMENT,
+                                              dev_info);
+   }
+
+   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
+
+   /* The conditional barrier code is emitted first so it runs before the
+    * store/load body.
+    */
+   buffer_ptr =
+      pvr_pds_generate_compute_barrier_conditional(&staging_buffer[code_offset],
+                                                   PDS_GENERATE_CODE_SEGMENT);
+
+   if (is_loading_program && PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
+      buffer_ptr = pvr_pds_generate_compute_shared_loading_program(
+         &program,
+         buffer_ptr,
+         PDS_GENERATE_CODE_SEGMENT,
+         dev_info);
+   } else {
+      buffer_ptr =
+         pvr_pds_generate_shared_storing_program(&program,
+                                                 buffer_ptr,
+                                                 PDS_GENERATE_CODE_SEGMENT,
+                                                 dev_info);
+   }
+
+   /* Guard against the generated program overflowing the staging buffer. */
+   assert((uint32_t)(buffer_ptr - staging_buffer) * 4 <
+          ROGUE_PDS_TASK_PROGRAM_SIZE);
+
+   /* The store (CONTEXT_PDS0) and load (CONTEXT_LOAD_PDS0) register fields
+    * must agree on alignment since one upload serves both.
+    */
+   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT) ==
+                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_DATA_ADDR_ALIGNMENT));
+
+   STATIC_ASSERT(PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT) ==
+                 PVRX(CR_CDM_CONTEXT_LOAD_PDS0_CODE_ADDR_ALIGNMENT));
+
+   return pvr_gpu_upload_pds(
+      device,
+      &staging_buffer[0],
+      program.data_size,
+      PVRX(CR_CDM_CONTEXT_PDS0_DATA_ADDR_ALIGNMENT),
+      &staging_buffer[code_offset],
+      (uint32_t)(buffer_ptr - &staging_buffer[code_offset]),
+      PVRX(CR_CDM_CONTEXT_PDS0_CODE_ADDR_ALIGNMENT),
+      cache_line_size,
+      pds_upload_out);
+}
+
+/* Selects which context type pvr_ctx_sr_programs_setup() builds the
+ * shared-register store/load programs for.
+ */
+enum pvr_ctx_sr_program_target {
+   PVR_CTX_SR_RENDER_TARGET,
+   PVR_CTX_SR_COMPUTE_TARGET,
+};
+
+static VkResult pvr_ctx_sr_programs_setup(struct pvr_device *device,
+                                          enum pvr_ctx_sr_program_target target,
+                                          struct rogue_sr_programs *sr_programs)
+{
+   const uint64_t store_load_state_bo_size =
+      PVRX(LLS_USC_SHARED_REGS_BUFFER_SIZE) +
+      ROGUE_LLS_SHARED_REGS_RESERVE_SIZE;
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   uint64_t usc_store_program_upload_offset;
+   uint64_t usc_load_program_upload_offset;
+   const uint8_t *usc_load_sr_code;
+   uint32_t usc_load_sr_code_size;
+   VkResult result;
+
+   /* Note that this is being used for both compute and render ctx. There is no
+    * compute equivalent define for the VDMCTRL unit size.
+    */
+   /* 4 blocks (16 dwords / 64 bytes) in USC to prevent fragmentation. */
+   sr_programs->usc.unified_size =
+      DIV_ROUND_UP(64, PVRX(VDMCTRL_PDS_STATE0_USC_UNIFIED_SIZE_UNIT_SIZE));
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.pds_heap,
+                         store_load_state_bo_size,
+                         cache_line_size,
+                         PVR_WINSYS_BO_FLAG_CPU_ACCESS,
+                         &sr_programs->store_load_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* USC state update: SR state store. */
+
+   assert(sizeof(pvr_vdm_store_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
+
+   result = pvr_gpu_upload_usc(device,
+                               pvr_vdm_store_sr_code,
+                               sizeof(pvr_vdm_store_sr_code),
+                               cache_line_size,
+                               &sr_programs->usc.store_program_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_store_load_state_bo;
+
+   usc_store_program_upload_offset =
+      sr_programs->usc.store_program_bo->vma->dev_addr.addr -
+      device->heaps.usc_heap->base_addr.addr;
+
+   /* USC state update: SR state load. */
+
+   if (target == PVR_CTX_SR_COMPUTE_TARGET && PVR_HAS_QUIRK(dev_info, 62269)) {
+      STATIC_ASSERT(sizeof(pvr_cdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
+
+      usc_load_sr_code = pvr_cdm_load_sr_code;
+      usc_load_sr_code_size = sizeof(pvr_cdm_load_sr_code);
+   } else {
+      STATIC_ASSERT(sizeof(pvr_vdm_load_sr_code) < ROGUE_USC_TASK_PROGRAM_SIZE);
+
+      usc_load_sr_code = pvr_vdm_load_sr_code;
+      usc_load_sr_code_size = sizeof(pvr_vdm_load_sr_code);
+   }
+
+   result = pvr_gpu_upload_usc(device,
+                               usc_load_sr_code,
+                               usc_load_sr_code_size,
+                               cache_line_size,
+                               &sr_programs->usc.load_program_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_usc_store_program_bo;
+
+   usc_load_program_upload_offset =
+      sr_programs->usc.load_program_bo->vma->dev_addr.addr -
+      device->heaps.usc_heap->base_addr.addr;
+
+   /* FIXME: The number of USC temps should be output alongside
+    * pvr_vdm_store_sr_code rather than hard coded.
+    */
+   /* Create and upload the PDS load and store programs. Point them to the
+    * appropriate USC load and store programs.
+    */
+   switch (target) {
+   case PVR_CTX_SR_RENDER_TARGET:
+      /* PDS state update: SR state store. */
+      result = pvr_pds_render_ctx_sr_program_create_and_upload(
+         device,
+         usc_store_program_upload_offset,
+         8,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.store_program);
+      if (result != VK_SUCCESS)
+         goto err_free_usc_load_program_bo;
+
+      /* PDS state update: SR state load. */
+      result = pvr_pds_render_ctx_sr_program_create_and_upload(
+         device,
+         usc_load_program_upload_offset,
+         20,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.load_program);
+      if (result != VK_SUCCESS)
+         goto err_free_pds_store_program_bo;
+
+      break;
+
+   case PVR_CTX_SR_COMPUTE_TARGET:
+      /* PDS state update: SR state store. */
+      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
+         device,
+         false,
+         usc_store_program_upload_offset,
+         8,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.store_program);
+      if (result != VK_SUCCESS)
+         goto err_free_usc_load_program_bo;
+
+      /* PDS state update: SR state load. */
+      result = pvr_pds_compute_ctx_sr_program_create_and_upload(
+         device,
+         true,
+         usc_load_program_upload_offset,
+         20,
+         sr_programs->store_load_state_bo->vma->dev_addr,
+         &sr_programs->pds.load_program);
+      if (result != VK_SUCCESS)
+         goto err_free_pds_store_program_bo;
+
+      break;
+
+   default:
+      unreachable("Invalid target.");
+      break;
+   }
+
+   return VK_SUCCESS;
+
+err_free_pds_store_program_bo:
+   pvr_bo_free(device, sr_programs->pds.store_program.pvr_bo);
+
+err_free_usc_load_program_bo:
+   pvr_bo_free(device, sr_programs->usc.load_program_bo);
+
+err_free_usc_store_program_bo:
+   pvr_bo_free(device, sr_programs->usc.store_program_bo);
+
+err_free_store_load_state_bo:
+   pvr_bo_free(device, sr_programs->store_load_state_bo);
+
+   return VK_SUCCESS;
+}
+
+/* Free every buffer object owned by a set of SR (shared register) context
+ * switch programs, in the reverse order of their setup.
+ */
+static void pvr_ctx_sr_programs_cleanup(struct pvr_device *device,
+                                        struct rogue_sr_programs *sr_programs)
+{
+   struct pvr_bo *const bos[] = {
+      sr_programs->pds.load_program.pvr_bo,
+      sr_programs->pds.store_program.pvr_bo,
+      sr_programs->usc.load_program_bo,
+      sr_programs->usc.store_program_bo,
+      sr_programs->store_load_state_bo,
+   };
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(bos); i++)
+      pvr_bo_free(device, bos[i]);
+}
+
+/* Set up the context switch programs (persistent state + shared register)
+ * for one render context program set.
+ */
+static VkResult
+pvr_render_ctx_switch_programs_setup(struct pvr_device *device,
+                                     struct pvr_render_ctx_programs *programs)
+{
+   VkResult result;
+
+   result = pvr_render_job_pt_programs_setup(device, &programs->pt);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_ctx_sr_programs_setup(device,
+                                      PVR_CTX_SR_RENDER_TARGET,
+                                      &programs->sr);
+   if (result != VK_SUCCESS) {
+      /* Undo the PT program setup; nothing else was initialized yet. */
+      pvr_render_job_pt_programs_cleanup(device, &programs->pt);
+      return result;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Tear down one program set; inverse of pvr_render_ctx_switch_programs_setup()
+ * (cleanup runs in reverse setup order).
+ */
+static void
+pvr_render_ctx_switch_programs_cleanup(struct pvr_device *device,
+                                       struct pvr_render_ctx_programs *programs)
+{
+   pvr_ctx_sr_programs_cleanup(device, &programs->sr);
+   pvr_render_job_pt_programs_cleanup(device, &programs->pt);
+}
+
+/* Allocate the VDM/geometry context switch state buffers and set up the
+ * per-program-set context switch programs.
+ *
+ * Fix: on a mid-loop setup failure, only clean up the program sets that were
+ * actually set up. The previous code cleaned up the whole array, calling
+ * pvr_render_ctx_switch_programs_cleanup() on uninitialized entries (ctx is
+ * vk_alloc'd, not zeroed), freeing garbage pointers. This mirrors the
+ * partial-cleanup pattern used by pvr_compute_ctx_create().
+ */
+static VkResult pvr_render_ctx_switch_init(struct pvr_device *device,
+                                           struct pvr_render_ctx *ctx)
+{
+   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
+   const uint64_t vdm_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                                       PVR_BO_ALLOC_FLAG_CPU_ACCESS;
+   const uint64_t geom_state_bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                                        PVR_BO_ALLOC_FLAG_CPU_ACCESS;
+   uint32_t num_programs_setup = 0;
+   VkResult result;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_SIZE,
+                         ROGUE_LLS_VDM_CONTEXT_RESUME_BUFFER_ALIGNMENT,
+                         vdm_state_bo_flags,
+                         &ctx_switch->vdm_state_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         ROGUE_LLS_TA_STATE_BUFFER_SIZE,
+                         ROGUE_LLS_TA_STATE_BUFFER_ALIGNMENT,
+                         geom_state_bo_flags,
+                         &ctx_switch->geom_state_bo);
+   if (result != VK_SUCCESS)
+      goto err_pvr_bo_free_vdm_state_bo;
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx_switch->programs); i++) {
+      result =
+         pvr_render_ctx_switch_programs_setup(device, &ctx_switch->programs[i]);
+      if (result != VK_SUCCESS)
+         goto err_programs_cleanup;
+
+      num_programs_setup++;
+   }
+
+   return VK_SUCCESS;
+
+err_programs_cleanup:
+   /* Only tear down the program sets that were successfully set up; the
+    * remaining array entries are still uninitialized memory.
+    */
+   for (uint32_t i = 0; i < num_programs_setup; i++)
+      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[i]);
+
+   pvr_bo_free(device, ctx_switch->geom_state_bo);
+
+err_pvr_bo_free_vdm_state_bo:
+   pvr_bo_free(device, ctx_switch->vdm_state_bo);
+
+   return result;
+}
+
+/* Release all context switch resources owned by the render context; inverse
+ * of pvr_render_ctx_switch_init().
+ */
+static void pvr_render_ctx_switch_fini(struct pvr_device *device,
+                                       struct pvr_render_ctx *ctx)
+{
+   struct pvr_render_ctx_switch *ctx_switch = &ctx->ctx_switch;
+
+   for (uint32_t idx = 0; idx < ARRAY_SIZE(ctx_switch->programs); idx++)
+      pvr_render_ctx_switch_programs_cleanup(device, &ctx_switch->programs[idx]);
+
+   pvr_bo_free(device, ctx_switch->geom_state_bo);
+   pvr_bo_free(device, ctx_switch->vdm_state_bo);
+}
+
+/* Pack the VDMCTRL_PDS_STATE0/1 word pair used by the VDM context
+ * store/resume tasks to kick the given PDS program.
+ *
+ * Only the USC unified store is sized here; the common store and PDS temps
+ * are unused by the SR programs (size 0).
+ *
+ * Fix: dropped the stray ';' after the first pvr_csb_pack block (an empty
+ * statement), for consistency with every other pack block in this file.
+ */
+static void
+pvr_rogue_get_vdmctrl_pds_state_words(struct pvr_pds_upload *pds_program,
+                                      enum PVRX(VDMCTRL_USC_TARGET) usc_target,
+                                      uint8_t usc_unified_size,
+                                      uint32_t *const state0_out,
+                                      uint32_t *const state1_out)
+{
+   pvr_csb_pack (state0_out, VDMCTRL_PDS_STATE0, state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t pds_data_size = pds_program->data_size * 4;
+
+      state.dm_target = PVRX(VDMCTRL_DM_TARGET_VDM);
+      state.usc_target = usc_target;
+      state.usc_common_size = 0;
+      state.usc_unified_size = usc_unified_size;
+      state.pds_temp_size = 0;
+
+      /* The hardware field is in data-size units; the byte size must be an
+       * exact multiple.
+       */
+      assert(pds_data_size % PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.pds_data_size =
+         pds_data_size / PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
+   }
+
+   pvr_csb_pack (state1_out, VDMCTRL_PDS_STATE1, state) {
+      state.pds_data_addr.addr = pds_program->data_offset;
+      state.sd_type = PVRX(VDMCTRL_SD_TYPE_PDS);
+      state.sd_next_type = PVRX(VDMCTRL_SD_TYPE_PDS);
+   }
+}
+
+/* Pack the TA_STATE_STREAM_OUT1/2 word pair used by the VDM context
+ * store/resume tasks to run the given PDS persistent state (PT) program.
+ */
+static void
+pvr_rogue_get_geom_state_stream_out_words(struct pvr_pds_upload *pds_program,
+                                          uint32_t *const stream_out1_out,
+                                          uint32_t *const stream_out2_out)
+{
+   pvr_csb_pack (stream_out1_out, TA_STATE_STREAM_OUT1, state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t pds_data_size = pds_program->data_size * 4;
+
+      state.sync = true;
+
+      /* The hardware field is in data-size units; the byte size must be an
+       * exact multiple.
+       */
+      assert(pds_data_size %
+                PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.pds_data_size =
+         pds_data_size / PVRX(TA_STATE_STREAM_OUT1_PDS_DATA_SIZE_UNIT_SIZE);
+
+      state.pds_temp_size = 0;
+   }
+
+   pvr_csb_pack (stream_out2_out, TA_STATE_STREAM_OUT2, state) {
+      state.pds_data_addr.addr = pds_program->data_offset;
+   }
+}
+
+/* Fill out the static (fixed at context creation) winsys render context
+ * state: the VDM/TA context state base addresses plus, for each shader state
+ * buffer, the context store and context resume task words.
+ */
+static void pvr_render_ctx_ws_static_state_init(
+   struct pvr_render_ctx *ctx,
+   struct pvr_winsys_render_ctx_static_state *static_state)
+{
+   void *dst;
+
+   dst = &static_state->vdm_ctx_state_base_addr;
+   pvr_csb_pack (dst, CR_VDM_CONTEXT_STATE_BASE, base) {
+      base.addr = ctx->ctx_switch.vdm_state_bo->vma->dev_addr;
+   }
+
+   dst = &static_state->geom_ctx_state_base_addr;
+   pvr_csb_pack (dst, CR_TA_CONTEXT_STATE_BASE, base) {
+      base.addr = ctx->ctx_switch.geom_state_bo->vma->dev_addr;
+   }
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.programs); i++) {
+      struct rogue_pt_programs *pt_prog = &ctx->ctx_switch.programs[i].pt;
+      struct rogue_sr_programs *sr_prog = &ctx->ctx_switch.programs[i].sr;
+
+      /* Context store state. */
+      /* Store task 0: kick the SR store program (any USC instance). */
+      dst = &static_state->geom_state[i].vdm_ctx_store_task0;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK0, task0) {
+         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.store_program,
+                                               PVRX(VDMCTRL_USC_TARGET_ANY),
+                                               sr_prog->usc.unified_size,
+                                               &task0.pds_state0,
+                                               &task0.pds_state1);
+      }
+
+      /* Store task 1: code address of the SR store PDS program. */
+      dst = &static_state->geom_state[i].vdm_ctx_store_task1;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK1, task1) {
+         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
+            state.pds_code_addr.addr = sr_prog->pds.store_program.code_offset;
+         }
+      }
+
+      /* Store task 2: persistent state store program stream-out words. */
+      dst = &static_state->geom_state[i].vdm_ctx_store_task2;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_STORE_TASK2, task2) {
+         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_store_program,
+                                                   &task2.stream_out1,
+                                                   &task2.stream_out2);
+      }
+
+      /* Context resume state. */
+      /* Resume task 0: kick the SR load program (all USC instances). */
+      dst = &static_state->geom_state[i].vdm_ctx_resume_task0;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK0, task0) {
+         pvr_rogue_get_vdmctrl_pds_state_words(&sr_prog->pds.load_program,
+                                               PVRX(VDMCTRL_USC_TARGET_ALL),
+                                               sr_prog->usc.unified_size,
+                                               &task0.pds_state0,
+                                               &task0.pds_state1);
+      }
+
+      /* Resume task 1: code address of the SR load PDS program. */
+      dst = &static_state->geom_state[i].vdm_ctx_resume_task1;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK1, task1) {
+         pvr_csb_pack (&task1.pds_state2, VDMCTRL_PDS_STATE2, state) {
+            state.pds_code_addr.addr = sr_prog->pds.load_program.code_offset;
+         }
+      }
+
+      /* Resume task 2: persistent state resume program stream-out words. */
+      dst = &static_state->geom_state[i].vdm_ctx_resume_task2;
+      pvr_csb_pack (dst, CR_VDM_CONTEXT_RESUME_TASK2, task2) {
+         pvr_rogue_get_geom_state_stream_out_words(&pt_prog->pds_resume_program,
+                                                   &task2.stream_out1,
+                                                   &task2.stream_out2);
+      }
+   }
+}
+
+/* Fill out the winsys render context create info. The context must already
+ * be fully initialized, since the static state is derived from it.
+ */
+static void pvr_render_ctx_ws_create_info_init(
+   struct pvr_render_ctx *ctx,
+   enum pvr_winsys_ctx_priority priority,
+   struct pvr_winsys_render_ctx_create_info *create_info)
+{
+   create_info->priority = priority;
+   create_info->vdm_callstack_addr = ctx->vdm_callstack_bo->vma->dev_addr;
+
+   pvr_render_ctx_ws_static_state_init(ctx, &create_info->static_state);
+}
+
+/* Create a render context.
+ *
+ * On success the caller owns *ctx_out and must destroy it with
+ * pvr_render_ctx_destroy(). On failure everything allocated here is released
+ * via the fall-through error labels (in reverse order of setup).
+ */
+VkResult pvr_render_ctx_create(struct pvr_device *device,
+                               enum pvr_winsys_ctx_priority priority,
+                               struct pvr_render_ctx **const ctx_out)
+{
+   /* One 64-bit entry per call stack level. */
+   const uint64_t vdm_callstack_size =
+      sizeof(uint64_t) * PVR_VDM_CALLSTACK_MAX_DEPTH;
+   struct pvr_winsys_render_ctx_create_info create_info;
+   struct pvr_render_ctx *ctx;
+   VkResult result;
+
+   ctx = vk_alloc(&device->vk.alloc,
+                  sizeof(*ctx),
+                  8,
+                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!ctx)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   ctx->device = device;
+
+   /* Buffer to hold the VDM call stack. */
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         vdm_callstack_size,
+                         PVRX(CR_VDM_CALL_STACK_POINTER_ADDR_ALIGNMENT),
+                         0,
+                         &ctx->vdm_callstack_bo);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_ctx;
+
+   result = pvr_render_ctx_switch_init(device, ctx);
+   if (result != VK_SUCCESS)
+      goto err_free_vdm_callstack_bo;
+
+   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
+   if (result != VK_SUCCESS)
+      goto err_render_ctx_switch_fini;
+
+   /* ctx must be fully initialized by this point since
+    * pvr_render_ctx_ws_create_info_init() depends on this.
+    */
+   pvr_render_ctx_ws_create_info_init(ctx, priority, &create_info);
+
+   result = device->ws->ops->render_ctx_create(device->ws,
+                                               &create_info,
+                                               &ctx->ws_ctx);
+   if (result != VK_SUCCESS)
+      goto err_render_ctx_reset_cmd_fini;
+
+   *ctx_out = ctx;
+
+   return VK_SUCCESS;
+
+err_render_ctx_reset_cmd_fini:
+   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
+
+err_render_ctx_switch_fini:
+   pvr_render_ctx_switch_fini(device, ctx);
+
+err_free_vdm_callstack_bo:
+   pvr_bo_free(device, ctx->vdm_callstack_bo);
+
+err_vk_free_ctx:
+   vk_free(&device->vk.alloc, ctx);
+
+   return result;
+}
+
+/* Destroy a render context created by pvr_render_ctx_create(), tearing down
+ * its state in reverse order of creation (winsys context first).
+ */
+void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx)
+{
+   struct pvr_device *const device = ctx->device;
+
+   device->ws->ops->render_ctx_destroy(ctx->ws_ctx);
+
+   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
+   pvr_render_ctx_switch_fini(device, ctx);
+   pvr_bo_free(device, ctx->vdm_callstack_bo);
+
+   vk_free(&device->vk.alloc, ctx);
+}
+
+/* Create and upload the PDS fence terminate program referenced by the
+ * compute context's CR_CDM_TERMINATE_PDS words (see
+ * pvr_compute_ctx_ws_static_state_init()).
+ */
+static VkResult pvr_pds_sr_fence_terminate_program_create_and_upload(
+   struct pvr_device *device,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const uint32_t pds_data_alignment =
+      PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) / 4U;
+   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint32_t staging_buffer[PVRX(PDS_TASK_PROGRAM_SIZE) >> 2U];
+   struct pvr_pds_fence_program program = { 0 };
+   ASSERTED uint32_t *buffer_end;
+   uint32_t code_offset;
+   uint32_t data_size;
+
+   /* SW_COMPUTE_PDS_BARRIER is not supported with 2 or more phantoms. */
+   assert(!(PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info) &&
+            rogue_get_num_phantoms(dev_info) >= 2));
+
+   /* First pass: generate the data segment at the start of the staging
+    * buffer.
+    */
+   pvr_pds_generate_fence_terminate_program(&program,
+                                            staging_buffer,
+                                            PDS_GENERATE_DATA_SEGMENT,
+                                            &device->pdevice->dev_info);
+
+   /* FIXME: pvr_pds_generate_fence_terminate_program() zeros out the data_size
+    * when we generate the code segment. Implement
+    * PDS_GENERATE_CODEDATA_SEGMENTS? Or wait for the pds gen api to change?
+    * This behavior doesn't seem consistent with the rest of the api. For now
+    * we store the size in a variable.
+    */
+   data_size = program.data_size;
+   code_offset = ALIGN_POT(program.data_size, pds_data_alignment);
+
+   /* Second pass: generate the code segment after the (aligned) data
+    * segment.
+    */
+   buffer_end =
+      pvr_pds_generate_fence_terminate_program(&program,
+                                               &staging_buffer[code_offset],
+                                               PDS_GENERATE_CODE_SEGMENT,
+                                               &device->pdevice->dev_info);
+
+   /* Both segments together must fit in a single PDS task program. */
+   assert((uint64_t)(buffer_end - staging_buffer) * 4U <
+          ROGUE_PDS_TASK_PROGRAM_SIZE);
+
+   return pvr_gpu_upload_pds(device,
+                             staging_buffer,
+                             data_size,
+                             PVRX(CR_CDM_TERMINATE_PDS_DATA_ADDR_ALIGNMENT),
+                             &staging_buffer[code_offset],
+                             program.code_size,
+                             PVRX(CR_CDM_TERMINATE_PDS_CODE_ADDR_ALIGNMENT),
+                             0,
+                             pds_upload_out);
+}
+
+/* Fill out the static winsys compute context state: the CDM context state
+ * base address plus the PDS words for context store (both 'A' and 'B' SR
+ * program sets), terminate, and resume (load).
+ */
+static void pvr_compute_ctx_ws_static_state_init(
+   const struct pvr_device_info *const dev_info,
+   const struct pvr_compute_ctx *const ctx,
+   struct pvr_winsys_compute_ctx_static_state *const static_state)
+{
+   const struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
+
+   pvr_csb_pack (&static_state->cdm_ctx_state_base_addr,
+                 CR_CDM_CONTEXT_STATE_BASE,
+                 state) {
+      state.addr = ctx_switch->compute_state_bo->vma->dev_addr;
+   }
+
+   /* CR_CDM_CONTEXT_... use state store program info. */
+
+   pvr_csb_pack (&static_state->cdm_ctx_store_pds0,
+                 CR_CDM_CONTEXT_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[0].pds.store_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[0].pds.store_program.code_offset;
+   }
+
+   /* 'B' shared register state buffer variant (sr[1]). */
+   pvr_csb_pack (&static_state->cdm_ctx_store_pds0_b,
+                 CR_CDM_CONTEXT_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[1].pds.store_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[1].pds.store_program.code_offset;
+   }
+
+   pvr_csb_pack (&static_state->cdm_ctx_store_pds1,
+                 CR_CDM_CONTEXT_PDS1,
+                 state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t store_program_data_size =
+         ctx_switch->sr[0].pds.store_program.data_size * 4U;
+
+      state.pds_seq_dep = true;
+      state.usc_seq_dep = false;
+      state.target = true;
+      state.unified_size = ctx_switch->sr[0].usc.unified_size;
+      state.common_shared = false;
+      state.common_size = 0;
+      state.temp_size = 0;
+
+      assert(store_program_data_size %
+                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.data_size = store_program_data_size /
+                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
+
+      state.fence = true;
+   }
+
+   /* CR_CDM_TERMINATE_... use fence terminate info. */
+
+   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds,
+                 CR_CDM_TERMINATE_PDS,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr_fence_terminate_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr_fence_terminate_program.code_offset;
+   }
+
+   pvr_csb_pack (&static_state->cdm_ctx_terminate_pds1,
+                 CR_CDM_TERMINATE_PDS1,
+                 state) {
+      /* Convert the data size from dwords to bytes. */
+      const uint32_t fence_terminate_program_data_size =
+         ctx_switch->sr_fence_terminate_program.data_size * 4U;
+
+      state.pds_seq_dep = true;
+      state.usc_seq_dep = false;
+      state.target = !PVR_HAS_FEATURE(dev_info, compute_morton_capable);
+      state.unified_size = 0;
+      /* Common store is for shareds -- this will free the partitions. */
+      state.common_shared = true;
+      state.common_size = 0;
+      state.temp_size = 0;
+
+      assert(fence_terminate_program_data_size %
+                PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE) ==
+             0);
+      state.data_size = fence_terminate_program_data_size /
+                        PVRX(VDMCTRL_PDS_STATE0_PDS_DATA_SIZE_UNIT_SIZE);
+      state.fence = true;
+   }
+
+   /* CR_CDM_RESUME_... use state load program info. */
+
+   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0,
+                 CR_CDM_CONTEXT_LOAD_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[0].pds.load_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[0].pds.load_program.code_offset;
+   }
+
+   /* 'B' shared register state buffer variant (sr[1]). */
+   pvr_csb_pack (&static_state->cdm_ctx_resume_pds0_b,
+                 CR_CDM_CONTEXT_LOAD_PDS0,
+                 state) {
+      state.data_addr.addr = ctx_switch->sr[1].pds.load_program.data_offset;
+      state.code_addr.addr = ctx_switch->sr[1].pds.load_program.code_offset;
+   }
+}
+
+/* Fill out the winsys compute context create info. The context must already
+ * be fully initialized, since the static state is derived from it.
+ */
+static void pvr_compute_ctx_ws_create_info_init(
+   const struct pvr_compute_ctx *const ctx,
+   enum pvr_winsys_ctx_priority priority,
+   struct pvr_winsys_compute_ctx_create_info *const create_info)
+{
+   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+
+   create_info->priority = priority;
+
+   pvr_compute_ctx_ws_static_state_init(dev_info,
+                                        ctx,
+                                        &create_info->static_state);
+}
+
+/* Create a compute context.
+ *
+ * On success the caller owns *ctx_out and must destroy it with
+ * pvr_compute_ctx_destroy(). On failure everything allocated here is
+ * released via the fall-through error labels (in reverse order of setup).
+ *
+ * Fixes: pvr_bo_alloc() takes PVR_BO_ALLOC_FLAG_* flags (as used by
+ * pvr_render_ctx_switch_init()), not the PVR_WINSYS_BO_FLAG_* family that
+ * was passed here; also use vk_error(device, ...) for consistency with
+ * pvr_render_ctx_create().
+ */
+VkResult pvr_compute_ctx_create(struct pvr_device *const device,
+                                enum pvr_winsys_ctx_priority priority,
+                                struct pvr_compute_ctx **const ctx_out)
+{
+   struct pvr_winsys_compute_ctx_create_info create_info;
+   struct pvr_compute_ctx *ctx;
+   VkResult result;
+
+   ctx = vk_alloc(&device->vk.alloc,
+                  sizeof(*ctx),
+                  8,
+                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!ctx)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   ctx->device = device;
+
+   result = pvr_bo_alloc(
+      device,
+      device->heaps.general_heap,
+      rogue_get_cdm_context_resume_buffer_size(&device->pdevice->dev_info),
+      rogue_get_cdm_context_resume_buffer_alignment(&device->pdevice->dev_info),
+      PVR_BO_ALLOC_FLAG_CPU_ACCESS | PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
+      &ctx->ctx_switch.compute_state_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_ctx;
+
+   /* TODO: Change this so that enabling storage to B doesn't change the array
+    * size. Instead of looping we could unroll this and have the second
+    * programs setup depending on the B enable. Doing it that way would make
+    * things more obvious.
+    */
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); i++) {
+      result = pvr_ctx_sr_programs_setup(device,
+                                         PVR_CTX_SR_COMPUTE_TARGET,
+                                         &ctx->ctx_switch.sr[i]);
+      if (result != VK_SUCCESS) {
+         /* Only clean up the program sets that were successfully set up. */
+         for (uint32_t j = 0; j < i; j++)
+            pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[j]);
+
+         goto err_free_state_buffer;
+      }
+   }
+
+   result = pvr_pds_sr_fence_terminate_program_create_and_upload(
+      device,
+      &ctx->ctx_switch.sr_fence_terminate_program);
+   if (result != VK_SUCCESS)
+      goto err_free_sr_programs;
+
+   /* ctx must be fully initialized by this point since the create info is
+    * derived from it.
+    */
+   pvr_compute_ctx_ws_create_info_init(ctx, priority, &create_info);
+
+   result = pvr_ctx_reset_cmd_init(device, &ctx->reset_cmd);
+   if (result != VK_SUCCESS)
+      goto err_free_pds_fence_terminate_program;
+
+   result = device->ws->ops->compute_ctx_create(device->ws,
+                                                &create_info,
+                                                &ctx->ws_ctx);
+   if (result != VK_SUCCESS)
+      goto err_fini_reset_cmd;
+
+   *ctx_out = ctx;
+
+   return VK_SUCCESS;
+
+err_fini_reset_cmd:
+   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
+
+err_free_pds_fence_terminate_program:
+   pvr_bo_free(device, ctx->ctx_switch.sr_fence_terminate_program.pvr_bo);
+
+err_free_sr_programs:
+   for (uint32_t i = 0; i < ARRAY_SIZE(ctx->ctx_switch.sr); ++i)
+      pvr_ctx_sr_programs_cleanup(device, &ctx->ctx_switch.sr[i]);
+
+err_free_state_buffer:
+   pvr_bo_free(device, ctx->ctx_switch.compute_state_bo);
+
+err_free_ctx:
+   vk_free(&device->vk.alloc, ctx);
+
+   return result;
+}
+
+/* Destroy a compute context created by pvr_compute_ctx_create(), tearing
+ * down its state in reverse order of creation (winsys context first).
+ */
+void pvr_compute_ctx_destroy(struct pvr_compute_ctx *const ctx)
+{
+   struct pvr_device *const device = ctx->device;
+   struct pvr_compute_ctx_switch *const ctx_switch = &ctx->ctx_switch;
+
+   device->ws->ops->compute_ctx_destroy(ctx->ws_ctx);
+
+   pvr_ctx_reset_cmd_fini(device, &ctx->reset_cmd);
+
+   pvr_bo_free(device, ctx_switch->sr_fence_terminate_program.pvr_bo);
+
+   for (uint32_t idx = 0; idx < ARRAY_SIZE(ctx_switch->sr); idx++)
+      pvr_ctx_sr_programs_cleanup(device, &ctx_switch->sr[idx]);
+
+   pvr_bo_free(device, ctx_switch->compute_state_bo);
+
+   vk_free(&device->vk.alloc, ctx);
+}
diff --git a/src/imagination/vulkan/pvr_job_context.h b/src/imagination/vulkan/pvr_job_context.h
new file mode 100644 (file)
index 0000000..0e3f81b
--- /dev/null
@@ -0,0 +1,126 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_CONTEXT_H
+#define PVR_JOB_CONTEXT_H
+
+#include "pvr_winsys.h"
+#include "pvr_private.h"
+
+/* Support PDS code/data loading/storing to the 'B' shared register state
+ * buffers.
+ */
+#define ROGUE_NUM_SHADER_STATE_BUFFERS 2U
+
+/* TODO: Add reset framework support. */
+struct pvr_reset_cmd {
+   /* Intentionally empty for now; see the TODO above. */
+};
+
+struct pvr_compute_ctx;
+
+/* Context switch shared register (SR) programs: store out and load back the
+ * USC shared register state across a context switch.
+ */
+struct rogue_sr_programs {
+   /* Buffer used by the store/load programs to hold the SR state. */
+   struct pvr_bo *store_load_state_bo;
+
+   struct {
+      /* USC unified store size, in VDMCTRL_PDS_STATE0 unified-size units. */
+      uint8_t unified_size;
+
+      /* Uploaded USC program that stores out the SR state. */
+      struct pvr_bo *store_program_bo;
+
+      /* Uploaded USC program that loads back the SR state. */
+      struct pvr_bo *load_program_bo;
+   } usc;
+
+   struct {
+      /* PDS programs used to kick the USC store/load programs above. */
+      struct pvr_pds_upload store_program;
+      struct pvr_pds_upload load_program;
+   } pds;
+};
+
+struct pvr_render_ctx {
+   struct pvr_device *device;
+
+   /* Winsys render context handle. */
+   struct pvr_winsys_render_ctx *ws_ctx;
+
+   /* Buffer to hold the VDM call stack */
+   struct pvr_bo *vdm_callstack_bo;
+
+   struct pvr_render_ctx_switch {
+      /* Buffer to hold the VDM context resume control stream. */
+      struct pvr_bo *vdm_state_bo;
+
+      /* Buffer to hold the TA (geometry) context switch state. */
+      struct pvr_bo *geom_state_bo;
+
+      struct pvr_render_ctx_programs {
+         /* Context switch persistent state programs. */
+         struct rogue_pt_programs {
+            /* Buffer used to hold the persistent state. */
+            struct pvr_bo *store_resume_state_bo;
+
+            /* PDS program to store out the persistent state in
+             * 'store_resume_state_bo'.
+             */
+            struct pvr_pds_upload pds_store_program;
+
+            /* PDS program to load in the persistent state in
+             * 'store_resume_state_bo'.
+             */
+            struct pvr_pds_upload pds_resume_program;
+         } pt;
+
+         /* Context switch shared register programs. */
+         struct rogue_sr_programs sr;
+
+         /* One program set per shader state buffer ('A' and 'B'). */
+      } programs[ROGUE_NUM_SHADER_STATE_BUFFERS];
+   } ctx_switch;
+
+   /* Reset framework. */
+   struct pvr_reset_cmd reset_cmd;
+};
+
+struct pvr_compute_ctx {
+   struct pvr_device *device;
+
+   /* Winsys compute context handle. */
+   struct pvr_winsys_compute_ctx *ws_ctx;
+
+   struct pvr_compute_ctx_switch {
+      /* Buffer to hold the CDM context resume state. */
+      struct pvr_bo *compute_state_bo;
+
+      /* One SR program set per shader state buffer ('A' and 'B'). */
+      struct rogue_sr_programs sr[ROGUE_NUM_SHADER_STATE_BUFFERS];
+
+      /* PDS fence terminate program used by the CDM terminate words. */
+      struct pvr_pds_upload sr_fence_terminate_program;
+   } ctx_switch;
+
+   /* Reset framework. */
+   struct pvr_reset_cmd reset_cmd;
+};
+
+/* Create a render context. On success the caller owns *ctx_out and must
+ * destroy it with pvr_render_ctx_destroy().
+ */
+VkResult pvr_render_ctx_create(struct pvr_device *device,
+                               enum pvr_winsys_ctx_priority priority,
+                               struct pvr_render_ctx **const ctx_out);
+void pvr_render_ctx_destroy(struct pvr_render_ctx *ctx);
+
+/* Create a compute context. On success the caller owns *ctx_out and must
+ * destroy it with pvr_compute_ctx_destroy().
+ */
+VkResult pvr_compute_ctx_create(struct pvr_device *const device,
+                                enum pvr_winsys_ctx_priority priority,
+                                struct pvr_compute_ctx **const ctx_out);
+void pvr_compute_ctx_destroy(struct pvr_compute_ctx *ctx);
+
+#endif /* PVR_JOB_CONTEXT_H */
diff --git a/src/imagination/vulkan/pvr_job_render.c b/src/imagination/vulkan/pvr_job_render.c
new file mode 100644 (file)
index 0000000..8c6b15f
--- /dev/null
@@ -0,0 +1,1595 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_job_common.h"
+#include "pvr_job_context.h"
+#include "pvr_job_render.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_rogue_fw.h"
+#include "pvr_winsys.h"
+#include "util/compiler.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
+/* Required alignment, in bytes, of a PM free list base address. */
+#define ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE 16U
+
+/* FIXME: Is there a hardware define we can use instead? */
+/* 1 DWord per PM physical page stored in the free list */
+#define ROGUE_FREE_LIST_ENTRY_SIZE ((uint32_t)sizeof(uint32_t))
+
+/* FIXME: The three defines below, for the number of PC, PD and PT entries in a
+ * 4KB page, come from rgxmmudefs_km.h (meaning they're part of the
+ * auto-generated hwdefs). Should these be defined in rogue_mmu.xml? Keeping in
+ * mind that we probably only need these three values.
+ */
+/* Page catalog entries per 4KB page. */
+#define ROGUE_NUM_PC_ENTRIES_PER_PAGE 0x400U
+
+/* Page directory entries per 4KB page. */
+#define ROGUE_NUM_PD_ENTRIES_PER_PAGE 0x200U
+
+/* Page table entries per 4KB page. */
+#define ROGUE_NUM_PT_ENTRIES_PER_PAGE 0x200U
+
+/* A PM free list and the buffer object backing its entries. */
+struct pvr_free_list {
+   struct pvr_device *device;
+
+   /* Size in bytes of the buffer backing the free list entries, i.e. the
+    * maximum page count times ROGUE_FREE_LIST_ENTRY_SIZE.
+    */
+   uint64_t size;
+
+   /* Backing storage for the free list entries. */
+   struct pvr_bo *bo;
+
+   /* Winsys object representing this free list. */
+   struct pvr_winsys_free_list *ws_free_list;
+};
+
+/* Macrotile information. */
+struct pvr_rt_mtile_info {
+   /* Tile dimensions, from the tile_size_x/y hardware features. */
+   uint32_t tile_size_x;
+   uint32_t tile_size_y;
+
+   /* Render target extent in tiles (rounded up). */
+   uint32_t num_tiles_x;
+   uint32_t num_tiles_y;
+
+   /* Tiles per macrotile, adjusted for the sample count. */
+   uint32_t tiles_per_mtile_x;
+   uint32_t tiles_per_mtile_y;
+
+   /* Maximum valid tile indices. */
+   uint32_t x_tile_max;
+   uint32_t y_tile_max;
+
+   /* Macrotile grid dimensions. */
+   uint32_t mtiles_x;
+   uint32_t mtiles_y;
+
+   /* Macrotile boundary positions in tiles; x2/y2 and x3/y3 are multiples of
+    * x1/y1 (and are zero on cores with the simple internal parameter format).
+    */
+   uint32_t mtile_x1;
+   uint32_t mtile_y1;
+   uint32_t mtile_x2;
+   uint32_t mtile_y2;
+   uint32_t mtile_x3;
+   uint32_t mtile_y3;
+
+   /* Tiles covered by one macrotile (mtile_x1 * mtile_y1). */
+   uint32_t mtile_stride;
+};
+
+/* Render target (RT) dataset. */
+struct pvr_rt_dataset {
+   struct pvr_device *device;
+
+   /* RT dataset information */
+   uint32_t width;
+   uint32_t height;
+   uint32_t samples;
+   uint32_t layers;
+
+   struct pvr_free_list *global_free_list;
+   struct pvr_free_list *local_free_list;
+
+   /* Single allocation holding the vheap table followed by the RTC (the
+    * latter only allocated for layered rendering).
+    */
+   struct pvr_bo *vheap_rtc_bo;
+   pvr_dev_addr_t vheap_dev_addr;
+   /* PVR_DEV_ADDR_INVALID when there is no RTC (single layer). */
+   pvr_dev_addr_t rtc_dev_addr;
+
+   /* Tail pointer cache (TPC). 'tpc_size' is the size before cache line
+    * alignment; 'tpc_stride' is in PM physical pages (0 when unlayered).
+    */
+   struct pvr_bo *tpc_bo;
+   uint64_t tpc_stride;
+   uint64_t tpc_size;
+
+   struct pvr_winsys_rt_dataset *ws_rt_dataset;
+
+   /* RT data information */
+   /* Macrotile array + MList storage shared by all RT datas. */
+   struct pvr_bo *mta_mlist_bo;
+
+   /* ISP region header storage shared by all RT datas. */
+   struct pvr_bo *rgn_headers_bo;
+   uint64_t rgn_headers_stride;
+
+   /* NOTE(review): neither field below is written in this part of the file;
+    * presumably they track whether a fragment job is required and which RT
+    * data is current — confirm at the call sites.
+    */
+   bool need_frag;
+
+   uint8_t rt_data_idx;
+
+   /* Per-RT-data device addresses carved out of the shared buffers above. */
+   struct {
+      pvr_dev_addr_t mta_dev_addr;
+      pvr_dev_addr_t mlist_dev_addr;
+      pvr_dev_addr_t rgn_headers_dev_addr;
+   } rt_datas[ROGUE_NUM_RTDATAS];
+};
+
+/* Creates a PM free list.
+ *
+ * 'initial_size', 'max_size' and 'grow_size' are in bytes and are aligned up
+ * internally; an 'initial_size' of 0 selects a single PM physical page.
+ * 'grow_threshold' is a percentage (0-100). 'parent_free_list' may be NULL.
+ * On success the new free list is returned in 'free_list_out'.
+ */
+VkResult pvr_free_list_create(struct pvr_device *device,
+                              uint32_t initial_size,
+                              uint32_t max_size,
+                              uint32_t grow_size,
+                              uint32_t grow_threshold,
+                              struct pvr_free_list *parent_free_list,
+                              struct pvr_free_list **const free_list_out)
+{
+   struct pvr_winsys_free_list *parent_ws_free_list =
+      parent_free_list ? parent_free_list->ws_free_list : NULL;
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                             PVR_BO_ALLOC_FLAG_PM_FW_PROTECT;
+   struct pvr_free_list *free_list;
+   uint32_t cache_line_size;
+   uint32_t initial_num_pages;
+   uint32_t grow_num_pages;
+   uint32_t max_num_pages;
+   uint64_t addr_alignment;
+   uint64_t size_alignment;
+   uint64_t size;
+   VkResult result;
+
+   assert((initial_size + grow_size) <= max_size);
+   assert(max_size != 0);
+   assert(grow_threshold <= 100);
+
+   /* Make sure the free list is created with at least a single page. */
+   if (initial_size == 0)
+      initial_size = ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
+
+   /* The freelists sizes must respect the PM freelist base address alignment
+    * requirement. As the freelist entries are cached by the SLC, it's also
+    * necessary to ensure the sizes respect the SLC cache line size to avoid
+    * invalid entries appearing in the cache, which would be problematic after
+    * a grow operation, as the SLC entries aren't invalidated. We do this by
+    * making sure the freelist values are appropriately aligned.
+    *
+    * To calculate the alignment, we first take the largest of the freelist
+    * base address alignment and the SLC cache line size. We then divide this
+    * by the freelist entry size to determine the number of freelist entries
+    * required by the PM. Finally, as each entry holds a single PM physical
+    * page, we multiple the number of entries by the page size.
+    *
+    * As an example, if the base address alignment is 16 bytes, the SLC cache
+    * line size is 64 bytes and the freelist entry size is 4 bytes then 16
+    * entries are required, as we take the SLC cacheline size (being the larger
+    * of the two values) and divide this by 4. If the PM page size is 4096
+    * bytes then we end up with an alignment of 65536 bytes.
+    */
+   cache_line_size = rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+
+   addr_alignment =
+      MAX2(ROGUE_BIF_PM_FREELIST_BASE_ADDR_ALIGNSIZE, cache_line_size);
+   size_alignment = (addr_alignment / ROGUE_FREE_LIST_ENTRY_SIZE) *
+                    ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
+
+   assert(util_is_power_of_two_nonzero(size_alignment));
+
+   initial_size = align64(initial_size, size_alignment);
+   max_size = align64(max_size, size_alignment);
+   grow_size = align64(grow_size, size_alignment);
+
+   /* Make sure the 'max' size doesn't exceed what the firmware supports and
+    * adjust the other sizes accordingly.
+    */
+   if (max_size > ROGUE_FREE_LIST_MAX_SIZE) {
+      max_size = ROGUE_FREE_LIST_MAX_SIZE;
+      assert(align64(max_size, size_alignment) == max_size);
+   }
+
+   if (initial_size > max_size)
+      initial_size = max_size;
+
+   /* A list that already starts at its maximum size can never grow. */
+   if (initial_size == max_size)
+      grow_size = 0;
+
+   initial_num_pages = initial_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+   max_num_pages = max_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+   grow_num_pages = grow_size >> ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+
+   /* Calculate the size of the buffer needed to store the free list entries
+    * based on the maximum number of pages we can have.
+    */
+   size = max_num_pages * ROGUE_FREE_LIST_ENTRY_SIZE;
+   assert(align64(size, addr_alignment) == size);
+
+   free_list = vk_alloc(&device->vk.alloc,
+                        sizeof(*free_list),
+                        8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!free_list)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: The memory is mapped GPU uncached, but this seems to contradict
+    * the comment above about aligning to the SLC cache line size.
+    */
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         size,
+                         addr_alignment,
+                         bo_flags,
+                         &free_list->bo);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_free_list;
+
+   result = device->ws->ops->free_list_create(device->ws,
+                                              free_list->bo->vma,
+                                              initial_num_pages,
+                                              max_num_pages,
+                                              grow_num_pages,
+                                              grow_threshold,
+                                              parent_ws_free_list,
+                                              &free_list->ws_free_list);
+   if (result != VK_SUCCESS)
+      goto err_pvr_bo_free_bo;
+
+   free_list->device = device;
+   free_list->size = size;
+
+   *free_list_out = free_list;
+
+   return VK_SUCCESS;
+
+err_pvr_bo_free_bo:
+   pvr_bo_free(device, free_list->bo);
+
+err_vk_free_free_list:
+   vk_free(&device->vk.alloc, free_list);
+
+   return result;
+}
+
+/* Destroys a free list created with pvr_free_list_create(), releasing the
+ * winsys object before freeing the backing buffer and the container.
+ */
+void pvr_free_list_destroy(struct pvr_free_list *free_list)
+{
+   struct pvr_device *device = free_list->device;
+
+   device->ws->ops->free_list_destroy(free_list->ws_free_list);
+   pvr_bo_free(device, free_list->bo);
+   vk_free(&device->vk.alloc, free_list);
+}
+
+/* Splits a sample count into its X and Y grid dimensions.
+ *
+ * Only 1, 2, 4 and 8 samples are supported; any other count is a programming
+ * error.
+ */
+static inline void pvr_get_samples_in_xy(uint32_t samples,
+                                         uint32_t *const x_out,
+                                         uint32_t *const y_out)
+{
+   if (samples == 1) {
+      *x_out = 1;
+      *y_out = 1;
+   } else if (samples == 2) {
+      *x_out = 1;
+      *y_out = 2;
+   } else if (samples == 4) {
+      *x_out = 2;
+      *y_out = 2;
+   } else if (samples == 8) {
+      *x_out = 2;
+      *y_out = 4;
+   } else {
+      unreachable("Unsupported number of samples");
+   }
+}
+
+/* Fills 'info' with the tile and macrotile layout for a render target of
+ * 'width' x 'height' pixels with 'samples' samples.
+ */
+static void pvr_rt_mtile_info_init(struct pvr_device *device,
+                                   struct pvr_rt_mtile_info *info,
+                                   uint32_t width,
+                                   uint32_t height,
+                                   uint32_t samples)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint32_t samples_in_x;
+   uint32_t samples_in_y;
+
+   pvr_get_samples_in_xy(samples, &samples_in_x, &samples_in_y);
+
+   info->tile_size_x = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 1);
+   info->tile_size_y = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 1);
+
+   info->num_tiles_x = DIV_ROUND_UP(width, info->tile_size_x);
+   info->num_tiles_y = DIV_ROUND_UP(height, info->tile_size_y);
+
+   rogue_get_num_macrotiles_xy(dev_info, &info->mtiles_x, &info->mtiles_y);
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      /* Only version 2 of the simple parameter format is supported. */
+      assert(PVR_GET_FEATURE_VALUE(dev_info,
+                                   simple_parameter_format_version,
+                                   0) == 2);
+      /* Set up 16 macrotiles with a multiple of 2x2 tiles per macrotile,
+       * which is aligned to a tile group.
+       */
+      info->mtile_x1 = DIV_ROUND_UP(info->num_tiles_x, 8) * 2;
+      info->mtile_y1 = DIV_ROUND_UP(info->num_tiles_y, 8) * 2;
+      info->mtile_x2 = 0;
+      info->mtile_y2 = 0;
+      info->mtile_x3 = 0;
+      info->mtile_y3 = 0;
+      info->x_tile_max = ALIGN_POT(info->num_tiles_x, 2) - 1;
+      info->y_tile_max = ALIGN_POT(info->num_tiles_y, 2) - 1;
+   } else {
+      /* Set up 16 macrotiles with a multiple of 4x4 tiles per macrotile. */
+      info->mtile_x1 = ALIGN_POT(DIV_ROUND_UP(info->num_tiles_x, 4), 4);
+      info->mtile_y1 = ALIGN_POT(DIV_ROUND_UP(info->num_tiles_y, 4), 4);
+      info->mtile_x2 = info->mtile_x1 * 2;
+      info->mtile_y2 = info->mtile_y1 * 2;
+      info->mtile_x3 = info->mtile_x1 * 3;
+      info->mtile_y3 = info->mtile_y1 * 3;
+      info->x_tile_max = info->num_tiles_x - 1;
+      info->y_tile_max = info->num_tiles_y - 1;
+   }
+
+   /* Scale the per-macrotile tile counts by the sample layout. */
+   info->tiles_per_mtile_x = info->mtile_x1 * samples_in_x;
+   info->tiles_per_mtile_y = info->mtile_y1 * samples_in_y;
+
+   info->mtile_stride = info->mtile_x1 * info->mtile_y1;
+}
+
+/* Note that the unit of the return value depends on the GPU. For cores with
+ * the simple_internal_parameter_format feature the returned size is
+ * interpreted as the number of region headers. For cores without this feature
+ * it's interpreted as the size in dwords.
+ */
+static uint64_t
+pvr_rt_get_isp_region_size(struct pvr_device *device,
+                           const struct pvr_rt_mtile_info *mtile_info)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint64_t rgn_size =
+      mtile_info->tiles_per_mtile_x * mtile_info->tiles_per_mtile_y;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      uint32_t version;
+
+      /* Cover every macrotile. */
+      rgn_size *= mtile_info->mtiles_x * mtile_info->mtiles_y;
+
+      if (PVR_FEATURE_VALUE(dev_info,
+                            simple_parameter_format_version,
+                            &version)) {
+         version = 0;
+      }
+
+      if (version == 2) {
+         /* One region header per 2x2 tile group. */
+         rgn_size /= (2U * 2U);
+      }
+   } else {
+      const uint64_t rgn_header_size = rogue_get_region_header_size(dev_info);
+
+      /* Round up to the next dword to prevent IPF overrun; the result is in
+       * dwords (see the note above).
+       */
+      rgn_size = DIV_ROUND_UP(rgn_size * rgn_header_size, 4);
+   }
+
+   return rgn_size;
+}
+
+/* Allocates, in a single zeroed buffer, the vheap table and — when rendering
+ * to more than one layer — the RTC, recording their device addresses in
+ * 'rt_dataset'.
+ */
+static VkResult pvr_rt_vheap_rtc_data_init(struct pvr_device *device,
+                                           struct pvr_rt_dataset *rt_dataset,
+                                           uint32_t layers)
+{
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
+   uint64_t vheap_size;
+   uint32_t alignment;
+   uint64_t rtc_size;
+   VkResult result;
+
+   vheap_size = ROGUE_CR_PM_VHEAP_TABLE_SIZE * ROGUE_PM_VHEAP_ENTRY_SIZE;
+
+   if (layers > 1) {
+      uint64_t rtc_entries;
+
+      /* The RTC follows the vheap table, so pad to its base alignment. */
+      vheap_size = ALIGN_POT(vheap_size, PVRX(CR_TA_RTC_ADDR_BASE_ALIGNMENT));
+
+      rtc_entries = ROGUE_NUM_TEAC + ROGUE_NUM_TE + ROGUE_NUM_VCE;
+      /* Quirk 48545 requires space for additional TE entries. */
+      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 48545))
+         rtc_entries += ROGUE_NUM_TE;
+
+      rtc_size = rtc_entries * ROGUE_RTC_SIZE_IN_BYTES;
+   } else {
+      rtc_size = 0;
+   }
+
+   alignment = MAX2(PVRX(CR_PM_VHEAP_TABLE_BASE_ADDR_ALIGNMENT),
+                    PVRX(CR_TA_RTC_ADDR_BASE_ALIGNMENT));
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         vheap_size + rtc_size,
+                         alignment,
+                         bo_flags,
+                         &rt_dataset->vheap_rtc_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   rt_dataset->vheap_dev_addr = rt_dataset->vheap_rtc_bo->vma->dev_addr;
+
+   if (rtc_size > 0) {
+      rt_dataset->rtc_dev_addr.addr =
+         rt_dataset->vheap_dev_addr.addr + vheap_size;
+   } else {
+      rt_dataset->rtc_dev_addr = PVR_DEV_ADDR_INVALID;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Releases the vheap/RTC buffer and invalidates the recorded addresses. */
+static void pvr_rt_vheap_rtc_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   rt_dataset->rtc_dev_addr = PVR_DEV_ADDR_INVALID;
+
+   pvr_bo_free(rt_dataset->device, rt_dataset->vheap_rtc_bo);
+   rt_dataset->vheap_rtc_bo = NULL;
+}
+
+/* Computes the tail pointer cache layout: 'stride_out' is the per-layer
+ * stride in PM physical pages (0 when there is a single layer) and
+ * 'size_out' is the total size in bytes across all layers.
+ */
+static void
+pvr_rt_get_tail_ptr_stride_size(const struct pvr_device *device,
+                                const struct pvr_rt_mtile_info *mtile_info,
+                                uint32_t layers,
+                                uint64_t *const stride_out,
+                                uint64_t *const size_out)
+{
+   uint32_t max_num_mtiles;
+   uint32_t num_mtiles_x;
+   uint32_t num_mtiles_y;
+   uint32_t version;
+   uint64_t size;
+
+   num_mtiles_x = mtile_info->mtiles_x * mtile_info->tiles_per_mtile_x;
+   num_mtiles_y = mtile_info->mtiles_y * mtile_info->tiles_per_mtile_y;
+
+   /* Size for a square power-of-two grid covering the larger dimension. */
+   max_num_mtiles = MAX2(util_next_power_of_two64(num_mtiles_x),
+                         util_next_power_of_two64(num_mtiles_y));
+
+   size = max_num_mtiles * max_num_mtiles;
+
+   if (PVR_FEATURE_VALUE(&device->pdevice->dev_info,
+                         simple_parameter_format_version,
+                         &version)) {
+      version = 0;
+   }
+
+   if (version == 2) {
+      /* One tail pointer cache entry per 2x2 tile group. */
+      size /= (2U * 2U);
+   }
+
+   size *= ROGUE_TAIL_POINTER_SIZE;
+
+   if (layers > 1) {
+      /* Each layer starts on a PM physical page boundary. */
+      size = ALIGN_POT(size, ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE);
+
+      *stride_out = size / ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE;
+      *size_out = size * layers;
+   } else {
+      *stride_out = 0;
+      *size_out = size;
+   }
+}
+
+/* Allocates the zeroed tail pointer cache (TPC) buffer for 'rt_dataset',
+ * recording its stride and (unaligned) size.
+ */
+static VkResult pvr_rt_tpc_data_init(struct pvr_device *device,
+                                     struct pvr_rt_dataset *rt_dataset,
+                                     const struct pvr_rt_mtile_info *mtile_info,
+                                     uint32_t layers)
+{
+   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_GPU_UNCACHED |
+                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
+   uint64_t tpc_size;
+
+   pvr_rt_get_tail_ptr_stride_size(device,
+                                   mtile_info,
+                                   layers,
+                                   &rt_dataset->tpc_stride,
+                                   &rt_dataset->tpc_size);
+   /* The allocation itself is padded to a whole TPC cache line. */
+   tpc_size = ALIGN_POT(rt_dataset->tpc_size, ROGUE_TE_TPC_CACHE_LINE_SIZE);
+
+   return pvr_bo_alloc(device,
+                       device->heaps.general_heap,
+                       tpc_size,
+                       PVRX(CR_TE_TPC_ADDR_BASE_ALIGNMENT),
+                       bo_flags,
+                       &rt_dataset->tpc_bo);
+}
+
+/* Releases the tail pointer cache buffer. */
+static void pvr_rt_tpc_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   pvr_bo_free(rt_dataset->device, rt_dataset->tpc_bo);
+   rt_dataset->tpc_bo = NULL;
+}
+
+/* Returns the size in bytes, aligned to the PM physical page size, of the
+ * MList needed to map the pages of both free lists.
+ */
+static uint32_t
+pvr_rt_get_mlist_size(const struct pvr_free_list *global_free_list,
+                      const struct pvr_free_list *local_free_list)
+{
+   uint32_t num_pte_pages;
+   uint32_t num_pde_pages;
+   uint32_t num_pce_pages;
+   uint64_t total_pages;
+   uint32_t mlist_size;
+
+   assert(global_free_list->size + local_free_list->size <=
+          ROGUE_PM_MAX_PB_VIRT_ADDR_SPACE);
+
+   total_pages = (global_free_list->size + local_free_list->size) >>
+                 ROGUE_BIF_PM_PHYSICAL_PAGE_SHIFT;
+
+   /* Calculate the total number of physical pages required to hold the page
+    * table, directory and catalog entries for the freelist pages.
+    */
+   num_pte_pages = DIV_ROUND_UP(total_pages, ROGUE_NUM_PT_ENTRIES_PER_PAGE);
+   num_pde_pages = DIV_ROUND_UP(num_pte_pages, ROGUE_NUM_PD_ENTRIES_PER_PAGE);
+   num_pce_pages = DIV_ROUND_UP(num_pde_pages, ROGUE_NUM_PC_ENTRIES_PER_PAGE);
+
+   /* Calculate the MList size considering the total number of pages in the PB
+    * are shared among all the PM address spaces.
+    */
+   mlist_size = (num_pce_pages + num_pde_pages + num_pte_pages) *
+                ROGUE_NUM_PM_ADDRESS_SPACES * ROGUE_MLIST_ENTRY_STRIDE;
+
+   return ALIGN_POT(mlist_size, ROGUE_BIF_PM_PHYSICAL_PAGE_SIZE);
+}
+
+/* Computes the region header stride ('stride_out', the size of one region
+ * header) and the total region header size across all layers ('size_out').
+ */
+static void pvr_rt_get_region_headers_stride_size(
+   const struct pvr_device *device,
+   const struct pvr_rt_mtile_info *mtile_info,
+   uint32_t layers,
+   uint64_t *const stride_out,
+   uint64_t *const size_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t rgn_header_size = rogue_get_region_header_size(dev_info);
+   uint32_t rgn_headers_size;
+   uint32_t num_tiles_x;
+   uint32_t num_tiles_y;
+   uint32_t group_size;
+   uint32_t version;
+
+   if (PVR_FEATURE_VALUE(dev_info, simple_parameter_format_version, &version))
+      version = 0;
+
+   /* Version 2 uses one region header per 2x2 tile group. */
+   group_size = version == 2 ? 2 : 1;
+
+   num_tiles_x = mtile_info->mtiles_x * mtile_info->tiles_per_mtile_x;
+   num_tiles_y = mtile_info->mtiles_y * mtile_info->tiles_per_mtile_y;
+
+   rgn_headers_size =
+      (num_tiles_x / group_size) * (num_tiles_y / group_size) * rgn_header_size;
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      rgn_headers_size =
+         ALIGN_POT(rgn_headers_size, PVRX(CR_TE_PSGREGION_ADDR_BASE_ALIGNMENT));
+   }
+
+   if (layers > 1) {
+      rgn_headers_size =
+         ALIGN_POT(rgn_headers_size, PVRX(CR_TE_PSG_REGION_STRIDE_UNIT_SIZE));
+   }
+
+   *stride_out = rgn_header_size;
+   *size_out = rgn_headers_size * layers;
+}
+
+/* Allocates the macrotile array (MTA) and MList for every RT data in a single
+ * buffer and records each RT data's device addresses, using
+ * PVR_DEV_ADDR_INVALID when a component has zero size.
+ */
+static VkResult
+pvr_rt_mta_mlist_data_init(struct pvr_device *device,
+                           struct pvr_rt_dataset *rt_dataset,
+                           const struct pvr_free_list *global_free_list,
+                           const struct pvr_free_list *local_free_list,
+                           const struct pvr_rt_mtile_info *mtile_info)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t mlist_size =
+      pvr_rt_get_mlist_size(global_free_list, local_free_list);
+   uint32_t mta_size = rogue_get_macrotile_array_size(dev_info);
+   const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas);
+   uint32_t rt_datas_mlist_size;
+   uint32_t rt_datas_mta_size;
+   pvr_dev_addr_t dev_addr;
+   VkResult result;
+
+   /* Allocate memory for macrotile array and Mlist for all RT datas.
+    *
+    * Allocation layout: MTA[0..N] + Mlist alignment padding + Mlist[0..N].
+    *
+    * N is number of RT datas.
+    */
+   rt_datas_mta_size = ALIGN_POT(mta_size * num_rt_datas,
+                                 PVRX(CR_PM_MLIST0_BASE_ADDR_ALIGNMENT));
+   rt_datas_mlist_size = mlist_size * num_rt_datas;
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.general_heap,
+                         rt_datas_mta_size + rt_datas_mlist_size,
+                         PVRX(CR_PM_MTILE_ARRAY_BASE_ADDR_ALIGNMENT),
+                         PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
+                         &rt_dataset->mta_mlist_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   dev_addr = rt_dataset->mta_mlist_bo->vma->dev_addr;
+
+   /* Carve out one MTA per RT data. */
+   for (uint32_t i = 0; i < num_rt_datas; i++) {
+      if (mta_size != 0) {
+         rt_dataset->rt_datas[i].mta_dev_addr = dev_addr;
+         dev_addr.addr += mta_size;
+      } else {
+         rt_dataset->rt_datas[i].mta_dev_addr = PVR_DEV_ADDR_INVALID;
+      }
+   }
+
+   /* MLists start after the (padded) MTA region. */
+   dev_addr.addr =
+      rt_dataset->mta_mlist_bo->vma->dev_addr.addr + rt_datas_mta_size;
+
+   for (uint32_t i = 0; i < num_rt_datas; i++) {
+      if (mlist_size != 0) {
+         rt_dataset->rt_datas[i].mlist_dev_addr = dev_addr;
+         dev_addr.addr += mlist_size;
+      } else {
+         rt_dataset->rt_datas[i].mlist_dev_addr = PVR_DEV_ADDR_INVALID;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Releases the MTA/MList buffer and invalidates the per-RT-data addresses. */
+static void pvr_rt_mta_mlist_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   for (uint32_t i = 0; i < ARRAY_SIZE(rt_dataset->rt_datas); i++) {
+      rt_dataset->rt_datas[i].mlist_dev_addr = PVR_DEV_ADDR_INVALID;
+      rt_dataset->rt_datas[i].mta_dev_addr = PVR_DEV_ADDR_INVALID;
+   }
+
+   pvr_bo_free(rt_dataset->device, rt_dataset->mta_mlist_bo);
+   rt_dataset->mta_mlist_bo = NULL;
+}
+
+/* Allocates the ISP region headers for all RT datas in a single buffer and
+ * records each RT data's region header device address.
+ */
+static VkResult
+pvr_rt_rgn_headers_data_init(struct pvr_device *device,
+                             struct pvr_rt_dataset *rt_dataset,
+                             const struct pvr_rt_mtile_info *mtile_info,
+                             uint32_t layers)
+{
+   const uint32_t num_rt_datas = ARRAY_SIZE(rt_dataset->rt_datas);
+   uint64_t rgn_headers_size;
+   pvr_dev_addr_t dev_addr;
+   VkResult result;
+
+   pvr_rt_get_region_headers_stride_size(device,
+                                         mtile_info,
+                                         layers,
+                                         &rt_dataset->rgn_headers_stride,
+                                         &rgn_headers_size);
+
+   result = pvr_bo_alloc(device,
+                         device->heaps.rgn_hdr_heap,
+                         rgn_headers_size * num_rt_datas,
+                         PVRX(CR_TE_PSGREGION_ADDR_BASE_ALIGNMENT),
+                         PVR_BO_ALLOC_FLAG_GPU_UNCACHED,
+                         &rt_dataset->rgn_headers_bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   dev_addr = rt_dataset->rgn_headers_bo->vma->dev_addr;
+
+   /* Carve out one region header block per RT data. */
+   for (uint32_t i = 0; i < num_rt_datas; i++) {
+      rt_dataset->rt_datas[i].rgn_headers_dev_addr = dev_addr;
+      dev_addr.addr += rgn_headers_size;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Releases the region header buffer and invalidates the per-RT-data
+ * addresses.
+ */
+static void pvr_rt_rgn_headers_data_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   for (uint32_t i = 0; i < ARRAY_SIZE(rt_dataset->rt_datas); i++)
+      rt_dataset->rt_datas[i].rgn_headers_dev_addr = PVR_DEV_ADDR_INVALID;
+
+   pvr_bo_free(rt_dataset->device, rt_dataset->rgn_headers_bo);
+   rt_dataset->rgn_headers_bo = NULL;
+}
+
+/* Initializes the per-RT-data allocations (macrotile array/MList and ISP
+ * region headers) for 'rt_dataset'. On failure any partially initialized
+ * state is undone and the failing step's error code is returned.
+ */
+static VkResult pvr_rt_datas_init(struct pvr_device *device,
+                                  struct pvr_rt_dataset *rt_dataset,
+                                  const struct pvr_free_list *global_free_list,
+                                  const struct pvr_free_list *local_free_list,
+                                  const struct pvr_rt_mtile_info *mtile_info,
+                                  uint32_t layers)
+{
+   VkResult result;
+
+   result = pvr_rt_mta_mlist_data_init(device,
+                                       rt_dataset,
+                                       global_free_list,
+                                       local_free_list,
+                                       mtile_info);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result =
+      pvr_rt_rgn_headers_data_init(device, rt_dataset, mtile_info, layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_mta_mlist_data_fini;
+
+   return VK_SUCCESS;
+
+err_pvr_rt_mta_mlist_data_fini:
+   pvr_rt_mta_mlist_data_fini(rt_dataset);
+
+   /* Propagate the error; previously this path wrongly returned VK_SUCCESS,
+    * making callers treat a failed (and already torn down) init as success.
+    */
+   return result;
+}
+
+/* Tears down the per-RT-data allocations in reverse order of
+ * pvr_rt_datas_init().
+ */
+static void pvr_rt_datas_fini(struct pvr_rt_dataset *rt_dataset)
+{
+   pvr_rt_rgn_headers_data_fini(rt_dataset);
+   pvr_rt_mta_mlist_data_fini(rt_dataset);
+}
+
+/* Packs the CR_ISP_MTILE_SIZE register value, scaling the macrotile
+ * dimensions up when the requested sample count exceeds what the core's
+ * isp_samples_per_pixel can cover.
+ */
+static uint32_t
+pvr_rogue_get_cr_isp_mtile_size_val(const struct pvr_device_info *dev_info,
+                                    uint32_t samples,
+                                    const struct pvr_rt_mtile_info *mtile_info)
+{
+   uint32_t samples_per_pixel =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0);
+   uint32_t isp_mtile_size;
+
+   pvr_csb_pack (&isp_mtile_size, CR_ISP_MTILE_SIZE, value) {
+      value.x = mtile_info->mtile_x1;
+      value.y = mtile_info->mtile_y1;
+
+      if (samples_per_pixel == 1) {
+         if (samples >= 4)
+            value.x <<= 1;
+
+         if (samples >= 2)
+            value.y <<= 1;
+      } else if (samples_per_pixel == 2) {
+         if (samples >= 8)
+            value.x <<= 1;
+
+         if (samples >= 4)
+            value.y <<= 1;
+      } else if (samples_per_pixel == 4) {
+         if (samples >= 8)
+            value.y <<= 1;
+      } else {
+         assert(!"Unsupported ISP samples per pixel value");
+      }
+   }
+
+   return isp_mtile_size;
+}
+
+/* Packs CR_PPP_MULTISAMPLECTL with the sample positions for 'samples'.
+ *
+ * Positions are on a 16x16 subpixel grid; when 'y_flip' is set each Y
+ * position is mirrored as (16 - y). The switch below intentionally falls
+ * through so higher sample counts also pack the lower-numbered positions.
+ */
+static uint64_t pvr_rogue_get_cr_multisamplectl_val(uint32_t samples,
+                                                    bool y_flip)
+{
+   /* Position tables indexed by log2(samples). */
+   static const struct {
+      uint8_t x[8];
+      uint8_t y[8];
+   } sample_positions[4] = {
+      /* 1 sample */
+      {
+         .x = { 8 },
+         .y = { 8 },
+      },
+      /* 2 samples */
+      {
+         .x = { 12, 4 },
+         .y = { 12, 4 },
+      },
+      /* 4 samples */
+      {
+         .x = { 6, 14, 2, 10 },
+         .y = { 2, 6, 10, 14 },
+      },
+      /* 8 samples */
+      {
+         .x = { 9, 7, 13, 5, 3, 1, 11, 15 },
+         .y = { 5, 11, 9, 3, 13, 7, 15, 1 },
+      },
+   };
+   uint64_t multisamplectl;
+   uint8_t idx;
+
+   idx = util_fast_log2(samples);
+   assert(idx < ARRAY_SIZE(sample_positions));
+
+   pvr_csb_pack (&multisamplectl, CR_PPP_MULTISAMPLECTL, value) {
+      switch (samples) {
+      case 8:
+         value.msaa_x7 = sample_positions[idx].x[7];
+         value.msaa_x6 = sample_positions[idx].x[6];
+         value.msaa_x5 = sample_positions[idx].x[5];
+         value.msaa_x4 = sample_positions[idx].x[4];
+
+         if (y_flip) {
+            value.msaa_y7 = 16U - sample_positions[idx].y[7];
+            value.msaa_y6 = 16U - sample_positions[idx].y[6];
+            value.msaa_y5 = 16U - sample_positions[idx].y[5];
+            value.msaa_y4 = 16U - sample_positions[idx].y[4];
+         } else {
+            value.msaa_y7 = sample_positions[idx].y[7];
+            value.msaa_y6 = sample_positions[idx].y[6];
+            value.msaa_y5 = sample_positions[idx].y[5];
+            value.msaa_y4 = sample_positions[idx].y[4];
+         }
+
+         FALLTHROUGH;
+      case 4:
+         value.msaa_x3 = sample_positions[idx].x[3];
+         value.msaa_x2 = sample_positions[idx].x[2];
+
+         if (y_flip) {
+            value.msaa_y3 = 16U - sample_positions[idx].y[3];
+            value.msaa_y2 = 16U - sample_positions[idx].y[2];
+         } else {
+            value.msaa_y3 = sample_positions[idx].y[3];
+            value.msaa_y2 = sample_positions[idx].y[2];
+         }
+
+         FALLTHROUGH;
+      case 2:
+         value.msaa_x1 = sample_positions[idx].x[1];
+
+         if (y_flip) {
+            value.msaa_y1 = 16U - sample_positions[idx].y[1];
+         } else {
+            value.msaa_y1 = sample_positions[idx].y[1];
+         }
+
+         FALLTHROUGH;
+      case 1:
+         value.msaa_x0 = sample_positions[idx].x[0];
+
+         if (y_flip) {
+            value.msaa_y0 = 16U - sample_positions[idx].y[0];
+         } else {
+            value.msaa_y0 = sample_positions[idx].y[0];
+         }
+
+         break;
+      default:
+         unreachable("Unsupported number of samples");
+      }
+   }
+
+   return multisamplectl;
+}
+
+/* Packs the CR_TE_AA register value, setting the anti-aliasing enable bits
+ * based on the requested sample count relative to the core's
+ * isp_samples_per_pixel capability.
+ */
+static uint32_t
+pvr_rogue_get_cr_te_aa_val(const struct pvr_device_info *dev_info,
+                           uint32_t samples)
+{
+   uint32_t samples_per_pixel =
+      PVR_GET_FEATURE_VALUE(dev_info, isp_samples_per_pixel, 0);
+   uint32_t te_aa;
+
+   pvr_csb_pack (&te_aa, CR_TE_AA, value) {
+      if (samples_per_pixel == 1) {
+         if (samples >= 2)
+            value.y = true;
+         if (samples >= 4)
+            value.x = true;
+      } else if (samples_per_pixel == 2) {
+         if (samples >= 2)
+            value.x2 = true;
+         if (samples >= 4)
+            value.y = true;
+         if (samples >= 8)
+            value.x = true;
+      } else if (samples_per_pixel == 4) {
+         if (samples >= 2)
+            value.x2 = true;
+         if (samples >= 4)
+            value.y2 = true;
+         if (samples >= 8)
+            value.y = true;
+      } else {
+         assert(!"Unsupported ISP samples per pixel value");
+      }
+   }
+
+   return te_aa;
+}
+
+/* Translate a fully initialized pvr_rt_dataset (plus its macrotile layout)
+ * into the winsys create-info used to create the kernel-side render target
+ * dataset object.
+ *
+ * NOTE: rt_dataset must be fully initialized before this is called; see the
+ * comment at the call site in pvr_render_target_dataset_create().
+ */
+static void pvr_rt_dataset_ws_create_info_init(
+   struct pvr_rt_dataset *rt_dataset,
+   const struct pvr_rt_mtile_info *mtile_info,
+   struct pvr_winsys_rt_dataset_create_info *create_info)
+{
+   struct pvr_device *device = rt_dataset->device;
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+
+   /* Zero everything so fields not set below have a defined value. */
+   memset(create_info, 0, sizeof(*create_info));
+
+   /* Local freelist. */
+   create_info->local_free_list = rt_dataset->local_free_list->ws_free_list;
+
+   /* ISP register values. */
+   /* ISP merge parameters are only programmed when ERN 42307 is present,
+    * and not on roguexe cores with a 16 pixel wide tile.
+    */
+   if (PVR_HAS_ERN(dev_info, 42307) &&
+       !(PVR_HAS_FEATURE(dev_info, roguexe) && mtile_info->tile_size_x == 16)) {
+      float value;
+
+      /* Guard against division by zero for degenerate dimensions. */
+      if (rt_dataset->width != 0) {
+         value =
+            ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR / (float)rt_dataset->width;
+         create_info->isp_merge_lower_x = fui(value);
+
+         value =
+            ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR / (float)rt_dataset->width;
+         create_info->isp_merge_upper_x = fui(value);
+      }
+
+      if (rt_dataset->height != 0) {
+         value =
+            ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR / (float)rt_dataset->height;
+         create_info->isp_merge_lower_y = fui(value);
+
+         value =
+            ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR / (float)rt_dataset->height;
+         create_info->isp_merge_upper_y = fui(value);
+      }
+
+      /* Scale factors derived from the render dimensions and the merge
+       * limit numerators above.
+       */
+      value = ((float)rt_dataset->width * ROGUE_ISP_MERGE_SCALE_FACTOR) /
+              (ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR -
+               ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR);
+      create_info->isp_merge_scale_x = fui(value);
+
+      value = ((float)rt_dataset->height * ROGUE_ISP_MERGE_SCALE_FACTOR) /
+              (ROGUE_ISP_MERGE_UPPER_LIMIT_NUMERATOR -
+               ROGUE_ISP_MERGE_LOWER_LIMIT_NUMERATOR);
+      create_info->isp_merge_scale_y = fui(value);
+   }
+
+   create_info->isp_mtile_size =
+      pvr_rogue_get_cr_isp_mtile_size_val(dev_info,
+                                          rt_dataset->samples,
+                                          mtile_info);
+
+   /* PPP register values. */
+   /* Sample positions are packed twice: once normally and once y-flipped. */
+   create_info->ppp_multi_sample_ctl =
+      pvr_rogue_get_cr_multisamplectl_val(rt_dataset->samples, false);
+   create_info->ppp_multi_sample_ctl_y_flipped =
+      pvr_rogue_get_cr_multisamplectl_val(rt_dataset->samples, true);
+
+   pvr_csb_pack (&create_info->ppp_screen, CR_PPP_SCREEN, value) {
+      value.pixxmax = rt_dataset->width - 1;
+      value.pixymax = rt_dataset->height - 1;
+   }
+
+   /* TE register values. */
+   create_info->te_aa =
+      pvr_rogue_get_cr_te_aa_val(dev_info, rt_dataset->samples);
+
+   /* Only the first macrotile boundary is programmed on cores with the
+    * simple internal parameter format.
+    */
+   pvr_csb_pack (&create_info->te_mtile1, CR_TE_MTILE1, value) {
+      value.x1 = mtile_info->mtile_x1;
+      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+         value.x2 = mtile_info->mtile_x2;
+         value.x3 = mtile_info->mtile_x3;
+      }
+   }
+
+   pvr_csb_pack (&create_info->te_mtile2, CR_TE_MTILE2, value) {
+      value.y1 = mtile_info->mtile_y1;
+      if (!PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+         value.y2 = mtile_info->mtile_y2;
+         value.y3 = mtile_info->mtile_y3;
+      }
+   }
+
+   pvr_csb_pack (&create_info->te_screen, CR_TE_SCREEN, value) {
+      value.xmax = mtile_info->x_tile_max;
+      value.ymax = mtile_info->y_tile_max;
+   }
+
+   /* Allocations and associated information. */
+   create_info->vheap_table_dev_addr = rt_dataset->vheap_dev_addr;
+   create_info->rtc_dev_addr = rt_dataset->rtc_dev_addr;
+
+   create_info->tpc_dev_addr = rt_dataset->tpc_bo->vma->dev_addr;
+   create_info->tpc_stride = rt_dataset->tpc_stride;
+   create_info->tpc_size = rt_dataset->tpc_size;
+
+   /* Per-rt_data allocation addresses (one set per buffered rt_data). */
+   STATIC_ASSERT(ARRAY_SIZE(create_info->rt_datas) ==
+                 ARRAY_SIZE(rt_dataset->rt_datas));
+   for (uint32_t i = 0; i < ARRAY_SIZE(create_info->rt_datas); i++) {
+      create_info->rt_datas[i].pm_mlist_dev_addr =
+         rt_dataset->rt_datas[i].mlist_dev_addr;
+      create_info->rt_datas[i].macrotile_array_dev_addr =
+         rt_dataset->rt_datas[i].mta_dev_addr;
+      create_info->rt_datas[i].rgn_header_dev_addr =
+         rt_dataset->rt_datas[i].rgn_headers_dev_addr;
+   }
+
+   create_info->rgn_header_size =
+      pvr_rt_get_isp_region_size(device, mtile_info);
+
+   /* Miscellaneous. */
+   create_info->mtile_stride = mtile_info->mtile_stride;
+   create_info->max_rts = rt_dataset->layers;
+}
+
+/* Create a render target dataset for the given dimensions, sample count and
+ * layer count.
+ *
+ * This allocates the dataset's supporting structures (local freelist,
+ * vheap/RTC data, TPC data and per-rt_data allocations) and then asks the
+ * winsys to create the kernel-side render target dataset object.
+ *
+ * On failure everything created so far is unwound in reverse order via the
+ * goto ladder and *rt_dataset_out is left unmodified.
+ */
+VkResult
+pvr_render_target_dataset_create(struct pvr_device *device,
+                                 uint32_t width,
+                                 uint32_t height,
+                                 uint32_t samples,
+                                 uint32_t layers,
+                                 struct pvr_rt_dataset **const rt_dataset_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   struct pvr_winsys_rt_dataset_create_info rt_dataset_create_info;
+   struct pvr_rt_mtile_info mtile_info;
+   struct pvr_rt_dataset *rt_dataset;
+   VkResult result;
+
+   assert(device->global_free_list);
+   assert(width <= rogue_get_render_size_max_x(dev_info));
+   assert(height <= rogue_get_render_size_max_y(dev_info));
+   assert(layers > 0 && layers <= PVR_MAX_FRAMEBUFFER_LAYERS);
+
+   /* Derive the macrotile layout for these dimensions and sample count. */
+   pvr_rt_mtile_info_init(device, &mtile_info, width, height, samples);
+
+   rt_dataset = vk_zalloc(&device->vk.alloc,
+                          sizeof(*rt_dataset),
+                          8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!rt_dataset)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   rt_dataset->device = device;
+   rt_dataset->width = width;
+   rt_dataset->height = height;
+   rt_dataset->samples = samples;
+   rt_dataset->layers = layers;
+   rt_dataset->global_free_list = device->global_free_list;
+
+   /* The maximum supported free list size is based on the assumption that this
+    * freelist (the "local" freelist) is always the minimum size required by
+    * the hardware. See the documentation of ROGUE_FREE_LIST_MAX_SIZE for more
+    * details.
+    */
+   result = pvr_free_list_create(device,
+                                 rogue_get_min_free_list_size(dev_info),
+                                 rogue_get_min_free_list_size(dev_info),
+                                 0 /* grow_size */,
+                                 0 /* grow_threshold */,
+                                 rt_dataset->global_free_list,
+                                 &rt_dataset->local_free_list);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_rt_dataset;
+
+   result = pvr_rt_vheap_rtc_data_init(device, rt_dataset, layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_free_list_destroy;
+
+   result = pvr_rt_tpc_data_init(device, rt_dataset, &mtile_info, layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_vheap_rtc_data_fini;
+
+   result = pvr_rt_datas_init(device,
+                              rt_dataset,
+                              rt_dataset->global_free_list,
+                              rt_dataset->local_free_list,
+                              &mtile_info,
+                              layers);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_tpc_data_fini;
+
+   /* rt_dataset must be fully initialized by this point since
+    * pvr_rt_dataset_ws_create_info_init() depends on this.
+    */
+   pvr_rt_dataset_ws_create_info_init(rt_dataset,
+                                      &mtile_info,
+                                      &rt_dataset_create_info);
+
+   result =
+      device->ws->ops->render_target_dataset_create(device->ws,
+                                                    &rt_dataset_create_info,
+                                                    &rt_dataset->ws_rt_dataset);
+   if (result != VK_SUCCESS)
+      goto err_pvr_rt_datas_fini;
+
+   *rt_dataset_out = rt_dataset;
+
+   return VK_SUCCESS;
+
+/* Unwind in the reverse order of creation. */
+err_pvr_rt_datas_fini:
+   pvr_rt_datas_fini(rt_dataset);
+
+err_pvr_rt_tpc_data_fini:
+   pvr_rt_tpc_data_fini(rt_dataset);
+
+err_pvr_rt_vheap_rtc_data_fini:
+   pvr_rt_vheap_rtc_data_fini(rt_dataset);
+
+err_pvr_free_list_destroy:
+   pvr_free_list_destroy(rt_dataset->local_free_list);
+
+err_vk_free_rt_dataset:
+   vk_free(&device->vk.alloc, rt_dataset);
+
+   return result;
+}
+
+/* Tear down a render target dataset created by
+ * pvr_render_target_dataset_create(), releasing its resources in the
+ * reverse order of their creation.
+ */
+void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *rt_dataset)
+{
+   struct pvr_device *dev = rt_dataset->device;
+
+   /* Destroy the kernel-side object before freeing the allocations it was
+    * created against.
+    */
+   dev->ws->ops->render_target_dataset_destroy(rt_dataset->ws_rt_dataset);
+
+   pvr_rt_datas_fini(rt_dataset);
+   pvr_rt_tpc_data_fini(rt_dataset);
+   pvr_rt_vheap_rtc_data_fini(rt_dataset);
+
+   pvr_free_list_destroy(rt_dataset->local_free_list);
+
+   vk_free(&dev->vk.alloc, rt_dataset);
+}
+
+/* Initialize the geometry (TA) phase winsys state for a render job: packs
+ * the PDS/PPP/TE/TPU/VDM control registers and derives the geometry
+ * submission flags.
+ */
+static void
+pvr_render_job_ws_geometry_state_init(struct pvr_render_ctx *ctx,
+                                      struct pvr_render_job *job,
+                                      struct pvr_winsys_geometry_state *state)
+{
+   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+
+   /* FIXME: Should this just be done unconditionally? The firmware will just
+    * ignore the value anyway.
+    */
+   if (PVR_HAS_QUIRK(dev_info, 56279)) {
+      pvr_csb_pack (&state->regs.pds_ctrl, CR_PDS_CTRL, value) {
+         value.max_num_vdm_tasks = rogue_get_max_num_vdm_pds_tasks(dev_info);
+      }
+   } else {
+      state->regs.pds_ctrl = 0;
+   }
+
+   pvr_csb_pack (&state->regs.ppp_ctrl, CR_PPP_CTRL, value) {
+      value.wclampen = true;
+      value.fixed_point_format = 1;
+   }
+
+   pvr_csb_pack (&state->regs.te_psg, CR_TE_PSG, value) {
+      value.completeonterminate = job->geometry_terminate;
+
+      /* Region header stride is expressed in hardware stride units. */
+      value.region_stride = job->rt_dataset->rgn_headers_stride /
+                            PVRX(CR_TE_PSG_REGION_STRIDE_UNIT_SIZE);
+
+      value.forcenewstate = PVR_HAS_QUIRK(dev_info, 52942);
+   }
+
+   /* The set up of CR_TPU must be identical to
+    * pvr_render_job_ws_fragment_state_init().
+    */
+   pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
+      value.tag_cem_4k_face_packing = true;
+   }
+
+   pvr_csb_pack (&state->regs.tpu_border_colour_table,
+                 CR_TPU_BORDER_COLOUR_TABLE_VDM,
+                 value) {
+      value.border_colour_table_address = job->border_colour_table_addr;
+   }
+
+   pvr_csb_pack (&state->regs.vdm_ctrl_stream_base,
+                 CR_VDM_CTRL_STREAM_BASE,
+                 value) {
+      value.addr = job->ctrl_stream_addr;
+   }
+
+   /* Set up the USC common size for the context switch resume/load program
+    * (ctx->ctx_switch.programs[i].sr->pds_load_program), which was created
+    * as part of the render context.
+    */
+   pvr_csb_pack (&state->regs.vdm_ctx_resume_task0_size,
+                 VDMCTRL_PDS_STATE0,
+                 value) {
+      /* Calculate the size in bytes. */
+      const uint16_t shared_registers_size = job->max_shared_registers * 4;
+
+      value.usc_common_size =
+         DIV_ROUND_UP(shared_registers_size,
+                      PVRX(VDMCTRL_PDS_STATE0_USC_COMMON_SIZE_UNIT_SIZE));
+   };
+
+   /* Submission flags derived from the job and rt_dataset state. */
+   state->flags = 0;
+
+   if (!job->rt_dataset->need_frag)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY;
+
+   if (job->geometry_terminate)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY;
+
+   if (job->frag_uses_atomic_ops)
+      state->flags |= PVR_WINSYS_GEOM_FLAG_SINGLE_CORE;
+}
+
+/* Compute the number of ISP tiles covering a width x height render at the
+ * given sample count, accounting for per-axis supersampling and, on SIPF
+ * cores, 2x2 tile block alignment.
+ */
+static inline void
+pvr_get_isp_num_tiles_xy(const struct pvr_device_info *dev_info,
+                         uint32_t samples,
+                         uint32_t width,
+                         uint32_t height,
+                         uint32_t *const x_out,
+                         uint32_t *const y_out)
+{
+   uint32_t samples_per_tile_x;
+   uint32_t samples_per_tile_y;
+   uint32_t num_tiles_x;
+   uint32_t num_tiles_y;
+   uint32_t x_scale;
+   uint32_t y_scale;
+
+   rogue_get_isp_samples_per_tile_xy(dev_info,
+                                     samples,
+                                     &samples_per_tile_x,
+                                     &samples_per_tile_y);
+
+   /* Per-axis supersampling factors for the given MSAA mode. */
+   switch (samples) {
+   case 1:
+      x_scale = 1;
+      y_scale = 1;
+      break;
+   case 2:
+      x_scale = 1;
+      y_scale = 2;
+      break;
+   case 4:
+      x_scale = 2;
+      y_scale = 2;
+      break;
+   case 8:
+      x_scale = 2;
+      y_scale = 4;
+      break;
+   default:
+      unreachable("Unsupported number of samples");
+   }
+
+   num_tiles_x = DIV_ROUND_UP(width * x_scale, samples_per_tile_x);
+   num_tiles_y = DIV_ROUND_UP(height * y_scale, samples_per_tile_y);
+
+   if (PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format)) {
+      assert(PVR_GET_FEATURE_VALUE(dev_info,
+                                   simple_parameter_format_version,
+                                   0U) == 2U);
+      /* SIPF v2 requires tile counts aligned to a 2x2 tile block. */
+      num_tiles_x = ALIGN_POT(num_tiles_x, 2);
+      num_tiles_y = ALIGN_POT(num_tiles_y, 2);
+   }
+
+   *x_out = num_tiles_x;
+   *y_out = num_tiles_y;
+}
+
+/* Initialize the fragment (3D) phase winsys state for a render job: packs
+ * the ISP/TPU/PBE/event control registers and derives the fragment
+ * submission flags.
+ */
+static void
+pvr_render_job_ws_fragment_state_init(struct pvr_render_ctx *ctx,
+                                      struct pvr_render_job *job,
+                                      struct pvr_winsys_fragment_state *state)
+{
+   const struct pvr_device_info *dev_info = &ctx->device->pdevice->dev_info;
+   enum PVRX(CR_ISP_AA_MODE_TYPE) isp_aa_mode;
+   uint32_t isp_ctl;
+
+   /* FIXME: what to do when job->run_frag is false? */
+
+   /* Valid Vulkan sample counts are powers of two, so 4x MSAA must be
+    * selected on samples == 4 (previously this was mistakenly 'case 3',
+    * which made a 4 sample job hit unreachable()).
+    */
+   switch (job->samples) {
+   case 1:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_NONE);
+      break;
+   case 2:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_2X);
+      break;
+   case 4:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_4X);
+      break;
+   case 8:
+      isp_aa_mode = PVRX(CR_ISP_AA_MODE_TYPE_AA_8X);
+      break;
+   default:
+      unreachable("Unsupported number of samples");
+   }
+
+   /* FIXME: pass in the number of samples rather than isp_aa_mode? */
+   pvr_setup_tiles_in_flight(dev_info,
+                             isp_aa_mode,
+                             job->pixel_output_width,
+                             false,
+                             job->max_tiles_in_flight,
+                             &isp_ctl,
+                             &state->regs.usc_pixel_output_ctrl);
+
+   pvr_csb_pack (&state->regs.isp_ctl, CR_ISP_CTL, value) {
+      value.sample_pos = true;
+
+      /* FIXME: There are a number of things that cause this to be set, this
+       * is just one of them.
+       */
+      value.process_empty_tiles = job->process_empty_tiles;
+   }
+
+   /* FIXME: When pvr_setup_tiles_in_flight() is refactored it might be
+    * possible to fully pack CR_ISP_CTL above rather than having to OR in part
+    * of the value.
+    */
+   state->regs.isp_ctl |= isp_ctl;
+
+   pvr_csb_pack (&state->regs.isp_aa, CR_ISP_AA, value) {
+      value.mode = isp_aa_mode;
+   }
+
+   /* The set up of CR_TPU must be identical to
+    * pvr_render_job_ws_geometry_state_init().
+    */
+   pvr_csb_pack (&state->regs.tpu, CR_TPU, value) {
+      value.tag_cem_4k_face_packing = true;
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, cluster_grouping) &&
+       PVR_HAS_FEATURE(dev_info, slc_mcu_cache_controls) &&
+       rogue_get_num_phantoms(dev_info) > 1 && job->frag_uses_atomic_ops) {
+      /* Each phantom has its own MCU, so atomicity can only be guaranteed
+       * when all work items are processed on the same phantom. This means we
+       * need to disable all USCs other than those of the first phantom, which
+       * has 4 clusters. Note that we only need to do this for atomic
+       * operations in fragment shaders, since hardware prevents the TA to run
+       * on more than one phantom anyway.
+       */
+      state->regs.pixel_phantom = 0xF;
+   } else {
+      state->regs.pixel_phantom = 0;
+   }
+
+   pvr_csb_pack (&state->regs.isp_bgobjvals, CR_ISP_BGOBJVALS, value) {
+      value.enablebgtag = job->enable_bg_tag;
+
+      value.mask = true;
+
+      /* FIXME: Hard code this for now as we don't currently support any
+       * stencil image formats.
+       */
+      value.stencil = 0xFF;
+   }
+
+   pvr_csb_pack (&state->regs.isp_bgobjdepth, CR_ISP_BGOBJDEPTH, value) {
+      /* FIXME: This is suitable for the single depth format the driver
+       * currently supports, but may need updating to handle other depth
+       * formats.
+       */
+      value.value = fui(job->depth_clear_value);
+   }
+
+   /* FIXME: Some additional set up needed to support depth and stencil
+    * load/store operations.
+    */
+   pvr_csb_pack (&state->regs.isp_zlsctl, CR_ISP_ZLSCTL, value) {
+      uint32_t aligned_width =
+         ALIGN_POT(job->depth_physical_width, ROGUE_IPF_TILE_SIZE_PIXELS);
+      uint32_t aligned_height =
+         ALIGN_POT(job->depth_physical_height, ROGUE_IPF_TILE_SIZE_PIXELS);
+
+      /* Extent fields are inclusive, hence the -1 after the tile counts. */
+      pvr_get_isp_num_tiles_xy(dev_info,
+                               job->samples,
+                               aligned_width,
+                               aligned_height,
+                               &value.zlsextent_x_z,
+                               &value.zlsextent_y_z);
+      value.zlsextent_x_z -= 1;
+      value.zlsextent_y_z -= 1;
+
+      if (job->depth_memlayout == PVR_MEMLAYOUT_TWIDDLED) {
+         value.loadtwiddled = true;
+         value.storetwiddled = true;
+      }
+
+      /* FIXME: This is suitable for the single depth format the driver
+       * currently supports, but may need updating to handle other depth
+       * formats.
+       */
+      assert(job->depth_vk_format == VK_FORMAT_D32_SFLOAT);
+      value.zloadformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
+      value.zstoreformat = PVRX(CR_ZSTOREFORMAT_TYPE_F32Z);
+   }
+
+   if (PVR_HAS_FEATURE(dev_info, zls_subtile)) {
+      pvr_csb_pack (&state->regs.isp_zls_pixels, CR_ISP_ZLS_PIXELS, value) {
+         value.x = job->depth_stride - 1;
+         value.y = job->depth_height - 1;
+      }
+   } else {
+      state->regs.isp_zls_pixels = 0;
+   }
+
+   pvr_csb_pack (&state->regs.isp_zload_store_base, CR_ISP_ZLOAD_BASE, value) {
+      value.addr = job->depth_addr;
+   }
+
+   pvr_csb_pack (&state->regs.isp_stencil_load_store_base,
+                 CR_ISP_STENCIL_LOAD_BASE,
+                 value) {
+      value.addr = job->stencil_addr;
+
+      /* FIXME: May need to set value.enable to true. */
+   }
+
+   pvr_csb_pack (&state->regs.tpu_border_colour_table,
+                 CR_TPU_BORDER_COLOUR_TABLE_PDM,
+                 value) {
+      value.border_colour_table_address = job->border_colour_table_addr;
+   }
+
+   state->regs.isp_oclqry_base = 0;
+
+   pvr_csb_pack (&state->regs.isp_dbias_base, CR_ISP_DBIAS_BASE, value) {
+      value.addr = job->depth_bias_table_addr;
+   }
+
+   pvr_csb_pack (&state->regs.isp_scissor_base, CR_ISP_SCISSOR_BASE, value) {
+      value.addr = job->scissor_table_addr;
+   }
+
+   pvr_csb_pack (&state->regs.event_pixel_pds_info,
+                 CR_EVENT_PIXEL_PDS_INFO,
+                 value) {
+      value.const_size =
+         DIV_ROUND_UP(ctx->device->pixel_event_data_size_in_dwords,
+                      PVRX(CR_EVENT_PIXEL_PDS_INFO_CONST_SIZE_UNIT_SIZE));
+      value.temp_stride = 0;
+      value.usc_sr_size =
+         DIV_ROUND_UP(PVR_STATE_PBE_DWORDS,
+                      PVRX(CR_EVENT_PIXEL_PDS_INFO_USC_SR_SIZE_UNIT_SIZE));
+   }
+
+   pvr_csb_pack (&state->regs.event_pixel_pds_data,
+                 CR_EVENT_PIXEL_PDS_DATA,
+                 value) {
+      value.addr.addr = job->pds_pixel_event_data_offset;
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word) ==
+                 ARRAY_SIZE(job->pbe_reg_words));
+   STATIC_ASSERT(ARRAY_SIZE(state->regs.pbe_word[0]) ==
+                 ARRAY_SIZE(job->pbe_reg_words[0]));
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(job->pbe_reg_words); i++) {
+      state->regs.pbe_word[i][0] = job->pbe_reg_words[i][0];
+      state->regs.pbe_word[i][1] = job->pbe_reg_words[i][1];
+      state->regs.pbe_word[i][2] = job->pbe_reg_words[i][2];
+   }
+
+   STATIC_ASSERT(__same_type(state->regs.pds_bgnd, job->pds_bgnd_reg_values));
+   typed_memcpy(state->regs.pds_bgnd,
+                job->pds_bgnd_reg_values,
+                ARRAY_SIZE(state->regs.pds_bgnd));
+
+   memset(state->regs.pds_pr_bgnd, 0, sizeof(state->regs.pds_pr_bgnd));
+
+   /* FIXME: Merge geometry and fragment flags into a single flags member? */
+   /* FIXME: move to its own function? */
+   state->flags = 0;
+
+   if (job->depth_addr.addr)
+      state->flags |= PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT;
+
+   if (job->stencil_addr.addr)
+      state->flags |= PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT;
+
+   if (job->disable_compute_overlap)
+      state->flags |= PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP;
+
+   if (job->frag_uses_atomic_ops)
+      state->flags |= PVR_WINSYS_FRAG_FLAG_SINGLE_CORE;
+
+   state->zls_stride = job->depth_layer_size;
+   state->sls_stride = job->depth_layer_size;
+}
+
+/* Populate the winsys render submission descriptor for a job, including the
+ * packed geometry and fragment phase state.
+ */
+static void pvr_render_job_ws_submit_info_init(
+   struct pvr_render_ctx *ctx,
+   struct pvr_render_job *job,
+   const struct pvr_winsys_job_bo *bos,
+   uint32_t bo_count,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_render_submit_info *submit_info)
+{
+   /* Start from a clean slate so any field not set below is zero. */
+   memset(submit_info, 0, sizeof(*submit_info));
+
+   /* Render target data selection for this job. */
+   submit_info->rt_dataset = job->rt_dataset->ws_rt_dataset;
+   submit_info->rt_data_idx = job->rt_dataset->rt_data_idx;
+
+   /* Bookkeeping counters. */
+   submit_info->frame_num = ctx->device->global_queue_present_count;
+   submit_info->job_num = ctx->device->global_queue_job_count;
+
+   submit_info->run_frag = job->run_frag;
+
+   /* Buffer objects referenced by the job. */
+   submit_info->bos = bos;
+   submit_info->bo_count = bo_count;
+
+   /* Wait semaphores and their pipeline stage flags. */
+   submit_info->semaphores = semaphores;
+   submit_info->semaphore_count = semaphore_count;
+   submit_info->stage_flags = stage_flags;
+
+   /* FIXME: add WSI image bos. */
+
+   pvr_render_job_ws_geometry_state_init(ctx, job, &submit_info->geometry);
+   pvr_render_job_ws_fragment_state_init(ctx, job, &submit_info->fragment);
+
+   /* Both state init helpers must pack CR_TPU identically. */
+   assert(submit_info->geometry.regs.tpu == submit_info->fragment.regs.tpu);
+}
+
+/* Submit a render job to the winsys and, on success, update the render
+ * target dataset's rt_data rotation and need_frag tracking.
+ */
+VkResult
+pvr_render_job_submit(struct pvr_render_ctx *ctx,
+                      struct pvr_render_job *job,
+                      const struct pvr_winsys_job_bo *bos,
+                      uint32_t bo_count,
+                      const VkSemaphore *semaphores,
+                      uint32_t semaphore_count,
+                      uint32_t *stage_flags,
+                      struct pvr_winsys_syncobj **const syncobj_geom_out,
+                      struct pvr_winsys_syncobj **const syncobj_frag_out)
+{
+   struct pvr_rt_dataset *rt_dataset = job->rt_dataset;
+   struct pvr_winsys_render_submit_info submit_info;
+   struct pvr_device *device = ctx->device;
+   VkResult result;
+
+   /* Gather all job state into the winsys submission descriptor. */
+   pvr_render_job_ws_submit_info_init(ctx,
+                                      job,
+                                      bos,
+                                      bo_count,
+                                      semaphores,
+                                      semaphore_count,
+                                      stage_flags,
+                                      &submit_info);
+
+   result = device->ws->ops->render_submit(ctx->ws_ctx,
+                                           &submit_info,
+                                           syncobj_geom_out,
+                                           syncobj_frag_out);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* A geometry-only submission leaves the current rt_data in use, so the
+    * matching fragment job is still outstanding.
+    */
+   rt_dataset->need_frag = !job->run_frag;
+
+   if (job->run_frag) {
+      /* A fragment job was submitted, so move to the next render target data
+       * to allow the next geometry job to run in parallel with it.
+       */
+      rt_dataset->rt_data_idx =
+         (rt_dataset->rt_data_idx + 1) % ARRAY_SIZE(rt_dataset->rt_datas);
+   }
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/pvr_job_render.h b/src/imagination/vulkan/pvr_job_render.h
new file mode 100644 (file)
index 0000000..58fae42
--- /dev/null
@@ -0,0 +1,128 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_JOB_RENDER_H
+#define PVR_JOB_RENDER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_limits.h"
+#include "pvr_winsys.h"
+
+struct pvr_device;
+struct pvr_free_list;
+struct pvr_render_ctx;
+struct pvr_rt_dataset;
+
+/* FIXME: Turn 'struct pvr_sub_cmd' into 'struct pvr_job' and change 'struct
+ * pvr_render_job' to subclass it? This is approximately what v3dv does
+ * (although it doesn't subclass).
+ */
+struct pvr_render_job {
+   /* Render target dataset this job renders into. */
+   struct pvr_rt_dataset *rt_dataset;
+
+   /* True when a fragment (3D) phase should run for this submission. */
+   bool run_frag;
+   /* True when this geometry job terminates the tile lists
+    * (CR_TE_PSG.completeonterminate).
+    */
+   bool geometry_terminate;
+   /* Fragment shaders use atomic ops; restricts execution to a single
+    * core/phantom.
+    */
+   bool frag_uses_atomic_ops;
+   /* Prevents compute (CDM) work overlapping this fragment job. */
+   bool disable_compute_overlap;
+   /* Enables background object tagging (CR_ISP_BGOBJVALS.enablebgtag). */
+   bool enable_bg_tag;
+   /* Forwarded to CR_ISP_CTL.process_empty_tiles. */
+   bool process_empty_tiles;
+
+   /* Offset of the PDS pixel event data (CR_EVENT_PIXEL_PDS_DATA.addr). */
+   uint32_t pds_pixel_event_data_offset;
+
+   /* VDM control stream base address. */
+   pvr_dev_addr_t ctrl_stream_addr;
+
+   pvr_dev_addr_t border_colour_table_addr;
+   pvr_dev_addr_t depth_bias_table_addr;
+   pvr_dev_addr_t scissor_table_addr;
+
+   /* Depth buffer parameters. A zero depth_addr.addr means no depth buffer
+    * is present.
+    */
+   pvr_dev_addr_t depth_addr;
+   uint32_t depth_stride;
+   uint32_t depth_height;
+   /* Physical dimensions; aligned up to the IPF tile size when computing the
+    * ZLS extents.
+    */
+   uint32_t depth_physical_width;
+   uint32_t depth_physical_height;
+   /* Per-layer size; used as both the ZLS and SLS stride. */
+   uint32_t depth_layer_size;
+   float depth_clear_value;
+   /* Currently only VK_FORMAT_D32_SFLOAT is supported (asserted at use). */
+   VkFormat depth_vk_format;
+   /* FIXME: This should be of type 'enum pvr_memlayout', but this is defined
+    * in pvr_private.h, which causes a circular include dependency. For now,
+    * treat it has a uint32_t. A couple of ways to possibly fix this:
+    *
+    *   1. Merge the contents of this header file into pvr_private.h.
+    *   2. Move 'enum pvr_memlayout' into it a new header that can be included
+    *      by both this header and pvr_private.h.
+    */
+   uint32_t depth_memlayout;
+
+   /* Stencil buffer address; zero means no stencil buffer is present. */
+   pvr_dev_addr_t stencil_addr;
+
+   /* MSAA sample count; must be 1, 2, 4 or 8. */
+   uint32_t samples;
+
+   uint32_t pixel_output_width;
+
+   /* Number of USC shared registers; used to size the VDM context resume
+    * task's USC common area.
+    */
+   uint8_t max_shared_registers;
+
+   /* Upper limit for tiles in flight, '0' means use default limit based
+    * on partition store.
+    */
+   uint32_t max_tiles_in_flight;
+
+   /* Pre-packed PBE register words, one set per colour attachment. */
+   uint64_t pbe_reg_words[PVR_MAX_COLOR_ATTACHMENTS]
+                         [ROGUE_NUM_PBESTATE_REG_WORDS];
+
+   /* Pre-packed PDS background object register values. */
+   uint64_t pds_bgnd_reg_values[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+};
+
+/* Create a free list with the given initial/maximum sizes and growth
+ * parameters; grow_size/grow_threshold of 0 disable growth. The new list may
+ * optionally be chained to parent_free_list.
+ */
+VkResult pvr_free_list_create(struct pvr_device *device,
+                              uint32_t initial_size,
+                              uint32_t max_size,
+                              uint32_t grow_size,
+                              uint32_t grow_threshold,
+                              struct pvr_free_list *parent_free_list,
+                              struct pvr_free_list **const free_list_out);
+void pvr_free_list_destroy(struct pvr_free_list *free_list);
+
+/* Create/destroy the render target dataset (per-RT hardware state) used by
+ * render jobs of the given dimensions, sample count and layer count.
+ */
+VkResult
+pvr_render_target_dataset_create(struct pvr_device *device,
+                                 uint32_t width,
+                                 uint32_t height,
+                                 uint32_t samples,
+                                 uint32_t layers,
+                                 struct pvr_rt_dataset **const rt_dataset_out);
+void pvr_render_target_dataset_destroy(struct pvr_rt_dataset *dataset);
+
+/* Submit a render job; on success returns winsys sync objects for the
+ * geometry and fragment phases via the out parameters.
+ */
+VkResult
+pvr_render_job_submit(struct pvr_render_ctx *ctx,
+                      struct pvr_render_job *job,
+                      const struct pvr_winsys_job_bo *bos,
+                      uint32_t bo_count,
+                      const VkSemaphore *semaphores,
+                      uint32_t semaphore_count,
+                      uint32_t *stage_flags,
+                      struct pvr_winsys_syncobj **const syncobj_geom_out,
+                      struct pvr_winsys_syncobj **const syncobj_frag_out);
+
+#endif /* PVR_JOB_RENDER_H */
diff --git a/src/imagination/vulkan/pvr_limits.h b/src/imagination/vulkan/pvr_limits.h
new file mode 100644 (file)
index 0000000..cda91e5
--- /dev/null
@@ -0,0 +1,64 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * Constants for VkPhysicalDeviceLimits.
+ */
+
+#ifndef PVR_LIMITS_H
+#define PVR_LIMITS_H
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_device_info.h"
+#include "util/u_math.h"
+
+#define PVR_MAX_COLOR_ATTACHMENTS 8U
+#define PVR_MAX_QUEUES 2U
+#define PVR_MAX_VIEWPORTS 1U
+#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
+
+#define PVR_MAX_PUSH_CONSTANTS_SIZE 128U
+
+#define PVR_MAX_DESCRIPTOR_SETS 4U
+#define PVR_MAX_FRAMEBUFFER_LAYERS ROGUE_MAX_RENDER_TARGETS
+
+/* The limit is somewhat arbitrary, it just translates into more pds code
+ * and larger arrays, 32 appears to be the popular (and highest choice) across
+ * other implementations.
+ */
+#define PVR_MAX_VERTEX_INPUT_BINDINGS 16U
+
+/* We need one RenderTarget per supported MSAA mode as each render target
+ * contains state that is dependent on the sample count of the render that is
+ * rendering to it.
+ *
+ * As we do not know the sample count until we know the renderpass framebuffer
+ * combination being used, we create one per supported sample mode.
+ */
+#define PVR_RENDER_TARGETS_PER_FRAMEBUFFER(dev_info)                         \
+   ({                                                                        \
+      uint32_t __ret = PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4U); \
+      util_logbase2(__ret) + 1;                                              \
+   })
+
+#endif
diff --git a/src/imagination/vulkan/pvr_pass.c b/src/imagination/vulkan/pvr_pass.c
new file mode 100644 (file)
index 0000000..3b18855
--- /dev/null
@@ -0,0 +1,587 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "hwdef/rogue_hw_utils.h"
+#include "pvr_bo.h"
+#include "pvr_device_info.h"
+#include "pvr_formats.h"
+#include "pvr_hw_pass.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_usc_fragment_shader.h"
+#include "rogue/rogue.h"
+#include "vk_alloc.h"
+#include "vk_format.h"
+#include "vk_log.h"
+
+/*****************************************************************************
+  PDS pre-baked program generation parameters and variables.
+*****************************************************************************/
+/* These would normally be produced by the compiler or other code. We're using
+ * them for now just to speed up things. All of these should eventually be
+ * removed.
+ */
+
+static const struct {
+   /* Indicates the amount of temporaries for the shader. */
+   uint32_t temp_count;
+   /* MSAA mode the pre-baked fragment shader was built for. */
+   enum rogue_msaa_mode msaa_mode;
+   /* Indicates the presence of PHAS instruction. */
+   bool has_phase_rate_change;
+} pvr_pds_fragment_program_params = {
+   .temp_count = 0,
+   .msaa_mode = ROGUE_MSAA_MODE_PIXEL,
+   .has_phase_rate_change = false,
+};
+
+/* Returns true if any used input attachment of the subpass is multisampled.
+ *
+ * NOTE(review): the original indexed pAttachments[] without skipping
+ * VK_ATTACHMENT_UNUSED (~0U) entries, which would read out of bounds for a
+ * subpass that declares an unused input attachment.
+ */
+static inline bool pvr_subpass_has_msaa_input_attachment(
+   struct pvr_render_subpass *subpass,
+   const VkRenderPassCreateInfo2KHR *pCreateInfo)
+{
+   for (uint32_t i = 0; i < subpass->input_count; i++) {
+      const uint32_t attachment = subpass->input_attachments[i];
+
+      /* Unused input attachments have no samples to inspect. */
+      if (attachment == VK_ATTACHMENT_UNUSED)
+         continue;
+
+      if (pCreateInfo->pAttachments[attachment].samples > 1)
+         return true;
+   }
+
+   return false;
+}
+
+/* Total number of attachment references a subpass description uses:
+ * inputs + colors + resolves (one per color when present) + depth/stencil.
+ */
+static inline size_t
+pvr_num_subpass_attachments(const VkSubpassDescription2 *desc)
+{
+   size_t count = desc->inputAttachmentCount;
+
+   count += desc->colorAttachmentCount;
+
+   if (desc->pResolveAttachments)
+      count += desc->colorAttachmentCount;
+
+   if (desc->pDepthStencilAttachment)
+      count++;
+
+   return count;
+}
+
+/* Decides whether the HW render's init (load/clear) ops must be flushed as
+ * a separate pass before the first subpass, rather than being folded into
+ * it, to stop HSR from discarding their results.
+ */
+static bool pvr_is_subpass_initops_flush_needed(
+   const struct pvr_render_pass *pass,
+   const struct pvr_renderpass_hwsetup_render *hw_render)
+{
+   struct pvr_render_subpass *subpass = &pass->subpasses[0];
+   uint32_t render_loadop_mask = 0;
+   uint32_t color_attachment_mask;
+
+   /* Build a mask of attachments that have a non-NOP init op. */
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      if (hw_render->color_init[i].op != RENDERPASS_SURFACE_INITOP_NOP)
+         render_loadop_mask |= (1 << hw_render->color_init[i].driver_id);
+   }
+
+   /* If there are no load ops then there's nothing to flush. */
+   if (render_loadop_mask == 0)
+      return false;
+
+   /* If the first subpass has any input attachments, they need to be
+    * initialized with the result of the load op. Since the input attachment
+    * may be read from fragments with an opaque pass type, the load ops must be
+    * flushed or else they would be obscured and eliminated by HSR.
+    */
+   if (subpass->input_count != 0)
+      return true;
+
+   color_attachment_mask = 0;
+
+   /* Mask of attachments the first subpass writes to (-1 == unused slot). */
+   for (uint32_t i = 0; i < subpass->color_count; i++) {
+      const int32_t color_idx = subpass->color_attachments[i];
+
+      if (color_idx != -1)
+         color_attachment_mask |= (1 << pass->attachments[color_idx].index);
+   }
+
+   /* If the first subpass does not write to all attachments which have a load
+    * op then the load ops need to be flushed to ensure they don't get obscured
+    * and removed by HSR.
+    */
+   return (render_loadop_mask & color_attachment_mask) != render_loadop_mask;
+}
+
+/* Assigns a userpass spawn value to every subpass. Within a HW render the
+ * subpasses are numbered consecutively, biased by one when the render's
+ * init ops must run as their own flushed pass first.
+ */
+static void
+pvr_init_subpass_userpass_spawn(struct pvr_renderpass_hwsetup *hw_setup,
+                                struct pvr_render_pass *pass,
+                                struct pvr_render_subpass *subpasses)
+{
+   struct pvr_render_subpass *subpass = subpasses;
+
+   for (uint32_t render = 0; render < hw_setup->render_count; render++) {
+      struct pvr_renderpass_hwsetup_render *hw_render =
+         &hw_setup->renders[render];
+      const uint32_t base_spawn =
+         pvr_is_subpass_initops_flush_needed(pass, hw_render) ? 1U : 0U;
+
+      for (uint32_t j = 0; j < hw_render->subpass_count; j++, subpass++)
+         subpass->userpass_spawn = base_spawn + j;
+   }
+
+   assert((uint32_t)(subpass - subpasses) == pass->subpass_count);
+}
+
+/* Reports whether any MRT resource in the render's init setup is backed by
+ * USC output registers.
+ */
+static inline bool pvr_has_output_register_writes(
+   const struct pvr_renderpass_hwsetup_render *hw_render)
+{
+   const uint32_t count = hw_render->init_setup.render_targets_count;
+
+   for (uint32_t i = 0; i < count; i++) {
+      const struct usc_mrt_resource *resource =
+         &hw_render->init_setup.mrt_resources[i];
+
+      if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REGISTER)
+         return true;
+   }
+
+   return false;
+}
+
+/* Creates the PDS texture state program used by load ops and uploads it.
+ *
+ * If allocator == NULL, the internal device allocator will be used.
+ */
+static VkResult pvr_pds_texture_state_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_pixel_shader_sa_program program = {
+      .num_texture_dma_kicks = 1,
+   };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program);
+
+   /* The staging buffer holds the code segment only. */
+   staging_buffer_size = program.code_size * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer);
+
+   /* The code segment was generated at offset 0 of a code-only staging
+    * buffer, so pass staging_buffer directly; the original indexed it at
+    * program.data_size, reading past the end of the allocation.
+    */
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               NULL,
+                               0,
+                               0,
+                               staging_buffer,
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+
+   /* The staging copy is no longer needed whatever the upload result. */
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return result;
+}
+
+/* Allocates a pvr_load_op for the HW render and uploads the USC fragment
+ * shader plus the two PDS programs (fragment kick + texture state) it needs.
+ *
+ * If allocator == NULL, the internal device allocator will be used.
+ * On success, ownership of *load_op_out passes to the caller, who must
+ * release it with pvr_load_op_destroy().
+ */
+static VkResult
+pvr_load_op_create(struct pvr_device *device,
+                   const VkAllocationCallbacks *allocator,
+                   struct pvr_renderpass_hwsetup_render *hw_render,
+                   struct pvr_load_op **const load_op_out)
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
+   struct pvr_load_op *load_op;
+   VkResult result;
+
+   load_op = vk_zalloc2(&device->vk.alloc,
+                        allocator,
+                        sizeof(*load_op),
+                        8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!load_op)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Record which color inits are clears; actual 'load' ops are TODO. */
+   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
+      struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];
+
+      if (color_init->op == RENDERPASS_SURFACE_INITOP_CLEAR)
+         load_op->clear_mask |= 1U << i;
+      else if (color_init->op == RENDERPASS_SURFACE_INITOP_LOAD)
+         pvr_finishme("Missing 'load' load op");
+   }
+
+   result = pvr_gpu_upload_usc(device,
+                               pvr_usc_fragment_shader,
+                               sizeof(pvr_usc_fragment_shader),
+                               cache_line_size,
+                               &load_op->usc_frag_prog_bo);
+   if (result != VK_SUCCESS)
+      goto err_free_load_op;
+
+   result = pvr_pds_fragment_program_create_and_upload(
+      device,
+      allocator,
+      load_op->usc_frag_prog_bo,
+      pvr_pds_fragment_program_params.temp_count,
+      pvr_pds_fragment_program_params.msaa_mode,
+      pvr_pds_fragment_program_params.has_phase_rate_change,
+      &load_op->pds_frag_prog);
+   if (result != VK_SUCCESS)
+      goto err_free_usc_frag_prog_bo;
+
+   result = pvr_pds_texture_state_program_create_and_upload(
+      device,
+      allocator,
+      &load_op->pds_tex_state_prog);
+   if (result != VK_SUCCESS)
+      goto err_free_pds_frag_prog;
+
+   load_op->is_hw_object = true;
+   /* FIXME: These should be based on the USC and PDS programs, but are hard
+    * coded for now.
+    */
+   load_op->const_shareds_count = 1;
+   load_op->shareds_dest_offset = 0;
+   load_op->shareds_count = 1;
+   load_op->temps_count = 1;
+
+   *load_op_out = load_op;
+
+   return VK_SUCCESS;
+
+err_free_pds_frag_prog:
+   pvr_bo_free(device, load_op->pds_frag_prog.pvr_bo);
+
+err_free_usc_frag_prog_bo:
+   pvr_bo_free(device, load_op->usc_frag_prog_bo);
+
+err_free_load_op:
+   vk_free2(&device->vk.alloc, allocator, load_op);
+
+   return result;
+}
+
+/* Frees the GPU objects owned by a load op and the load op itself.
+ *
+ * NULL-safe: HW renders without color inits never get a load op attached,
+ * so callers may pass a NULL hw_render->client_data unconditionally.
+ */
+static void pvr_load_op_destroy(struct pvr_device *device,
+                                const VkAllocationCallbacks *allocator,
+                                struct pvr_load_op *load_op)
+{
+   if (!load_op)
+      return;
+
+   pvr_bo_free(device, load_op->pds_tex_state_prog.pvr_bo);
+   pvr_bo_free(device, load_op->pds_frag_prog.pvr_bo);
+   pvr_bo_free(device, load_op->usc_frag_prog_bo);
+   vk_free2(&device->vk.alloc, allocator, load_op);
+}
+
+/* Number of SPM load-in buffers: 7 by default, 3 on cores with the
+ * eight_output_registers feature (presumably fewer buffers are needed when
+ * more output registers are available — confirm against SPM documentation).
+ */
+#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info)              \
+   ({                                                        \
+      int __ret = 7U;                                        \
+      if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) \
+         __ret = 3U;                                         \
+      __ret;                                                 \
+   })
+
+/* Implements vkCreateRenderPass2(): builds the driver render pass object in
+ * a single zeroed multialloc block, translates the Vulkan attachment /
+ * subpass / dependency description, derives the HW renderpass setup and
+ * creates the per-render load op programs.
+ */
+VkResult pvr_CreateRenderPass2(VkDevice _device,
+                               const VkRenderPassCreateInfo2KHR *pCreateInfo,
+                               const VkAllocationCallbacks *pAllocator,
+                               VkRenderPass *pRenderPass)
+{
+   struct pvr_render_pass_attachment *attachments;
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_render_subpass *subpasses;
+   size_t subpass_attachment_count;
+   uint32_t *subpass_attachments;
+   struct pvr_render_pass *pass;
+   uint32_t *dep_list;
+   bool *flush_on_dep;
+   VkResult result;
+
+   VK_MULTIALLOC(ma);
+   vk_multialloc_add(&ma, &pass, __typeof__(*pass), 1);
+   vk_multialloc_add(&ma,
+                     &attachments,
+                     __typeof__(*attachments),
+                     pCreateInfo->attachmentCount);
+   vk_multialloc_add(&ma,
+                     &subpasses,
+                     __typeof__(*subpasses),
+                     pCreateInfo->subpassCount);
+
+   subpass_attachment_count = 0;
+   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+      subpass_attachment_count +=
+         pvr_num_subpass_attachments(&pCreateInfo->pSubpasses[i]);
+   }
+
+   vk_multialloc_add(&ma,
+                     &subpass_attachments,
+                     __typeof__(*subpass_attachments),
+                     subpass_attachment_count);
+   vk_multialloc_add(&ma,
+                     &dep_list,
+                     __typeof__(*dep_list),
+                     pCreateInfo->dependencyCount);
+   vk_multialloc_add(&ma,
+                     &flush_on_dep,
+                     __typeof__(*flush_on_dep),
+                     pCreateInfo->dependencyCount);
+
+   /* Zero-allocated: every count/flag below starts at 0/false. */
+   if (!vk_multialloc_zalloc2(&ma,
+                              &device->vk.alloc,
+                              pAllocator,
+                              VK_OBJECT_TYPE_RENDER_PASS)) {
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+   }
+
+   vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
+   pass->attachment_count = pCreateInfo->attachmentCount;
+   pass->attachments = attachments;
+   pass->subpass_count = pCreateInfo->subpassCount;
+   pass->subpasses = subpasses;
+   pass->max_sample_count = 1;
+
+   /* Copy attachment descriptions. */
+   for (uint32_t i = 0; i < pass->attachment_count; i++) {
+      const VkAttachmentDescription2 *desc = &pCreateInfo->pAttachments[i];
+      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];
+
+      pvr_assert(!(desc->flags & ~VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT));
+
+      attachment->load_op = desc->loadOp;
+      attachment->store_op = desc->storeOp;
+
+      /* The format must be recorded before the stencil query below: the
+       * original read attachment->vk_format while it was still
+       * zero-initialized (VK_FORMAT_UNDEFINED), so has_stencil was always
+       * false and the stencil load/store ops were never captured.
+       */
+      attachment->vk_format = desc->format;
+
+      attachment->has_stencil = vk_format_has_stencil(attachment->vk_format);
+      if (attachment->has_stencil) {
+         attachment->stencil_load_op = desc->stencilLoadOp;
+         attachment->stencil_store_op = desc->stencilStoreOp;
+      }
+
+      attachment->sample_count = desc->samples;
+      attachment->initial_layout = desc->initialLayout;
+      attachment->is_pbe_downscalable =
+         pvr_format_is_pbe_downscalable(attachment->vk_format);
+      attachment->index = i;
+
+      if (attachment->sample_count > pass->max_sample_count)
+         pass->max_sample_count = attachment->sample_count;
+   }
+
+   /* Count how many dependencies each subpass has. */
+   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
+      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
+
+      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->srcSubpass != dep->dstSubpass) {
+         pass->subpasses[dep->dstSubpass].dep_count++;
+      }
+   }
+
+   /* Assign reference pointers to lists, and fill in the attachments list, we
+    * need to re-walk the dependencies array later to fill the per-subpass
+    * dependencies lists in.
+    */
+   for (uint32_t i = 0; i < pass->subpass_count; i++) {
+      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
+      struct pvr_render_subpass *subpass = &pass->subpasses[i];
+
+      subpass->pipeline_bind_point = desc->pipelineBindPoint;
+      subpass->sample_count = 1;
+
+      subpass->color_count = desc->colorAttachmentCount;
+      if (subpass->color_count > 0) {
+         bool has_used_color_attachment = false;
+         uint32_t index;
+
+         subpass->color_attachments = subpass_attachments;
+         subpass_attachments += subpass->color_count;
+
+         for (uint32_t j = 0; j < subpass->color_count; j++) {
+            subpass->color_attachments[j] =
+               desc->pColorAttachments[j].attachment;
+
+            if (subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
+               continue;
+
+            index = subpass->color_attachments[j];
+            subpass->sample_count = pass->attachments[index].sample_count;
+            has_used_color_attachment = true;
+         }
+
+         /* Fall back to the depth/stencil attachment's sample count when no
+          * color attachment is actually used.
+          */
+         if (!has_used_color_attachment && desc->pDepthStencilAttachment &&
+             desc->pDepthStencilAttachment->attachment !=
+                VK_ATTACHMENT_UNUSED) {
+            index = desc->pDepthStencilAttachment->attachment;
+            subpass->sample_count = pass->attachments[index].sample_count;
+         }
+      }
+
+      if (desc->pResolveAttachments) {
+         subpass->resolve_attachments = subpass_attachments;
+         subpass_attachments += subpass->color_count;
+
+         for (uint32_t j = 0; j < subpass->color_count; j++) {
+            subpass->resolve_attachments[j] =
+               desc->pResolveAttachments[j].attachment;
+         }
+      }
+
+      subpass->input_count = desc->inputAttachmentCount;
+      if (subpass->input_count > 0) {
+         subpass->input_attachments = subpass_attachments;
+         subpass_attachments += subpass->input_count;
+
+         for (uint32_t j = 0; j < subpass->input_count; j++) {
+            subpass->input_attachments[j] =
+               desc->pInputAttachments[j].attachment;
+         }
+      }
+
+      if (desc->pDepthStencilAttachment) {
+         subpass->depth_stencil_attachment = subpass_attachments++;
+         *subpass->depth_stencil_attachment =
+            desc->pDepthStencilAttachment->attachment;
+      }
+
+      /* Give each subpass a slice of the dep_list/flush_on_dep arrays. */
+      subpass->dep_list = dep_list;
+      dep_list += subpass->dep_count;
+      subpass->flush_on_dep = flush_on_dep;
+      flush_on_dep += subpass->dep_count;
+
+      /* Reset the dependencies count so we can start from 0 and index into
+       * the dependencies array.
+       */
+      subpass->dep_count = 0;
+      subpass->index = i;
+   }
+
+   /* Compute dependencies and populate dep_list and flush_on_dep. */
+   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
+      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];
+
+      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
+          dep->srcSubpass != dep->dstSubpass) {
+         struct pvr_render_subpass *subpass = &pass->subpasses[dep->dstSubpass];
+
+         subpass->dep_list[subpass->dep_count] = dep->srcSubpass;
+         if (pvr_subpass_has_msaa_input_attachment(subpass, pCreateInfo))
+            subpass->flush_on_dep[subpass->dep_count] = true;
+
+         subpass->dep_count++;
+      }
+   }
+
+   pass->max_tilebuffer_count =
+      PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);
+
+   pass->hw_setup = pvr_create_renderpass_hwsetup(device, pass, false);
+   if (!pass->hw_setup) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_pass;
+   }
+
+   pvr_init_subpass_userpass_spawn(pass->hw_setup, pass, pass->subpasses);
+
+   /* Create the load op programs; renders without color inits keep a NULL
+    * client_data.
+    */
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_renderpass_hwsetup_render *hw_render =
+         &pass->hw_setup->renders[i];
+      struct pvr_load_op *load_op = NULL;
+
+      if (hw_render->tile_buffers_count)
+         pvr_finishme("Set up tile buffer table");
+
+      if (!hw_render->color_init_count) {
+         assert(!hw_render->client_data);
+         continue;
+      }
+
+      if (!pvr_has_output_register_writes(hw_render))
+         pvr_finishme("Add output register write");
+
+      result = pvr_load_op_create(device, pAllocator, hw_render, &load_op);
+      if (result != VK_SUCCESS)
+         goto err_load_op_destroy;
+
+      hw_render->client_data = load_op;
+   }
+
+   *pRenderPass = pvr_render_pass_to_handle(pass);
+
+   return VK_SUCCESS;
+
+err_load_op_destroy:
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_renderpass_hwsetup_render *hw_render =
+         &pass->hw_setup->renders[i];
+
+      if (hw_render->client_data)
+         pvr_load_op_destroy(device, pAllocator, hw_render->client_data);
+   }
+
+   pvr_destroy_renderpass_hwsetup(device, pass->hw_setup);
+
+err_free_pass:
+   vk_object_base_finish(&pass->base);
+   vk_free2(&device->vk.alloc, pAllocator, pass);
+
+   return result;
+}
+
+/* Implements vkDestroyRenderPass(): releases the per-render load ops, the
+ * HW setup and the pass allocation itself. NULL pass handles are a no-op.
+ */
+void pvr_DestroyRenderPass(VkDevice _device,
+                           VkRenderPass _pass,
+                           const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_render_pass, pass, _pass);
+
+   if (!pass)
+      return;
+
+   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
+      struct pvr_renderpass_hwsetup_render *hw_render =
+         &pass->hw_setup->renders[i];
+
+      /* Renders without color inits never get a load op attached, so
+       * client_data may legitimately be NULL here; the original called
+       * pvr_load_op_destroy() unconditionally, which dereferences it.
+       */
+      if (hw_render->client_data)
+         pvr_load_op_destroy(device, pAllocator, hw_render->client_data);
+   }
+
+   pvr_destroy_renderpass_hwsetup(device, pass->hw_setup);
+   vk_object_base_finish(&pass->base);
+   vk_free2(&device->vk.alloc, pAllocator, pass);
+}
+
+/* Implements vkGetRenderAreaGranularity(). */
+void pvr_GetRenderAreaGranularity(VkDevice _device,
+                                  VkRenderPass renderPass,
+                                  VkExtent2D *pGranularity)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   const struct pvr_device_info *const dev_info = &device->pdevice->dev_info;
+
+   /* Granularity does not depend on any settings in the render pass, so
+    * report the core's tile size.
+    *
+    * The 16x16 fallback is based on the minimum value found in all existing
+    * cores.
+    */
+   *pGranularity = (VkExtent2D){
+      .width = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16),
+      .height = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 16),
+   };
+}
diff --git a/src/imagination/vulkan/pvr_pipeline.c b/src/imagination/vulkan/pvr_pipeline.c
new file mode 100644 (file)
index 0000000..4584ddd
--- /dev/null
@@ -0,0 +1,1859 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on v3dv driver which is:
+ * Copyright © 2019 Raspberry Pi
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+
+#include "compiler/shader_enums.h"
+#include "hwdef/rogue_hw_utils.h"
+#include "nir/nir.h"
+#include "pvr_bo.h"
+#include "pvr_csb.h"
+#include "pvr_pds.h"
+#include "pvr_private.h"
+#include "pvr_shader.h"
+#include "pvr_usc_compute_shader.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue.h"
+#include "rogue/rogue_build_data.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "util/ralloc.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_util.h"
+
+#define WORKGROUP_DIMENSIONS 3U
+
+/* FIXME: Remove this when the compiler is hooked up. */
+/******************************************************************************
+   Hard coding
+ ******************************************************************************/
+/* This section contains hard coding related structs. */
+
+/* Hard-coded explicit-constant layout, used only while the compiler is not
+ * hooked up (see the "Hard coding" section note above this struct's FIXME).
+ */
+struct pvr_explicit_constant_usage {
+   /* Hardware register number assigned to the explicit constant with the lower
+    * pre_assigned offset.
+    */
+   uint32_t start_offset;
+};
+
+/* Hard-coded register assignments for the pre-baked compute shader; to be
+ * replaced once the compiler provides them.
+ */
+static const struct {
+   /* Registers carrying the local invocation ID (register numbers 0 and 1). */
+   uint32_t local_invocation_regs[2];
+
+   /* Registers carrying the workgroup ID, one per dimension. */
+   uint32_t work_group_regs[WORKGROUP_DIMENSIONS];
+
+   /* Barrier coordination register; ROGUE_REG_UNUSED means no barrier. */
+   uint32_t barrier_reg;
+
+   /* Number of USC temporaries used by the shader. */
+   uint32_t usc_temps;
+} pvr_pds_compute_program_params = {
+   .local_invocation_regs = { 0, 1 },
+
+   .work_group_regs = { 0, 1, 2 },
+
+   .barrier_reg = ROGUE_REG_UNUSED,
+
+   .usc_temps = 0,
+};
+
+/*****************************************************************************
+   PDS functions
+*****************************************************************************/
+
+/* If allocator == NULL, the internal one will be used. */
+/* Builds a PDS coefficient-loading program from the given FPU iterator
+ * words/destinations and uploads its data + code segments.
+ *
+ * If allocator == NULL, the internal one will be used.
+ */
+static VkResult pvr_pds_coeff_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   const uint32_t *fpu_iterators,
+   uint32_t fpu_iterators_count,
+   const uint32_t *destinations,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   struct pvr_pds_coeff_loading_program program = {
+      .num_fpu_iterators = fpu_iterators_count,
+   };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   assert(fpu_iterators_count < PVR_MAXIMUM_ITERATIONS);
+
+   /* Get the size of the program and then allocate that much memory. */
+   pvr_pds_coefficient_loading(&program, NULL, PDS_GENERATE_SIZES);
+
+   staging_buffer_size =
+      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: Should we save pointers when we redesign the pds gen api ? */
+   typed_memcpy(program.FPU_iterators,
+                fpu_iterators,
+                program.num_fpu_iterators);
+
+   typed_memcpy(program.destination, destinations, program.num_fpu_iterators);
+
+   /* Generate the program into the staging_buffer. */
+   pvr_pds_coefficient_loading(&program,
+                               staging_buffer,
+                               PDS_GENERATE_CODEDATA_SEGMENTS);
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               &staging_buffer[0],
+                               program.data_size,
+                               16,
+                               &staging_buffer[program.data_size],
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+
+   /* The staging copy is no longer needed whatever the upload result. */
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return result;
+}
+
+/* FIXME: move this elsewhere since it's also called in pvr_pass.c? */
+/* If allocator == NULL, the internal one will be used. */
+/* Builds a PDS "kick USC" program that launches the given fragment shader
+ * and uploads its data + code segments to device memory.
+ */
+VkResult pvr_pds_fragment_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   const struct pvr_bo *fragment_shader_bo,
+   uint32_t fragment_temp_count,
+   enum rogue_msaa_mode msaa_mode,
+   bool has_phase_rate_change,
+   struct pvr_pds_upload *const pds_upload_out)
+{
+   const enum PVRX(PDSINST_DOUTU_SAMPLE_RATE)
+      sample_rate = pvr_sample_rate_from_usc_msaa_mode(msaa_mode);
+   struct pvr_pds_kickusc_program program = { 0 };
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   /* FIXME: Should it be passing in the USC offset rather than address here?
+    */
+   /* Note this is not strictly required to be done before calculating the
+    * staging_buffer_size in this particular case. It can also be done after
+    * allocating the buffer. The size from pvr_pds_kick_usc() is constant.
+    */
+   pvr_pds_setup_doutu(&program.usc_task_control,
+                       fragment_shader_bo->vma->dev_addr.addr,
+                       fragment_temp_count,
+                       sample_rate,
+                       has_phase_rate_change);
+
+   /* First pass only computes program.code_size/data_size. */
+   pvr_pds_kick_usc(&program, NULL, 0, false, PDS_GENERATE_SIZES);
+
+   staging_buffer_size =
+      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Second pass emits the data segment then the code segment. */
+   pvr_pds_kick_usc(&program,
+                    staging_buffer,
+                    0,
+                    false,
+                    PDS_GENERATE_CODEDATA_SEGMENTS);
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               &staging_buffer[0],
+                               program.data_size,
+                               16,
+                               &staging_buffer[program.data_size],
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return result;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+static inline size_t pvr_pds_get_max_vertex_program_const_map_size_in_bytes(
+   const struct pvr_device_info *dev_info,
+   bool robust_buffer_access)
+{
+   /* Maximum memory allocation needed for const map entries in
+    * pvr_pds_generate_vertex_primary_program().
+    * When robustBufferAccess is disabled, it must be >= 410.
+    * When robustBufferAccess is enabled, it must be >= 570.
+    *
+    * 1. Size of entry for base instance
+    *        (pvr_const_map_entry_base_instance)
+    *
+    * 2. Max. number of vertex inputs (PVR_MAX_VERTEX_INPUT_BINDINGS) * (
+    *     if (!robustBufferAccess)
+    *         size of vertex attribute entry
+    *             (pvr_const_map_entry_vertex_attribute_address) +
+    *     else
+    *         size of robust vertex attribute entry
+    *             (pvr_const_map_entry_robust_vertex_attribute_address) +
+    *         size of entry for max attribute index
+    *             (pvr_const_map_entry_vertex_attribute_max_index) +
+    *     fi
+    *     size of Unified Store burst entry
+    *         (pvr_const_map_entry_literal32) +
+    *     size of entry for vertex stride
+    *         (pvr_const_map_entry_literal32) +
+    *     size of entries for DDMAD control word
+    *         (num_ddmad_literals * pvr_const_map_entry_literal32))
+    *
+    * 3. Size of entry for DOUTW vertex/instance control word
+    *     (pvr_const_map_entry_literal32)
+    *
+    * 4. Size of DOUTU entry (pvr_const_map_entry_doutu_address)
+    */
+
+   const size_t base_instance_size =
+      sizeof(struct pvr_const_map_entry_base_instance);
+   const size_t literal32_size = sizeof(struct pvr_const_map_entry_literal32);
+   const size_t doutu_size = sizeof(struct pvr_const_map_entry_doutu_address);
+
+   const size_t attribute_size =
+      (!robust_buffer_access)
+         ? sizeof(struct pvr_const_map_entry_vertex_attribute_address)
+         : sizeof(struct pvr_const_map_entry_robust_vertex_attribute_address) +
+              sizeof(struct pvr_const_map_entry_vertex_attribute_max_index);
+
+   /* If has_pds_ddmadt the DDMAD control word is now a DDMADT control word
+    * and is increased by one DWORD to contain the data for the DDMADT's
+    * out-of-bounds check.
+    */
+   const size_t num_ddmad_literals =
+      1U + (size_t)PVR_HAS_FEATURE(dev_info, pds_ddmadt);
+
+   /* Unified Store burst + vertex stride + DDMAD control word literals. */
+   const size_t per_binding_size =
+      attribute_size + (2U + num_ddmad_literals) * literal32_size;
+
+   return base_instance_size +
+          PVR_MAX_VERTEX_INPUT_BINDINGS * per_binding_size + literal32_size +
+          doutu_size;
+}
+
+/* A const pointer to an array of PVR_MAX_VERTEX_ATTRIB_DMAS
+ * struct pvr_pds_vertex_dma elements.
+ *
+ * Using a pointer-to-array type (rather than a plain element pointer)
+ * preserves the array length in the type system.
+ */
+typedef struct pvr_pds_vertex_dma (
+      *const
+         pvr_pds_attrib_dma_descriptions_array_ptr)[PVR_MAX_VERTEX_ATTRIB_DMAS];
+
+/* Translates the Vulkan vertex input state into the PDS vertex DMA
+ * description list.
+ *
+ * dma_descriptions_out_ptr points at the output array; only the first
+ * *dma_count_out elements are written, the rest is left untouched.
+ */
+static void pvr_pds_vertex_attrib_init_dma_descriptions(
+   const VkPipelineVertexInputStateCreateInfo *const vertex_input_state,
+   const struct rogue_vs_build_data *vs_data,
+   pvr_pds_attrib_dma_descriptions_array_ptr dma_descriptions_out_ptr,
+   uint32_t *const dma_count_out)
+{
+   struct pvr_pds_vertex_dma *const dma_descriptions =
+      *dma_descriptions_out_ptr;
+   uint32_t count = 0;
+
+   if (!vertex_input_state) {
+      *dma_count_out = 0;
+      return;
+   }
+
+   for (uint32_t attrib_idx = 0;
+        attrib_idx < vertex_input_state->vertexAttributeDescriptionCount;
+        attrib_idx++) {
+      const VkVertexInputAttributeDescription *const attrib =
+         &vertex_input_state->pVertexAttributeDescriptions[attrib_idx];
+      const VkVertexInputBindingDescription *binding = NULL;
+
+      /* Look up the binding description referenced by this attribute. */
+      for (uint32_t bind_idx = 0;
+           bind_idx < vertex_input_state->vertexBindingDescriptionCount;
+           bind_idx++) {
+         const VkVertexInputBindingDescription *const candidate =
+            &vertex_input_state->pVertexBindingDescriptions[bind_idx];
+
+         if (candidate->binding == attrib->binding) {
+            binding = candidate;
+            break;
+         }
+      }
+
+      /* From the Vulkan 1.2.195 spec for
+       * VkPipelineVertexInputStateCreateInfo:
+       *
+       *    "For every binding specified by each element of
+       *    pVertexAttributeDescriptions, a
+       *    VkVertexInputBindingDescription must exist in
+       *    pVertexBindingDescriptions with the same value of binding"
+       *
+       * so a matching binding is always found and no NULL check is needed.
+       */
+
+      struct pvr_pds_vertex_dma *const dma_desc = &dma_descriptions[count++];
+
+      const size_t location = attrib->location;
+      assert(location < vs_data->inputs.num_input_vars);
+
+      dma_desc->offset = attrib->offset;
+      dma_desc->stride = binding->stride;
+      dma_desc->flags =
+         (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE)
+            ? PVR_PDS_VERTEX_DMA_FLAGS_INSTANCE_RATE
+            : 0;
+      dma_desc->size_in_dwords = vs_data->inputs.components[location];
+      /* TODO: This will be different when other types are supported.
+       * Store in vs_data with base and components?
+       */
+      /* TODO: Use attrib->format. */
+      dma_desc->component_size_in_bytes = ROGUE_REG_SIZE_BYTES;
+      dma_desc->destination = vs_data->inputs.base[location];
+      dma_desc->binding_index = attrib->binding;
+      dma_desc->divisor = 1;
+      dma_desc->robustness_buffer_offset = 0;
+   }
+
+   *dma_count_out = count;
+}
+
+/* Generates and uploads a single PDS vertex primary (attribute DMA) program.
+ *
+ * The program is generated twice: first with a NULL buffer to size the code
+ * segment, then again into a staging buffer which is uploaded to the GPU.
+ * On success program_out->info.entries points to a heap allocation which the
+ * caller releases with pvr_pds_vertex_attrib_program_destroy(). If allocator
+ * is NULL the device allocator is used.
+ */
+static VkResult pvr_pds_vertex_attrib_program_create_and_upload(
+   struct pvr_device *const device,
+   const VkAllocationCallbacks *const allocator,
+   struct pvr_pds_vertex_primary_program_input *const input,
+   struct pvr_pds_attrib_program *const program_out)
+{
+   const size_t const_entries_size_in_bytes =
+      pvr_pds_get_max_vertex_program_const_map_size_in_bytes(
+         &device->pdevice->dev_info,
+         device->features.robustBufferAccess);
+   struct pvr_pds_upload *const program = &program_out->program;
+   struct pvr_pds_info *const info = &program_out->info;
+   struct pvr_const_map_entry *entries_buffer;
+   struct pvr_const_map_entry *resized_entries;
+   ASSERTED uint32_t code_size_in_dwords;
+   size_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   memset(info, 0, sizeof(*info));
+
+   entries_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              const_entries_size_in_bytes,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!entries_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   info->entries = entries_buffer;
+   info->entries_size_in_bytes = const_entries_size_in_bytes;
+
+   /* First pass with a NULL buffer only computes the sizes in *info. */
+   pvr_pds_generate_vertex_primary_program(input,
+                                           NULL,
+                                           info,
+                                           device->features.robustBufferAccess,
+                                           &device->pdevice->dev_info);
+
+   code_size_in_dwords = info->code_size_in_dwords;
+   staging_buffer_size = info->code_size_in_dwords * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_entries;
+   }
+
+   /* This also fills in info->entries. */
+   pvr_pds_generate_vertex_primary_program(input,
+                                           staging_buffer,
+                                           info,
+                                           device->features.robustBufferAccess,
+                                           &device->pdevice->dev_info);
+
+   assert(info->code_size_in_dwords <= code_size_in_dwords);
+
+   /* Shrink the entries buffer to the size actually written. The original
+    * pointer is kept so it isn't leaked if the realloc fails.
+    */
+   /* FIXME: Add a vk_realloc2() ? */
+   resized_entries = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
+                                entries_buffer,
+                                info->entries_written_size_in_bytes,
+                                8,
+                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!resized_entries) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_staging;
+   }
+
+   entries_buffer = resized_entries;
+
+   info->entries = entries_buffer;
+   info->entries_size_in_bytes = info->entries_written_size_in_bytes;
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               NULL,
+                               0,
+                               0,
+                               staging_buffer,
+                               info->code_size_in_dwords,
+                               16,
+                               16,
+                               program);
+   if (result != VK_SUCCESS) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_staging;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+
+err_free_staging:
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+err_free_entries:
+   vk_free2(&device->vk.alloc, allocator, entries_buffer);
+   /* Don't leave a dangling pointer in the output info. */
+   info->entries = NULL;
+
+   return result;
+}
+
+/* Frees the GPU code upload and the host const map entries of a vertex
+ * attribute program created by
+ * pvr_pds_vertex_attrib_program_create_and_upload().
+ */
+static inline void pvr_pds_vertex_attrib_program_destroy(
+   struct pvr_device *const device,
+   const struct VkAllocationCallbacks *const allocator,
+   struct pvr_pds_attrib_program *const program)
+{
+   pvr_bo_free(device, program->program.pvr_bo);
+   vk_free2(&device->vk.alloc, allocator, program->info.entries);
+}
+
+/* A const pointer to an array of PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT
+ * struct pvr_pds_attrib_program elements.
+ *
+ * Using a pointer-to-array type preserves the array length in the type
+ * system, allowing ARRAY_SIZE() on the pointed-to array.
+ */
+typedef struct pvr_pds_attrib_program (*const pvr_pds_attrib_programs_array_ptr)
+   [PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];
+
+/* Generates and uploads the PDS programs for DMAing vertex attributes into
+ * USC vertex inputs. This bakes the code segment and creates a template of
+ * the data segment for the command buffer to fill in.
+ *
+ * If allocator == NULL, the internal one will be used.
+ *
+ * programs_out_ptr is a pointer to the array where the outputs will be placed.
+ */
+static VkResult pvr_pds_vertex_attrib_programs_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *const allocator,
+   const VkPipelineVertexInputStateCreateInfo *const vertex_input_state,
+   uint32_t usc_temp_count,
+   const struct rogue_vs_build_data *vs_data,
+   pvr_pds_attrib_programs_array_ptr programs_out_ptr)
+{
+   struct pvr_pds_vertex_dma dma_descriptions[PVR_MAX_VERTEX_ATTRIB_DMAS];
+   struct pvr_pds_attrib_program *const programs_out = *programs_out_ptr;
+   struct pvr_pds_vertex_primary_program_input input = {
+      .dma_list = dma_descriptions,
+   };
+
+   pvr_pds_vertex_attrib_init_dma_descriptions(vertex_input_state,
+                                               vs_data,
+                                               &dma_descriptions,
+                                               &input.dma_count);
+
+   pvr_pds_setup_doutu(&input.usc_task_control,
+                       0,
+                       usc_temp_count,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   /* TODO: If statements for all the "bRequired"s + ui32ExtraFlags. */
+
+   /* Note: programs_out_ptr is a pointer to an array so iterating up to
+    * ARRAY_SIZE(*programs_out_ptr) is fine. See the typedef.
+    */
+   for (uint32_t idx = 0; idx < ARRAY_SIZE(*programs_out_ptr); idx++) {
+      VkResult result;
+
+      if (idx == PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASIC) {
+         input.flags = 0;
+      } else if (idx == PVR_PDS_VERTEX_ATTRIB_PROGRAM_BASE_INSTANCE) {
+         input.flags = PVR_PDS_VERTEX_FLAGS_BASE_INSTANCE_VARIANT;
+      } else if (idx == PVR_PDS_VERTEX_ATTRIB_PROGRAM_DRAW_INDIRECT) {
+         /* We unset INSTANCE and set INDIRECT. */
+         input.flags = PVR_PDS_VERTEX_FLAGS_DRAW_INDIRECT_VARIANT;
+      } else {
+         unreachable("Invalid vertex attrib program type.");
+      }
+
+      result =
+         pvr_pds_vertex_attrib_program_create_and_upload(device,
+                                                         allocator,
+                                                         &input,
+                                                         &programs_out[idx]);
+      if (result != VK_SUCCESS) {
+         /* Unwind the programs uploaded so far. */
+         while (idx--) {
+            pvr_pds_vertex_attrib_program_destroy(device,
+                                                  allocator,
+                                                  &programs_out[idx]);
+         }
+
+         return result;
+      }
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Returns the maximum size (in bytes) that the const map entries of a
+ * descriptor upload program can require, used to size the entries
+ * allocation up front.
+ *
+ * Note: declared with (void) — an empty parameter list in C (pre-C23) is an
+ * unprototyped declaration, not "no arguments".
+ */
+static size_t pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes(void)
+{
+   /* Maximum memory allocation needed for const map entries in
+    * pvr_pds_generate_descriptor_upload_program().
+    * It must be >= 688 bytes. This size is calculated as the sum of:
+    *
+    *  1. Max. number of descriptor sets (8) * (
+    *         size of descriptor entry
+    *             (pvr_const_map_entry_descriptor_set) +
+    *         size of Common Store burst entry
+    *             (pvr_const_map_entry_literal32))
+    *
+    *  2. Max. number of PDS program buffers (24) * (
+    *         size of the largest buffer structure
+    *             (pvr_const_map_entry_constant_buffer) +
+    *         size of Common Store burst entry
+    *             (pvr_const_map_entry_literal32)
+    *
+    *  3. Size of DOUTU entry (pvr_const_map_entry_doutu_address)
+    */
+
+   /* FIXME: PVR_MAX_DESCRIPTOR_SETS is 4 and not 8. The comment above seems
+    * to say that it should be 8.
+    * Figure out a define for this or is the comment wrong?
+    */
+   return (8 * (sizeof(struct pvr_const_map_entry_descriptor_set) +
+                sizeof(struct pvr_const_map_entry_literal32)) +
+           PVR_PDS_MAX_BUFFERS *
+              (sizeof(struct pvr_const_map_entry_constant_buffer) +
+               sizeof(struct pvr_const_map_entry_literal32)) +
+           sizeof(struct pvr_const_map_entry_doutu_address));
+}
+
+/* A const pointer to an array of PVR_PDS_MAX_BUFFERS struct pvr_pds_buffer
+ * elements.
+ *
+ * Using a pointer-to-array type preserves the array length in the type
+ * system, allowing ARRAY_SIZE() on the pointed-to array.
+ */
+typedef struct pvr_pds_buffer (
+      *const pvr_pds_uniform_program_buffer_array_ptr)[PVR_PDS_MAX_BUFFERS];
+
+/* Fills the PDS buffer array from the compiler's UBO usage data.
+ *
+ * Only the first *buffer_count_out elements of the output array are written.
+ * NOTE(review): robust_buffer_access is currently unused in this function.
+ */
+static void pvr_pds_uniform_program_setup_buffers(
+   bool robust_buffer_access,
+   const struct rogue_ubo_data *ubo_data,
+   pvr_pds_uniform_program_buffer_array_ptr buffers_out_ptr,
+   uint32_t *const buffer_count_out)
+{
+   struct pvr_pds_buffer *const buffers = *buffers_out_ptr;
+   uint32_t count = 0;
+
+   for (size_t entry = 0; entry < ubo_data->num_ubo_entries; ++entry) {
+      struct pvr_pds_buffer *const buffer = &buffers[count++];
+
+      /* This is fine since buffers_out_ptr is a pointer to an array. */
+      assert(count <= ARRAY_SIZE(*buffers_out_ptr));
+
+      buffer->type = PVR_BUFFER_TYPE_UBO;
+      buffer->size_in_dwords = ubo_data->size[entry];
+      buffer->destination = ubo_data->dest[entry];
+
+      /* count has already been advanced, so buffer ids are 1-based. */
+      buffer->buffer_id = count;
+      buffer->desc_set = ubo_data->desc_set[entry];
+      buffer->binding = ubo_data->binding[entry];
+      /* TODO: Is this always the case?
+       * E.g. can multiple UBOs have the same base buffer?
+       */
+      buffer->source_offset = 0;
+   }
+
+   *buffer_count_out = count;
+}
+
+/* Generates and uploads a PDS descriptor (uniform) upload program for the
+ * given stage.
+ *
+ * On success pds_info_out->entries points to a heap allocation which the
+ * caller releases with pvr_pds_uniform_program_destroy(). If the generated
+ * program is empty, both outputs are zeroed and VK_SUCCESS is returned
+ * without uploading anything. If allocator is NULL the device allocator is
+ * used.
+ */
+static VkResult pvr_pds_uniform_program_create_and_upload(
+   struct pvr_device *const device,
+   const VkAllocationCallbacks *const allocator,
+   const struct rogue_ubo_data *const ubo_data,
+   const struct pvr_explicit_constant_usage *const explicit_const_usage,
+   const struct pvr_pipeline_layout *const layout,
+   enum pvr_stage_allocation stage,
+   struct pvr_pds_upload *const pds_code_upload_out,
+   struct pvr_pds_info *const pds_info_out)
+{
+   const size_t const_entries_size_in_bytes =
+      pvr_pds_get_max_descriptor_upload_const_map_size_in_bytes();
+   struct pvr_descriptor_program_input program = { 0 };
+   struct pvr_const_map_entry *entries_buffer;
+   struct pvr_const_map_entry *resized_entries;
+   ASSERTED uint32_t code_size_in_dwords;
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   assert(stage != PVR_STAGE_ALLOCATION_COUNT);
+
+   memset(pds_info_out, 0, sizeof(*pds_info_out));
+
+   pvr_pds_uniform_program_setup_buffers(device->features.robustBufferAccess,
+                                         ubo_data,
+                                         &program.buffers,
+                                         &program.buffer_count);
+
+   for (uint32_t dma = 0; dma < program.buffer_count; dma++) {
+      if (program.buffers[dma].type != PVR_BUFFER_TYPES_COMPILE_TIME)
+         continue;
+
+      assert(!"Unimplemented");
+   }
+
+   if (layout->per_stage_reg_info[stage].primary_dynamic_size_in_dwords)
+      assert(!"Unimplemented");
+
+   for (uint32_t set_num = 0; set_num < layout->set_count; set_num++) {
+      const struct pvr_descriptor_set_layout_mem_layout *const reg_layout =
+         &layout->register_layout_in_dwords_per_stage[stage][set_num];
+      const uint32_t start_offset = explicit_const_usage->start_offset;
+
+      /* TODO: Use compiler usage info to optimize this? */
+
+      /* Only dma primaries if they are actually required. */
+      if (reg_layout->primary_size) {
+         program.descriptor_sets[program.descriptor_set_count++] =
+            (struct pvr_pds_descriptor_set){
+               .descriptor_set = set_num,
+               .size_in_dwords = reg_layout->primary_size,
+               .destination = reg_layout->primary_offset + start_offset,
+               .primary = true,
+            };
+      }
+
+      /* Only dma secondaries if they are actually required. */
+      if (!reg_layout->secondary_size)
+         continue;
+
+      program.descriptor_sets[program.descriptor_set_count++] =
+         (struct pvr_pds_descriptor_set){
+            .descriptor_set = set_num,
+            .size_in_dwords = reg_layout->secondary_size,
+            .destination = reg_layout->secondary_offset + start_offset,
+         };
+   }
+
+   entries_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              const_entries_size_in_bytes,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!entries_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   pds_info_out->entries = entries_buffer;
+   pds_info_out->entries_size_in_bytes = const_entries_size_in_bytes;
+
+   /* First pass with a NULL buffer only computes sizes in *pds_info_out. */
+   pvr_pds_generate_descriptor_upload_program(&program, NULL, pds_info_out);
+
+   code_size_in_dwords = pds_info_out->code_size_in_dwords;
+   staging_buffer_size =
+      pds_info_out->code_size_in_dwords * sizeof(*staging_buffer);
+
+   /* Empty program: nothing to upload, report zeroed outputs. */
+   if (!staging_buffer_size) {
+      vk_free2(&device->vk.alloc, allocator, entries_buffer);
+
+      memset(pds_info_out, 0, sizeof(*pds_info_out));
+      memset(pds_code_upload_out, 0, sizeof(*pds_code_upload_out));
+      return VK_SUCCESS;
+   }
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_entries;
+   }
+
+   /* Second pass fills in both the code and pds_info_out->entries. */
+   pvr_pds_generate_descriptor_upload_program(&program,
+                                              staging_buffer,
+                                              pds_info_out);
+
+   assert(pds_info_out->code_size_in_dwords <= code_size_in_dwords);
+
+   /* Shrink the entries buffer to the size actually written. The original
+    * pointer is kept so it isn't leaked if the realloc fails.
+    */
+   /* FIXME: use vk_realloc2() ? */
+   resized_entries = vk_realloc((!allocator) ? &device->vk.alloc : allocator,
+                                entries_buffer,
+                                pds_info_out->entries_written_size_in_bytes,
+                                8,
+                                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+   if (!resized_entries) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_staging;
+   }
+
+   entries_buffer = resized_entries;
+
+   pds_info_out->entries = entries_buffer;
+   pds_info_out->entries_size_in_bytes =
+      pds_info_out->entries_written_size_in_bytes;
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               NULL,
+                               0,
+                               0,
+                               staging_buffer,
+                               pds_info_out->code_size_in_dwords,
+                               16,
+                               16,
+                               pds_code_upload_out);
+   if (result != VK_SUCCESS) {
+      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_free_staging;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+
+err_free_staging:
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+err_free_entries:
+   vk_free2(&device->vk.alloc, allocator, entries_buffer);
+   /* Don't leave a dangling pointer in the output info. */
+   pds_info_out->entries = NULL;
+
+   return result;
+}
+
+/* Frees the GPU code upload and the host const map entries of a uniform
+ * program created by pvr_pds_uniform_program_create_and_upload().
+ */
+static void pvr_pds_uniform_program_destroy(
+   struct pvr_device *const device,
+   const struct VkAllocationCallbacks *const allocator,
+   struct pvr_pds_upload *const pds_code,
+   struct pvr_pds_info *const pds_info)
+{
+   pvr_bo_free(device, pds_code->pvr_bo);
+   vk_free2(&device->vk.alloc, allocator, pds_info->entries);
+}
+
+/* FIXME: See if pvr_device_init_compute_pds_program() and this could be merged.
+ */
+/* Generates and uploads a PDS compute shader program.
+ *
+ * Both segments are generated into one staging buffer — code at offset 0,
+ * data at offset program.code_size — and uploaded together.
+ *
+ * If add_base_workgroup is set, the dword offset within the data segment at
+ * which the base workgroup id must be patched before dispatch is returned in
+ * *base_workgroup_data_patching_offset_out (which must be non-NULL in that
+ * case).
+ */
+static VkResult pvr_pds_compute_program_create_and_upload(
+   struct pvr_device *const device,
+   const VkAllocationCallbacks *const allocator,
+   const uint32_t local_input_regs[static const WORKGROUP_DIMENSIONS],
+   const uint32_t work_group_input_regs[static const WORKGROUP_DIMENSIONS],
+   uint32_t barrier_coefficient,
+   bool add_base_workgroup,
+   uint32_t usc_temps,
+   pvr_dev_addr_t usc_shader_dev_addr,
+   struct pvr_pds_upload *const pds_upload_out,
+   struct pvr_pds_info *const pds_info_out,
+   uint32_t *const base_workgroup_data_patching_offset_out)
+{
+   struct pvr_pds_compute_shader_program program = {
+      /* clang-format off */
+      .local_input_regs = {
+         local_input_regs[0],
+         local_input_regs[1],
+         local_input_regs[2]
+      },
+      .work_group_input_regs = {
+         work_group_input_regs[0],
+         work_group_input_regs[1],
+         work_group_input_regs[2]
+      },
+      .global_input_regs = {
+         [0 ... (WORKGROUP_DIMENSIONS - 1)] =
+            PVR_PDS_COMPUTE_INPUT_REG_UNUSED
+      },
+      /* clang-format on */
+      .barrier_coefficient = barrier_coefficient,
+      .flattened_work_groups = true,
+      .clear_pds_barrier = false,
+      .add_base_workgroup = add_base_workgroup,
+      .kick_usc = true,
+   };
+   struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint32_t staging_buffer_size;
+   uint32_t *staging_buffer;
+   VkResult result;
+
+   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) == WORKGROUP_DIMENSIONS);
+   STATIC_ASSERT(ARRAY_SIZE(program.work_group_input_regs) ==
+                 WORKGROUP_DIMENSIONS);
+   STATIC_ASSERT(ARRAY_SIZE(program.global_input_regs) == WORKGROUP_DIMENSIONS);
+
+   assert(!add_base_workgroup || base_workgroup_data_patching_offset_out);
+
+   pvr_pds_setup_doutu(&program.usc_task_control,
+                       usc_shader_dev_addr.addr,
+                       usc_temps,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   /* First pass only computes program.code_size / program.data_size. */
+   pvr_pds_compute_shader(&program, NULL, PDS_GENERATE_SIZES, dev_info);
+
+   /* FIXME: According to pvr_device_init_compute_pds_program() the code size
+    * is in bytes. Investigate this.
+    */
+   staging_buffer_size =
+      (program.code_size + program.data_size) * sizeof(*staging_buffer);
+
+   staging_buffer = vk_alloc2(&device->vk.alloc,
+                              allocator,
+                              staging_buffer_size,
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!staging_buffer)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: pvr_pds_compute_shader doesn't implement
+    * PDS_GENERATE_CODEDATA_SEGMENTS.
+    */
+   pvr_pds_compute_shader(&program,
+                          &staging_buffer[0],
+                          PDS_GENERATE_CODE_SEGMENT,
+                          dev_info);
+
+   pvr_pds_compute_shader(&program,
+                          &staging_buffer[program.code_size],
+                          PDS_GENERATE_DATA_SEGMENT,
+                          dev_info);
+
+   /* We'll need to patch the base workgroup in the PDS data section before
+    * dispatch so we give back the offsets at which to patch. We only need to
+    * save the offset for the first workgroup id since the workgroup ids are
+    * stored contiguously in the data segment.
+    */
+   if (add_base_workgroup) {
+      *base_workgroup_data_patching_offset_out =
+         program.base_workgroup_constant_offset_in_dwords[0];
+   }
+
+   /* FIXME: Figure out the define for alignment of 16. */
+   result = pvr_gpu_upload_pds(device,
+                               &staging_buffer[program.code_size],
+                               program.data_size,
+                               16,
+                               &staging_buffer[0],
+                               program.code_size,
+                               16,
+                               16,
+                               pds_upload_out);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, staging_buffer);
+      return result;
+   }
+
+   *pds_info_out = (struct pvr_pds_info){
+      .temps_required = program.highest_temp,
+      .code_size_in_dwords = program.code_size,
+      .data_size_in_dwords = program.data_size,
+   };
+
+   vk_free2(&device->vk.alloc, allocator, staging_buffer);
+
+   return VK_SUCCESS;
+}
+
+/* Frees the GPU upload of a compute program created by
+ * pvr_pds_compute_program_create_and_upload().
+ *
+ * pds_info is unused: compute programs don't allocate a host entries buffer
+ * (see the create function), so there is nothing to free from it.
+ */
+static void pvr_pds_compute_program_destroy(
+   struct pvr_device *const device,
+   const struct VkAllocationCallbacks *const allocator,
+   struct pvr_pds_upload *const pds_program,
+   struct pvr_pds_info *const pds_info)
+{
+   /* We don't allocate an entries buffer so we don't need to free it */
+   pvr_bo_free(device, pds_program->pvr_bo);
+}
+
+/******************************************************************************
+   Generic pipeline functions
+ ******************************************************************************/
+
+/* Common base initialisation shared by all pipeline types. The layout is
+ * left unset and is expected to be filled in by the caller afterwards.
+ */
+static void pvr_pipeline_init(struct pvr_device *device,
+                              enum pvr_pipeline_type type,
+                              struct pvr_pipeline *const pipeline)
+{
+   assert(!pipeline->layout);
+
+   pipeline->type = type;
+
+   vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE);
+}
+
+/* Tears down the pipeline base state set up by pvr_pipeline_init(). */
+static void pvr_pipeline_finish(struct pvr_pipeline *pipeline)
+{
+   vk_object_base_finish(&pipeline->base);
+}
+
+/******************************************************************************
+   Compute pipeline functions
+ ******************************************************************************/
+
+/* Compiles and uploads shaders and PDS programs.
+ *
+ * On failure, everything uploaded up to the failing step is freed again; on
+ * success the uploads are owned by compute_pipeline and released by
+ * pvr_compute_pipeline_destroy().
+ */
+static VkResult pvr_compute_pipeline_compile(
+   struct pvr_device *const device,
+   struct pvr_pipeline_cache *pipeline_cache,
+   const VkComputePipelineCreateInfo *pCreateInfo,
+   const VkAllocationCallbacks *const allocator,
+   struct pvr_compute_pipeline *const compute_pipeline)
+{
+   /* NOTE(review): pipeline_cache and pCreateInfo are currently unused in
+    * this function; see the FIXMEs about the missing compilation step.
+    */
+   /* FIXME: Remove this hard coding. */
+   const struct pvr_explicit_constant_usage explicit_const_usage = {
+      .start_offset = 0,
+   };
+   const struct rogue_ubo_data uniform_program_ubo_data = { 0 };
+
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   uint32_t work_group_input_regs[WORKGROUP_DIMENSIONS];
+   uint32_t local_input_regs[WORKGROUP_DIMENSIONS];
+   uint32_t barrier_coefficient;
+   VkResult result;
+
+   /* FIXME: Compile the shader. */
+
+   /* Upload the (currently hard-coded) USC compute shader binary. */
+   result = pvr_gpu_upload_usc(device,
+                               pvr_usc_compute_shader,
+                               sizeof(pvr_usc_compute_shader),
+                               cache_line_size,
+                               &compute_pipeline->state.bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_pds_uniform_program_create_and_upload(
+      device,
+      allocator,
+      &uniform_program_ubo_data,
+      &explicit_const_usage,
+      compute_pipeline->base.layout,
+      PVR_STAGE_ALLOCATION_COMPUTE,
+      &compute_pipeline->state.uniform.pds_code,
+      &compute_pipeline->state.uniform.pds_info);
+   if (result != VK_SUCCESS)
+      goto err_free_shader;
+
+   /* We make sure that the compiler's unused reg value is compatible with the
+    * pds api.
+    */
+   STATIC_ASSERT(ROGUE_REG_UNUSED == PVR_PDS_COMPUTE_INPUT_REG_UNUSED);
+
+   barrier_coefficient = pvr_pds_compute_program_params.barrier_reg;
+
+   /* TODO: Maybe change the pds api to use pointers so we avoid the copy. */
+   local_input_regs[0] =
+      pvr_pds_compute_program_params.local_invocation_regs[0];
+   local_input_regs[1] =
+      pvr_pds_compute_program_params.local_invocation_regs[1];
+   /* This is not a mistake. We want to assign element 1 to 2. */
+   local_input_regs[2] =
+      pvr_pds_compute_program_params.local_invocation_regs[1];
+
+   STATIC_ASSERT(__same_type(work_group_input_regs,
+                             pvr_pds_compute_program_params.work_group_regs));
+   typed_memcpy(work_group_input_regs,
+                pvr_pds_compute_program_params.work_group_regs,
+                WORKGROUP_DIMENSIONS);
+
+   result = pvr_pds_compute_program_create_and_upload(
+      device,
+      allocator,
+      local_input_regs,
+      work_group_input_regs,
+      barrier_coefficient,
+      false,
+      pvr_pds_compute_program_params.usc_temps,
+      compute_pipeline->state.bo->vma->dev_addr,
+      &compute_pipeline->state.primary_program,
+      &compute_pipeline->state.primary_program_info,
+      NULL);
+   if (result != VK_SUCCESS)
+      goto err_free_uniform_program;
+
+   /* If the workgroup ID is required, then we require the base workgroup
+    * variant of the PDS compute program as well.
+    */
+   compute_pipeline->state.flags.base_workgroup =
+      work_group_input_regs[0] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
+      work_group_input_regs[1] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED ||
+      work_group_input_regs[2] != PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
+
+   if (compute_pipeline->state.flags.base_workgroup) {
+      result = pvr_pds_compute_program_create_and_upload(
+         device,
+         allocator,
+         local_input_regs,
+         work_group_input_regs,
+         barrier_coefficient,
+         true,
+         pvr_pds_compute_program_params.usc_temps,
+         compute_pipeline->state.bo->vma->dev_addr,
+         &compute_pipeline->state.primary_program_base_workgroup_variant,
+         &compute_pipeline->state.primary_program_base_workgroup_variant_info,
+         &compute_pipeline->state.base_workgroup_ids_dword_offset);
+      if (result != VK_SUCCESS)
+         goto err_free_compute_program;
+   }
+
+   return VK_SUCCESS;
+
+   /* Unwind in reverse order of creation. */
+err_free_compute_program:
+   /* Only reached from the base workgroup variant failure path, so the flag
+    * is always set here.
+    */
+   if (compute_pipeline->state.flags.base_workgroup)
+      pvr_bo_free(device, compute_pipeline->state.primary_program.pvr_bo);
+
+err_free_uniform_program:
+   pvr_bo_free(device, compute_pipeline->state.uniform.pds_code.pvr_bo);
+
+err_free_shader:
+   pvr_bo_free(device, compute_pipeline->state.bo);
+
+   return result;
+}
+
+/* Initialises the base pipeline state, resolves the layout handle and runs
+ * compilation/upload for the compute pipeline.
+ */
+static VkResult
+pvr_compute_pipeline_init(struct pvr_device *device,
+                          struct pvr_pipeline_cache *pipeline_cache,
+                          const VkComputePipelineCreateInfo *pCreateInfo,
+                          const VkAllocationCallbacks *allocator,
+                          struct pvr_compute_pipeline *compute_pipeline)
+{
+   VkResult result;
+
+   pvr_pipeline_init(device,
+                     PVR_PIPELINE_TYPE_COMPUTE,
+                     &compute_pipeline->base);
+
+   compute_pipeline->base.layout =
+      pvr_pipeline_layout_from_handle(pCreateInfo->layout);
+
+   result = pvr_compute_pipeline_compile(device,
+                                         pipeline_cache,
+                                         pCreateInfo,
+                                         allocator,
+                                         compute_pipeline);
+   if (result == VK_SUCCESS)
+      return VK_SUCCESS;
+
+   /* Compilation failed: undo the base init before reporting the error. */
+   pvr_pipeline_finish(&compute_pipeline->base);
+
+   return result;
+}
+
+/* Allocates and fully initialises a single compute pipeline object. */
+static VkResult
+pvr_compute_pipeline_create(struct pvr_device *device,
+                            struct pvr_pipeline_cache *pipeline_cache,
+                            const VkComputePipelineCreateInfo *pCreateInfo,
+                            const VkAllocationCallbacks *allocator,
+                            VkPipeline *const pipeline_out)
+{
+   struct pvr_compute_pipeline *const compute_pipeline =
+      vk_zalloc2(&device->vk.alloc,
+                 allocator,
+                 sizeof(*compute_pipeline),
+                 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   VkResult result;
+
+   if (!compute_pipeline)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Compiles and uploads shaders and PDS programs. */
+   result = pvr_compute_pipeline_init(device,
+                                      pipeline_cache,
+                                      pCreateInfo,
+                                      allocator,
+                                      compute_pipeline);
+   if (result == VK_SUCCESS) {
+      *pipeline_out = pvr_pipeline_to_handle(&compute_pipeline->base);
+      return VK_SUCCESS;
+   }
+
+   vk_free2(&device->vk.alloc, allocator, compute_pipeline);
+
+   return result;
+}
+
+/* Tears down a compute pipeline: destroys the optional base-workgroup
+ * variant of the primary PDS program, the primary and uniform PDS programs,
+ * frees the shader BO, finishes the base pipeline and frees the host
+ * allocation.
+ */
+static void pvr_compute_pipeline_destroy(
+   struct pvr_device *const device,
+   const VkAllocationCallbacks *const allocator,
+   struct pvr_compute_pipeline *const compute_pipeline)
+{
+   /* The base-workgroup variant only exists if it was created during
+    * pipeline compilation (flag set alongside it).
+    */
+   if (compute_pipeline->state.flags.base_workgroup) {
+      pvr_pds_compute_program_destroy(
+         device,
+         allocator,
+         &compute_pipeline->state.primary_program_base_workgroup_variant,
+         &compute_pipeline->state.primary_program_base_workgroup_variant_info);
+   }
+
+   pvr_pds_compute_program_destroy(
+      device,
+      allocator,
+      &compute_pipeline->state.primary_program,
+      &compute_pipeline->state.primary_program_info);
+   pvr_pds_uniform_program_destroy(device,
+                                   allocator,
+                                   &compute_pipeline->state.uniform.pds_code,
+                                   &compute_pipeline->state.uniform.pds_info);
+   pvr_bo_free(device, compute_pipeline->state.bo);
+
+   pvr_pipeline_finish(&compute_pipeline->base);
+
+   vk_free2(&device->vk.alloc, allocator, compute_pipeline);
+}
+
+/* Vulkan entry point: creates createInfoCount compute pipelines. On a
+ * per-pipeline failure the corresponding handle is set to VK_NULL_HANDLE
+ * and the error is returned, but the remaining create infos are still
+ * processed.
+ */
+VkResult
+pvr_CreateComputePipelines(VkDevice _device,
+                           VkPipelineCache pipelineCache,
+                           uint32_t createInfoCount,
+                           const VkComputePipelineCreateInfo *pCreateInfos,
+                           const VkAllocationCallbacks *pAllocator,
+                           VkPipeline *pPipelines)
+{
+   PVR_FROM_HANDLE(pvr_pipeline_cache, pipeline_cache, pipelineCache);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   VkResult result = VK_SUCCESS;
+
+   for (uint32_t i = 0U; i != createInfoCount; ++i) {
+      const VkResult err = pvr_compute_pipeline_create(device,
+                                                       pipeline_cache,
+                                                       &pCreateInfos[i],
+                                                       pAllocator,
+                                                       &pPipelines[i]);
+      if (err == VK_SUCCESS)
+         continue;
+
+      pPipelines[i] = VK_NULL_HANDLE;
+      result = err;
+   }
+
+   return result;
+}
+
+/******************************************************************************
+   Graphics pipeline functions
+ ******************************************************************************/
+
+/* Maps a Vulkan dynamic state enum to the driver's internal
+ * PVR_DYNAMIC_STATE_BIT_* bitmask value. Only the dynamic states the
+ * driver supports are handled; any other value is a caller error.
+ */
+static inline uint32_t pvr_dynamic_state_bit_from_vk(VkDynamicState state)
+{
+   switch (state) {
+   case VK_DYNAMIC_STATE_VIEWPORT:
+      return PVR_DYNAMIC_STATE_BIT_VIEWPORT;
+   case VK_DYNAMIC_STATE_SCISSOR:
+      return PVR_DYNAMIC_STATE_BIT_SCISSOR;
+   case VK_DYNAMIC_STATE_LINE_WIDTH:
+      return PVR_DYNAMIC_STATE_BIT_LINE_WIDTH;
+   case VK_DYNAMIC_STATE_DEPTH_BIAS:
+      return PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS;
+   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
+      return PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS;
+   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
+      return PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK;
+   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
+      return PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK;
+   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
+      return PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE;
+   default:
+      unreachable("Unsupported state.");
+   }
+}
+
+/* Tears down a graphics pipeline: destroys the fragment and vertex PDS
+ * uniform programs, every vertex-attribute PDS program, frees the PDS
+ * fragment/coefficient program BOs and the two USC shader BOs, then
+ * finishes the base pipeline and frees the host allocation. Mirrors (in
+ * reverse) the upload order in pvr_graphics_pipeline_compile().
+ */
+static void
+pvr_graphics_pipeline_destroy(struct pvr_device *const device,
+                              const VkAllocationCallbacks *const allocator,
+                              struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   const uint32_t num_vertex_attrib_programs =
+      ARRAY_SIZE(gfx_pipeline->vertex_shader_state.pds_attrib_programs);
+
+   pvr_pds_uniform_program_destroy(
+      device,
+      allocator,
+      &gfx_pipeline->fragment_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->fragment_shader_state.uniform_state.pds_info);
+
+   pvr_pds_uniform_program_destroy(
+      device,
+      allocator,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_info);
+
+   for (uint32_t i = 0; i < num_vertex_attrib_programs; i++) {
+      struct pvr_pds_attrib_program *const attrib_program =
+         &gfx_pipeline->vertex_shader_state.pds_attrib_programs[i];
+
+      pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program);
+   }
+
+   pvr_bo_free(device,
+               gfx_pipeline->fragment_shader_state.pds_fragment_program.pvr_bo);
+   pvr_bo_free(device,
+               gfx_pipeline->fragment_shader_state.pds_coeff_program.pvr_bo);
+
+   pvr_bo_free(device, gfx_pipeline->fragment_shader_state.bo);
+   pvr_bo_free(device, gfx_pipeline->vertex_shader_state.bo);
+
+   pvr_pipeline_finish(&gfx_pipeline->base);
+
+   vk_free2(&device->vk.alloc, allocator, gfx_pipeline);
+}
+
+/* Fills in the vertex shader stage state (register/coefficient counts,
+ * input/output sizes and the packed TA_STATE_VARYING words) from the
+ * compiler's build data. Several fields are currently hard coded (see
+ * TODO below).
+ */
+static void
+pvr_vertex_state_init(struct pvr_graphics_pipeline *gfx_pipeline,
+                      const struct rogue_common_build_data *common_data,
+                      const struct rogue_vs_build_data *vs_data)
+{
+   struct pvr_vertex_shader_state *vertex_state =
+      &gfx_pipeline->vertex_shader_state;
+
+   /* TODO: Hard coding these for now. These should be populated based on the
+    * information returned by the compiler.
+    */
+   vertex_state->stage_state.const_shared_reg_count = common_data->shareds;
+   vertex_state->stage_state.const_shared_reg_offset = 0;
+   vertex_state->stage_state.temps_count = common_data->temps;
+   vertex_state->stage_state.coefficient_size = common_data->coeffs;
+   vertex_state->stage_state.uses_atomic_ops = false;
+   vertex_state->stage_state.uses_texture_rw = false;
+   vertex_state->stage_state.uses_barrier = false;
+   vertex_state->stage_state.has_side_effects = false;
+   vertex_state->stage_state.empty_program = false;
+
+   vertex_state->vertex_input_size = vs_data->num_vertex_input_regs;
+   vertex_state->vertex_output_size =
+      vs_data->num_vertex_outputs * ROGUE_REG_SIZE_BYTES;
+   vertex_state->output_selects = 0;
+   vertex_state->user_clip_planes_mask = 0;
+   vertex_state->entry_offset = 0;
+
+   /* TODO: The number of varyings should be checked against the fragment
+    * shader inputs and assigned in the place where that happens.
+    * There will also be an opportunity to cull unused fs inputs/vs outputs.
+    */
+   /* All varyings are currently treated as F32 linear-interpolated; F16 and
+    * flat/NPC counts are packed as zero below.
+    */
+   pvr_csb_pack (&gfx_pipeline->vertex_shader_state.varying[0],
+                 TA_STATE_VARYING0,
+                 varying0) {
+      varying0.f32_linear = vs_data->num_varyings;
+      varying0.f32_flat = 0;
+      varying0.f32_npc = 0;
+   }
+
+   pvr_csb_pack (&gfx_pipeline->vertex_shader_state.varying[1],
+                 TA_STATE_VARYING1,
+                 varying1) {
+      varying1.f16_linear = 0;
+      varying1.f16_flat = 0;
+      varying1.f16_npc = 0;
+   }
+}
+
+/* Fills in the fragment shader stage state from the compiler's build data.
+ * Several fields are currently hard coded (see TODO below).
+ */
+static void
+pvr_fragment_state_init(struct pvr_graphics_pipeline *gfx_pipeline,
+                        const struct rogue_common_build_data *common_data)
+{
+   struct pvr_fragment_shader_state *fragment_state =
+      &gfx_pipeline->fragment_shader_state;
+
+   /* TODO: Hard coding these for now. These should be populated based on the
+    * information returned by the compiler.
+    */
+   fragment_state->stage_state.const_shared_reg_count = 0;
+   fragment_state->stage_state.const_shared_reg_offset = 0;
+   fragment_state->stage_state.temps_count = common_data->temps;
+   fragment_state->stage_state.coefficient_size = common_data->coeffs;
+   fragment_state->stage_state.uses_atomic_ops = false;
+   fragment_state->stage_state.uses_texture_rw = false;
+   fragment_state->stage_state.uses_barrier = false;
+   fragment_state->stage_state.has_side_effects = false;
+   fragment_state->stage_state.empty_program = false;
+
+   fragment_state->pass_type = 0;
+   fragment_state->entry_offset = 0;
+}
+
+/* Compiles and uploads shaders and PDS programs.
+ *
+ * Translates each active stage SPIR-V -> NIR -> rogue -> binary, uploads
+ * the vertex and fragment USC binaries, then creates and uploads the PDS
+ * coefficient, fragment, vertex-attribute and uniform programs that
+ * reference them. On failure everything uploaded so far is torn down in
+ * reverse order via the err_* labels; the first two translation loops
+ * instead free the build context directly since nothing has been uploaded
+ * yet at that point.
+ */
+static VkResult
+pvr_graphics_pipeline_compile(struct pvr_device *const device,
+                              struct pvr_pipeline_cache *pipeline_cache,
+                              const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                              const VkAllocationCallbacks *const allocator,
+                              struct pvr_graphics_pipeline *const gfx_pipeline)
+{
+   /* FIXME: Remove this hard coding. */
+   const struct pvr_explicit_constant_usage explicit_const_usage = {
+      .start_offset = 16,
+   };
+
+   const VkPipelineVertexInputStateCreateInfo *const vertex_input_state =
+      pCreateInfo->pVertexInputState;
+
+   const uint32_t cache_line_size =
+      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
+   struct rogue_compiler *compiler = device->pdevice->compiler;
+   struct rogue_build_ctx *ctx;
+   VkResult result;
+
+   /* Compile the USC shaders. */
+
+   /* Setup shared build context. All per-stage NIR/rogue/binary data is
+    * ralloc'd off this context, so a single ralloc_free(ctx) releases it.
+    */
+   ctx = rogue_create_build_context(compiler);
+   if (!ctx)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* NIR middle-end translation. Stages are walked from fragment backwards
+    * towards vertex.
+    */
+   for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE;
+        stage--) {
+      const VkPipelineShaderStageCreateInfo *create_info;
+      size_t stage_index = gfx_pipeline->stage_indices[stage];
+
+      /* Skip unused/inactive stages. stage_indices entries are set to ~0 by
+       * pvr_graphics_pipeline_init() for stages not present in pCreateInfo.
+       */
+      if (stage_index == ~0)
+         continue;
+
+      create_info = &pCreateInfo->pStages[stage_index];
+
+      /* SPIR-V to NIR. */
+      ctx->nir[stage] = pvr_spirv_to_nir(ctx, stage, create_info);
+      if (!ctx->nir[stage]) {
+         ralloc_free(ctx);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+   }
+
+   /* Pre-back-end analysis and optimization, driver data extraction. */
+   /* TODO: Analyze and cull unused I/O between stages. */
+   /* TODO: Allocate UBOs between stages;
+    * pipeline->layout->set_{count,layout}.
+    */
+
+   /* Back-end translation. */
+   for (gl_shader_stage stage = MESA_SHADER_FRAGMENT; stage > MESA_SHADER_NONE;
+        stage--) {
+      if (!ctx->nir[stage])
+         continue;
+
+      ctx->rogue[stage] = pvr_nir_to_rogue(ctx, ctx->nir[stage]);
+      if (!ctx->rogue[stage]) {
+         ralloc_free(ctx);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+
+      ctx->binary[stage] = pvr_rogue_to_binary(ctx, ctx->rogue[stage]);
+      if (!ctx->binary[stage]) {
+         ralloc_free(ctx);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+   }
+
+   pvr_vertex_state_init(gfx_pipeline,
+                         &ctx->common_data[MESA_SHADER_VERTEX],
+                         &ctx->stage_data.vs);
+
+   result = pvr_gpu_upload_usc(device,
+                               ctx->binary[MESA_SHADER_VERTEX]->data,
+                               ctx->binary[MESA_SHADER_VERTEX]->size,
+                               cache_line_size,
+                               &gfx_pipeline->vertex_shader_state.bo);
+   if (result != VK_SUCCESS)
+      goto err_free_build_context;
+
+   pvr_fragment_state_init(gfx_pipeline,
+                           &ctx->common_data[MESA_SHADER_FRAGMENT]);
+
+   result = pvr_gpu_upload_usc(device,
+                               ctx->binary[MESA_SHADER_FRAGMENT]->data,
+                               ctx->binary[MESA_SHADER_FRAGMENT]->size,
+                               cache_line_size,
+                               &gfx_pipeline->fragment_shader_state.bo);
+   if (result != VK_SUCCESS)
+      goto err_free_vertex_bo;
+
+   /* TODO: powervr has an optimization where it attempts to recompile shaders.
+    * See PipelineCompileNoISPFeedbackFragmentStage. Unimplemented since in our
+    * case the optimization doesn't happen.
+    */
+
+   /* TODO: The programs we use are hard coded for now, but these should be
+    * selected dynamically.
+    */
+
+   result = pvr_pds_coeff_program_create_and_upload(
+      device,
+      allocator,
+      ctx->stage_data.fs.iterator_args.fpu_iterators,
+      ctx->stage_data.fs.iterator_args.num_fpu_iterators,
+      ctx->stage_data.fs.iterator_args.destination,
+      &gfx_pipeline->fragment_shader_state.pds_coeff_program);
+   if (result != VK_SUCCESS)
+      goto err_free_fragment_bo;
+
+   result = pvr_pds_fragment_program_create_and_upload(
+      device,
+      allocator,
+      gfx_pipeline->fragment_shader_state.bo,
+      ctx->common_data[MESA_SHADER_FRAGMENT].temps,
+      ctx->stage_data.fs.msaa_mode,
+      ctx->stage_data.fs.phas,
+      &gfx_pipeline->fragment_shader_state.pds_fragment_program);
+   if (result != VK_SUCCESS)
+      goto err_free_coeff_program;
+
+   result = pvr_pds_vertex_attrib_programs_create_and_upload(
+      device,
+      allocator,
+      vertex_input_state,
+      ctx->common_data[MESA_SHADER_VERTEX].temps,
+      &ctx->stage_data.vs,
+      &gfx_pipeline->vertex_shader_state.pds_attrib_programs);
+   if (result != VK_SUCCESS)
+      goto err_free_frag_program;
+
+   result = pvr_pds_uniform_program_create_and_upload(
+      device,
+      allocator,
+      &ctx->common_data[MESA_SHADER_VERTEX].ubo_data,
+      &explicit_const_usage,
+      gfx_pipeline->base.layout,
+      PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_info);
+   if (result != VK_SUCCESS)
+      goto err_free_vertex_attrib_program;
+
+   /* FIXME: When the temp_buffer_total_size is non-zero we need to allocate a
+    * scratch buffer for both vertex and fragment stage.
+    * Figure out the best place to do this.
+    */
+   /* assert(pvr_pds_uniform_program_variables.temp_buff_total_size == 0); */
+   /* TODO: Implement spilling with the above. */
+
+   /* TODO: Call pvr_pds_uniform_program_create_and_upload in a loop. */
+   /* FIXME: For now we pass in the same explicit_const_usage since it contains
+    * all invalid entries. Fix this by hooking it up to the compiler.
+    */
+   result = pvr_pds_uniform_program_create_and_upload(
+      device,
+      allocator,
+      &ctx->common_data[MESA_SHADER_FRAGMENT].ubo_data,
+      &explicit_const_usage,
+      gfx_pipeline->base.layout,
+      PVR_STAGE_ALLOCATION_FRAGMENT,
+      &gfx_pipeline->fragment_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->fragment_shader_state.uniform_state.pds_info);
+   if (result != VK_SUCCESS)
+      goto err_free_vertex_uniform_program;
+
+   /* All GPU objects uploaded; the build context is no longer needed. */
+   ralloc_free(ctx);
+
+   return VK_SUCCESS;
+
+err_free_vertex_uniform_program:
+   pvr_pds_uniform_program_destroy(
+      device,
+      allocator,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_code,
+      &gfx_pipeline->vertex_shader_state.uniform_state.pds_info);
+err_free_vertex_attrib_program:
+   for (uint32_t i = 0;
+        i < ARRAY_SIZE(gfx_pipeline->vertex_shader_state.pds_attrib_programs);
+        i++) {
+      struct pvr_pds_attrib_program *const attrib_program =
+         &gfx_pipeline->vertex_shader_state.pds_attrib_programs[i];
+
+      pvr_pds_vertex_attrib_program_destroy(device, allocator, attrib_program);
+   }
+err_free_frag_program:
+   pvr_bo_free(device,
+               gfx_pipeline->fragment_shader_state.pds_fragment_program.pvr_bo);
+err_free_coeff_program:
+   pvr_bo_free(device,
+               gfx_pipeline->fragment_shader_state.pds_coeff_program.pvr_bo);
+err_free_fragment_bo:
+   pvr_bo_free(device, gfx_pipeline->fragment_shader_state.bo);
+err_free_vertex_bo:
+   pvr_bo_free(device, gfx_pipeline->vertex_shader_state.bo);
+err_free_build_context:
+   ralloc_free(ctx);
+   return result;
+}
+
+/* Translates the Vulkan depth/stencil create info into the pipeline's depth
+ * and stencil fields. When a test is disabled the fields are forced to
+ * pass-through defaults (ALWAYS / KEEP). When no create info is given
+ * (e.g. with rasterizer discard) the fields are left untouched.
+ */
+static void pvr_graphics_pipeline_init_depth_and_stencil_state(
+   struct pvr_graphics_pipeline *gfx_pipeline,
+   const VkPipelineDepthStencilStateCreateInfo *depth_stencil_state)
+{
+   const VkStencilOpState *front;
+   const VkStencilOpState *back;
+
+   if (!depth_stencil_state)
+      return;
+
+   front = &depth_stencil_state->front;
+   back = &depth_stencil_state->back;
+
+   if (depth_stencil_state->depthTestEnable) {
+      gfx_pipeline->depth_compare_op = depth_stencil_state->depthCompareOp;
+      gfx_pipeline->depth_write_disable =
+         !depth_stencil_state->depthWriteEnable;
+   } else {
+      /* Depth test disabled: always pass and never write. */
+      gfx_pipeline->depth_compare_op = VK_COMPARE_OP_ALWAYS;
+      gfx_pipeline->depth_write_disable = true;
+   }
+
+   if (depth_stencil_state->stencilTestEnable) {
+      gfx_pipeline->stencil_front.compare_op = front->compareOp;
+      gfx_pipeline->stencil_front.fail_op = front->failOp;
+      gfx_pipeline->stencil_front.depth_fail_op = front->depthFailOp;
+      gfx_pipeline->stencil_front.pass_op = front->passOp;
+
+      gfx_pipeline->stencil_back.compare_op = back->compareOp;
+      gfx_pipeline->stencil_back.fail_op = back->failOp;
+      gfx_pipeline->stencil_back.depth_fail_op = back->depthFailOp;
+      gfx_pipeline->stencil_back.pass_op = back->passOp;
+   } else {
+      /* Stencil test disabled: always pass, keep all values. */
+      gfx_pipeline->stencil_front.compare_op = VK_COMPARE_OP_ALWAYS;
+      gfx_pipeline->stencil_front.fail_op = VK_STENCIL_OP_KEEP;
+      gfx_pipeline->stencil_front.depth_fail_op = VK_STENCIL_OP_KEEP;
+      gfx_pipeline->stencil_front.pass_op = VK_STENCIL_OP_KEEP;
+
+      gfx_pipeline->stencil_back = gfx_pipeline->stencil_front;
+   }
+}
+
+/* Records which states are dynamic in gfx_pipeline->dynamic_state.mask and
+ * captures the static values for the remaining states from the various
+ * create-info structs. Values for states marked dynamic are not captured
+ * here; they are expected to be provided via the command buffer.
+ */
+static void pvr_graphics_pipeline_init_dynamic_state(
+   struct pvr_graphics_pipeline *gfx_pipeline,
+   const VkPipelineDynamicStateCreateInfo *dynamic_state,
+   const VkPipelineViewportStateCreateInfo *viewport_state,
+   const VkPipelineDepthStencilStateCreateInfo *depth_stencil_state,
+   const VkPipelineColorBlendStateCreateInfo *color_blend_state,
+   const VkPipelineRasterizationStateCreateInfo *rasterization_state)
+{
+   struct pvr_dynamic_state *const internal_dynamic_state =
+      &gfx_pipeline->dynamic_state;
+   uint32_t dynamic_states = 0;
+
+   /* Accumulate the requested dynamic states into a single bitmask. */
+   if (dynamic_state) {
+      for (uint32_t i = 0; i < dynamic_state->dynamicStateCount; i++) {
+         dynamic_states |=
+            pvr_dynamic_state_bit_from_vk(dynamic_state->pDynamicStates[i]);
+      }
+   }
+
+   /* TODO: Verify this.
+    * We don't zero out the pipeline's state if they are dynamic since they
+    * should be set later on in the command buffer.
+    */
+
+   /* TODO: Handle rasterizerDiscardEnable. */
+
+   if (rasterization_state) {
+      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_LINE_WIDTH))
+         internal_dynamic_state->line_width = rasterization_state->lineWidth;
+
+      /* TODO: Do we need the depthBiasEnable check? */
+      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS)) {
+         internal_dynamic_state->depth_bias.constant_factor =
+            rasterization_state->depthBiasConstantFactor;
+         internal_dynamic_state->depth_bias.clamp =
+            rasterization_state->depthBiasClamp;
+         internal_dynamic_state->depth_bias.slope_factor =
+            rasterization_state->depthBiasSlopeFactor;
+      }
+   }
+
+   /* TODO: handle viewport state flags. */
+
+   /* TODO: handle static viewport state. */
+   /* We assume the viewport state to by dynamic for now. */
+
+   /* TODO: handle static scissor state. */
+   /* We assume the scissor state to by dynamic for now. */
+
+   if (depth_stencil_state) {
+      const VkStencilOpState *const front = &depth_stencil_state->front;
+      const VkStencilOpState *const back = &depth_stencil_state->back;
+
+      /* VkPhysicalDeviceFeatures->depthBounds is false. */
+      assert(depth_stencil_state->depthBoundsTestEnable == VK_FALSE);
+
+      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK)) {
+         internal_dynamic_state->compare_mask.front = front->compareMask;
+         internal_dynamic_state->compare_mask.back = back->compareMask;
+      }
+
+      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK)) {
+         internal_dynamic_state->write_mask.front = front->writeMask;
+         internal_dynamic_state->write_mask.back = back->writeMask;
+      }
+
+      if (!(dynamic_states & PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE)) {
+         internal_dynamic_state->reference.front = front->reference;
+         internal_dynamic_state->reference.back = back->reference;
+      }
+   }
+
+   if (color_blend_state &&
+       !(dynamic_states & PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS)) {
+      STATIC_ASSERT(__same_type(internal_dynamic_state->blend_constants,
+                                color_blend_state->blendConstants));
+
+      typed_memcpy(internal_dynamic_state->blend_constants,
+                   color_blend_state->blendConstants,
+                   ARRAY_SIZE(internal_dynamic_state->blend_constants));
+   }
+
+   /* TODO: handle STATIC_STATE_DEPTH_BOUNDS ? */
+
+   internal_dynamic_state->mask = dynamic_states;
+}
+
+/* Initializes a graphics pipeline from the Vulkan create info: captures
+ * rasterization/depth-stencil/dynamic/input-assembly/multisample state,
+ * records which create-info stage index corresponds to each shader stage,
+ * then compiles and uploads the shaders and PDS programs. On failure the
+ * base pipeline is finished and the error returned; the caller owns (and
+ * frees) the pipeline allocation.
+ */
+static VkResult
+pvr_graphics_pipeline_init(struct pvr_device *device,
+                           struct pvr_pipeline_cache *pipeline_cache,
+                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                           const VkAllocationCallbacks *allocator,
+                           struct pvr_graphics_pipeline *gfx_pipeline)
+{
+   /* If rasterization is not enabled, various CreateInfo structs must be
+    * ignored.
+    */
+   const bool raster_discard_enabled =
+      pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
+   const VkPipelineViewportStateCreateInfo *vs_info =
+      !raster_discard_enabled ? pCreateInfo->pViewportState : NULL;
+   const VkPipelineDepthStencilStateCreateInfo *dss_info =
+      !raster_discard_enabled ? pCreateInfo->pDepthStencilState : NULL;
+   const VkPipelineRasterizationStateCreateInfo *rs_info =
+      !raster_discard_enabled ? pCreateInfo->pRasterizationState : NULL;
+   const VkPipelineColorBlendStateCreateInfo *cbs_info =
+      !raster_discard_enabled ? pCreateInfo->pColorBlendState : NULL;
+   const VkPipelineMultisampleStateCreateInfo *ms_info =
+      !raster_discard_enabled ? pCreateInfo->pMultisampleState : NULL;
+   VkResult result;
+
+   pvr_pipeline_init(device, PVR_PIPELINE_TYPE_GRAPHICS, &gfx_pipeline->base);
+
+   pvr_finishme("ignoring pCreateInfo flags.");
+   pvr_finishme("ignoring pipeline cache.");
+
+   gfx_pipeline->raster_state.discard_enable = raster_discard_enabled;
+   gfx_pipeline->raster_state.cull_mode =
+      pCreateInfo->pRasterizationState->cullMode;
+   gfx_pipeline->raster_state.front_face =
+      pCreateInfo->pRasterizationState->frontFace;
+   gfx_pipeline->raster_state.depth_bias_enable =
+      pCreateInfo->pRasterizationState->depthBiasEnable;
+   gfx_pipeline->raster_state.depth_clamp_enable =
+      pCreateInfo->pRasterizationState->depthClampEnable;
+
+   /* FIXME: Handle depthClampEnable. */
+
+   pvr_graphics_pipeline_init_depth_and_stencil_state(gfx_pipeline, dss_info);
+   pvr_graphics_pipeline_init_dynamic_state(gfx_pipeline,
+                                            pCreateInfo->pDynamicState,
+                                            vs_info,
+                                            dss_info,
+                                            cbs_info,
+                                            rs_info);
+
+   if (pCreateInfo->pInputAssemblyState) {
+      gfx_pipeline->input_asm_state.topology =
+         pCreateInfo->pInputAssemblyState->topology;
+      gfx_pipeline->input_asm_state.primitive_restart =
+         pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
+   }
+
+   /* Mark all stages as unused (~0); the loop below fills in the create-info
+    * index for each stage actually present.
+    */
+   memset(gfx_pipeline->stage_indices, ~0, sizeof(gfx_pipeline->stage_indices));
+
+   for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {
+      VkShaderStageFlagBits vk_stage = pCreateInfo->pStages[i].stage;
+      gl_shader_stage gl_stage = vk_to_mesa_shader_stage(vk_stage);
+      /* From the Vulkan 1.2.192 spec for VkPipelineShaderStageCreateInfo:
+       *
+       *    "stage must not be VK_SHADER_STAGE_ALL_GRAPHICS,
+       *    or VK_SHADER_STAGE_ALL."
+       *
+       * So we don't handle that.
+       *
+       * We also don't handle VK_SHADER_STAGE_TESSELLATION_* and
+       * VK_SHADER_STAGE_GEOMETRY_BIT stages as 'tessellationShader' and
+       * 'geometryShader' are set to false in the VkPhysicalDeviceFeatures
+       * structure returned by the driver.
+       */
+      switch (pCreateInfo->pStages[i].stage) {
+      case VK_SHADER_STAGE_VERTEX_BIT:
+      case VK_SHADER_STAGE_FRAGMENT_BIT:
+         gfx_pipeline->stage_indices[gl_stage] = i;
+         break;
+      default:
+         unreachable("Unsupported stage.");
+      }
+   }
+
+   gfx_pipeline->base.layout =
+      pvr_pipeline_layout_from_handle(pCreateInfo->layout);
+
+   if (ms_info) {
+      gfx_pipeline->rasterization_samples = ms_info->rasterizationSamples;
+      /* A NULL pSampleMask means all samples are enabled. */
+      gfx_pipeline->sample_mask =
+         (ms_info->pSampleMask) ? ms_info->pSampleMask[0] : 0xFFFFFFFF;
+   } else {
+      gfx_pipeline->rasterization_samples = VK_SAMPLE_COUNT_1_BIT;
+      gfx_pipeline->sample_mask = 0xFFFFFFFF;
+   }
+
+   /* Compiles and uploads shaders and PDS programs. */
+   result = pvr_graphics_pipeline_compile(device,
+                                          pipeline_cache,
+                                          pCreateInfo,
+                                          allocator,
+                                          gfx_pipeline);
+   if (result != VK_SUCCESS) {
+      pvr_pipeline_finish(&gfx_pipeline->base);
+      return result;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Allocates a graphics pipeline object, initializes it (which compiles and
+ * uploads the shaders and PDS programs) and returns its handle. The
+ * allocation is released again if initialization fails. If allocator is
+ * NULL the device's internal allocator is used.
+ */
+static VkResult
+pvr_graphics_pipeline_create(struct pvr_device *device,
+                             struct pvr_pipeline_cache *pipeline_cache,
+                             const VkGraphicsPipelineCreateInfo *pCreateInfo,
+                             const VkAllocationCallbacks *allocator,
+                             VkPipeline *const pipeline_out)
+{
+   struct pvr_graphics_pipeline *pipeline =
+      vk_zalloc2(&device->vk.alloc,
+                 allocator,
+                 sizeof(*pipeline),
+                 8,
+                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   VkResult result;
+
+   if (pipeline == NULL)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Compiles and uploads shaders and PDS programs too. */
+   result = pvr_graphics_pipeline_init(device,
+                                       pipeline_cache,
+                                       pCreateInfo,
+                                       allocator,
+                                       pipeline);
+   if (result != VK_SUCCESS) {
+      vk_free2(&device->vk.alloc, allocator, pipeline);
+      return result;
+   }
+
+   *pipeline_out = pvr_pipeline_to_handle(&pipeline->base);
+
+   return VK_SUCCESS;
+}
+
+/* Vulkan entry point: creates createInfoCount graphics pipelines. On a
+ * per-pipeline failure the corresponding handle is set to VK_NULL_HANDLE
+ * and the error is returned, but the remaining create infos are still
+ * processed.
+ */
+VkResult
+pvr_CreateGraphicsPipelines(VkDevice _device,
+                            VkPipelineCache pipelineCache,
+                            uint32_t createInfoCount,
+                            const VkGraphicsPipelineCreateInfo *pCreateInfos,
+                            const VkAllocationCallbacks *pAllocator,
+                            VkPipeline *pPipelines)
+{
+   PVR_FROM_HANDLE(pvr_pipeline_cache, pipeline_cache, pipelineCache);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   VkResult result = VK_SUCCESS;
+
+   for (uint32_t i = 0U; i != createInfoCount; ++i) {
+      const VkResult err = pvr_graphics_pipeline_create(device,
+                                                        pipeline_cache,
+                                                        &pCreateInfos[i],
+                                                        pAllocator,
+                                                        &pPipelines[i]);
+      if (err == VK_SUCCESS)
+         continue;
+
+      pPipelines[i] = VK_NULL_HANDLE;
+      result = err;
+   }
+
+   return result;
+}
+
+/*****************************************************************************
+   Other functions
+*****************************************************************************/
+
+/* Vulkan entry point: destroys a pipeline of either type. Destroying a
+ * VK_NULL_HANDLE pipeline is a no-op, as required by the spec.
+ */
+void pvr_DestroyPipeline(VkDevice _device,
+                         VkPipeline _pipeline,
+                         const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_pipeline, pipeline, _pipeline);
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+
+   if (!pipeline)
+      return;
+
+   /* Dispatch to the type-specific destructor. */
+   if (pipeline->type == PVR_PIPELINE_TYPE_GRAPHICS) {
+      pvr_graphics_pipeline_destroy(device,
+                                    pAllocator,
+                                    to_pvr_graphics_pipeline(pipeline));
+   } else if (pipeline->type == PVR_PIPELINE_TYPE_COMPUTE) {
+      pvr_compute_pipeline_destroy(device,
+                                   pAllocator,
+                                   to_pvr_compute_pipeline(pipeline));
+   } else {
+      unreachable("Unknown pipeline type.");
+   }
+}
diff --git a/src/imagination/vulkan/pvr_pipeline_cache.c b/src/imagination/vulkan/pvr_pipeline_cache.c
new file mode 100644 (file)
index 0000000..fb203c4
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "pvr_device_info.h"
+#include "pvr_private.h"
+#include "util/blob.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vulkan/util/vk_util.h"
+
+/* Validate the vk_pipeline_cache_header at the start of client-supplied
+ * initial cache data (size, version, vendor/device id and cache UUID) against
+ * this physical device. Data that fails any check is silently discarded so
+ * the cache simply starts out empty, which is valid cache behaviour.
+ */
+static void pvr_pipeline_cache_load(struct pvr_pipeline_cache *cache,
+                                    const void *data,
+                                    size_t size)
+{
+   struct pvr_device *device = cache->device;
+   struct pvr_physical_device *pdevice = device->pdevice;
+   struct vk_pipeline_cache_header header;
+   struct blob_reader blob;
+
+   blob_reader_init(&blob, data, size);
+
+   blob_copy_bytes(&blob, &header, sizeof(header));
+   /* Truncated input: not even a full header was present. */
+   if (blob.overrun)
+      return;
+
+   if (header.header_size < sizeof(header))
+      return;
+   if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
+      return;
+   if (header.vendor_id != VK_VENDOR_ID_IMAGINATION)
+      return;
+   if (header.device_id != pdevice->dev_info.ident.device_id)
+      return;
+   if (memcmp(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE) != 0)
+      return;
+
+   /* TODO: There isn't currently any cached data so there's nothing to load
+    * at this point. Once there is something to load then load it now.
+    */
+}
+
+/* Implements vkCreatePipelineCache(). Allocates the cache object and, when
+ * pInitialData is provided, validates and loads it via
+ * pvr_pipeline_cache_load().
+ */
+VkResult pvr_CreatePipelineCache(VkDevice _device,
+                                 const VkPipelineCacheCreateInfo *pCreateInfo,
+                                 const VkAllocationCallbacks *pAllocator,
+                                 VkPipelineCache *pPipelineCache)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_pipeline_cache *cache;
+
+   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
+   /* No VkPipelineCacheCreateFlagBits are supported here yet. */
+   assert(pCreateInfo->flags == 0);
+
+   cache = vk_object_alloc(&device->vk,
+                           pAllocator,
+                           sizeof(*cache),
+                           VK_OBJECT_TYPE_PIPELINE_CACHE);
+   if (!cache)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   cache->device = device;
+
+   if (pCreateInfo->initialDataSize > 0) {
+      pvr_pipeline_cache_load(cache,
+                              pCreateInfo->pInitialData,
+                              pCreateInfo->initialDataSize);
+   }
+
+   *pPipelineCache = pvr_pipeline_cache_to_handle(cache);
+
+   return VK_SUCCESS;
+}
+
+/* Implements vkDestroyPipelineCache(). A NULL cache handle is a no-op, as
+ * required by the Vulkan spec.
+ */
+void pvr_DestroyPipelineCache(VkDevice _device,
+                              VkPipelineCache _cache,
+                              const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_pipeline_cache, cache, _cache);
+
+   if (!cache)
+      return;
+
+   vk_object_free(&device->vk, pAllocator, cache);
+}
+
+/* Implements vkGetPipelineCacheData(). When pData is NULL only the required
+ * size is reported through *pDataSize. When pData is provided, at most
+ * *pDataSize bytes are written; per the Vulkan spec VK_INCOMPLETE is
+ * returned (with *pDataSize set to the bytes actually written) if the
+ * caller's buffer could not hold all of the cache data.
+ */
+VkResult pvr_GetPipelineCacheData(VkDevice _device,
+                                  VkPipelineCache _cache,
+                                  size_t *pDataSize,
+                                  void *pData)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_physical_device *pdevice = device->pdevice;
+   struct blob blob;
+
+   /* With no output buffer, size the blob so it can never overflow and we
+    * just measure how much we would have written.
+    */
+   if (pData)
+      blob_init_fixed(&blob, pData, *pDataSize);
+   else
+      blob_init_fixed(&blob, NULL, SIZE_MAX);
+
+   struct vk_pipeline_cache_header header = {
+      .header_size = sizeof(struct vk_pipeline_cache_header),
+      .header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE,
+      .vendor_id = VK_VENDOR_ID_IMAGINATION,
+      .device_id = pdevice->dev_info.ident.device_id,
+   };
+   memcpy(header.uuid, pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
+   blob_write_bytes(&blob, &header, sizeof(header));
+
+   /* TODO: Once there's some data to cache then this should be written to
+    * 'blob'.
+    */
+
+   *pDataSize = blob.size;
+
+   /* A fixed blob flags out_of_memory instead of growing; that means the
+    * caller's buffer was too small for the full cache data.
+    */
+   if (pData && blob.out_of_memory) {
+      blob_finish(&blob);
+      return VK_INCOMPLETE;
+   }
+
+   blob_finish(&blob);
+
+   return VK_SUCCESS;
+}
+
+/* Implements vkMergePipelineCaches(). Currently a no-op because no data is
+ * cached yet (see TODOs above); all parameters are intentionally unused.
+ */
+VkResult pvr_MergePipelineCaches(VkDevice _device,
+                                 VkPipelineCache destCache,
+                                 uint32_t srcCacheCount,
+                                 const VkPipelineCache *pSrcCaches)
+{
+   /* TODO: Once there's some data to cache then this will need to be able to
+    * merge caches together.
+    */
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/pvr_private.h b/src/imagination/vulkan/pvr_private.h
new file mode 100644 (file)
index 0000000..8e681d8
--- /dev/null
@@ -0,0 +1,1427 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on anv driver which is:
+ * Copyright © 2015 Intel Corporation
+ *
+ * based in part on radv driver which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_PRIVATE_H
+#define PVR_PRIVATE_H
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "compiler/shader_enums.h"
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_entrypoints.h"
+#include "pvr_hw_pass.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_pds.h"
+#include "pvr_winsys.h"
+#include "rogue/rogue.h"
+#include "util/bitscan.h"
+#include "util/format/u_format.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "util/u_dynarray.h"
+#include "vk_command_buffer.h"
+#include "vk_device.h"
+#include "vk_image.h"
+#include "vk_instance.h"
+#include "vk_log.h"
+#include "vk_physical_device.h"
+#include "vk_queue.h"
+#include "wsi_common.h"
+
+#ifdef HAVE_VALGRIND
+#   include <valgrind/valgrind.h>
+#   include <valgrind/memcheck.h>
+#   define VG(x) x
+#else
+#   define VG(x) ((void)0)
+#endif
+
+/* PCI vendor ID for Imagination Technologies; written into (and validated
+ * against) the pipeline cache header.
+ */
+#define VK_VENDOR_ID_IMAGINATION 0x1010
+
+/* Dwords of PBE state kept per entry (presumably Pixel Back End state words
+ * — confirm at usage sites).
+ */
+#define PVR_STATE_PBE_DWORDS 2U
+
+/* Number of descriptor types the pipeline layout tracks, i.e. every core
+ * type up to and including INPUT_ATTACHMENT.
+ */
+#define PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT \
+   (uint32_t)(VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT + 1U)
+
+/* TODO: move into a common surface library? */
+/* Memory layouts for image data (see pvr_image::memlayout). */
+enum pvr_memlayout {
+   PVR_MEMLAYOUT_UNDEFINED = 0, /* explicitly treat 0 as undefined */
+   PVR_MEMLAYOUT_LINEAR,
+   PVR_MEMLAYOUT_TWIDDLED,
+   PVR_MEMLAYOUT_3DTWIDDLED,
+};
+
+/* Command buffer lifecycle state (see pvr_cmd_buffer::status). */
+enum pvr_cmd_buffer_status {
+   PVR_CMD_BUFFER_STATUS_INVALID = 0, /* explicitly treat 0 as invalid */
+   PVR_CMD_BUFFER_STATUS_INITIAL,
+   PVR_CMD_BUFFER_STATUS_RECORDING,
+   PVR_CMD_BUFFER_STATUS_EXECUTABLE,
+};
+
+/* Texture state variants prepacked per image view; indexes the first
+ * dimension of pvr_image_view::texture_state.
+ */
+enum pvr_texture_state {
+   PVR_TEXTURE_STATE_SAMPLE,
+   PVR_TEXTURE_STATE_STORAGE,
+   PVR_TEXTURE_STATE_ATTACHMENT,
+   PVR_TEXTURE_STATE_MAX_ENUM,
+};
+
+/* Discriminator for the per-type union inside struct pvr_sub_cmd. */
+enum pvr_sub_cmd_type {
+   PVR_SUB_CMD_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+   PVR_SUB_CMD_TYPE_GRAPHICS,
+   PVR_SUB_CMD_TYPE_COMPUTE,
+   PVR_SUB_CMD_TYPE_TRANSFER,
+};
+
+/* Tracks how loaded depth/stencil values are used by a graphics sub command
+ * (see pvr_sub_cmd::gfx.depth_usage / stencil_usage).
+ */
+enum pvr_depth_stencil_usage {
+   PVR_DEPTH_STENCIL_USAGE_UNDEFINED = 0, /* explicitly treat 0 as undefined */
+   PVR_DEPTH_STENCIL_USAGE_NEEDED,
+   PVR_DEPTH_STENCIL_USAGE_NEVER,
+};
+
+/* GPU job types. Also used as the bit positions for
+ * enum pvr_pipeline_stage_bits and as the index into pvr_queue::completion.
+ */
+enum pvr_job_type {
+   PVR_JOB_TYPE_GEOM,
+   PVR_JOB_TYPE_FRAG,
+   PVR_JOB_TYPE_COMPUTE,
+   PVR_JOB_TYPE_TRANSFER,
+   PVR_JOB_TYPE_MAX
+};
+
+/* Discriminates graphics vs. compute pipelines (see pvr_pipeline::type). */
+enum pvr_pipeline_type {
+   PVR_PIPELINE_TYPE_INVALID = 0, /* explicitly treat 0 as invalid */
+   PVR_PIPELINE_TYPE_GRAPHICS,
+   PVR_PIPELINE_TYPE_COMPUTE,
+};
+
+/* Bitmask form of enum pvr_job_type, used wherever a set of stages is
+ * tracked rather than a single stage.
+ */
+enum pvr_pipeline_stage_bits {
+   PVR_PIPELINE_STAGE_GEOM_BIT = BITFIELD_BIT(PVR_JOB_TYPE_GEOM),
+   PVR_PIPELINE_STAGE_FRAG_BIT = BITFIELD_BIT(PVR_JOB_TYPE_FRAG),
+   PVR_PIPELINE_STAGE_COMPUTE_BIT = BITFIELD_BIT(PVR_JOB_TYPE_COMPUTE),
+   PVR_PIPELINE_STAGE_TRANSFER_BIT = BITFIELD_BIT(PVR_JOB_TYPE_TRANSFER),
+};
+
+#define PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS \
+   (PVR_PIPELINE_STAGE_GEOM_BIT | PVR_PIPELINE_STAGE_FRAG_BIT)
+
+/* Note: the compute bit is deliberately absent here for now (see the TODO
+ * below about adding compute support).
+ */
+#define PVR_PIPELINE_STAGE_ALL_BITS \
+   (PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS | PVR_PIPELINE_STAGE_TRANSFER_BIT)
+
+/* TODO: This number must be changed when we add compute support. */
+#define PVR_NUM_SYNC_PIPELINE_STAGES 3U
+
+/* Warning: Do not define an invalid stage as 0 since other code relies on 0
+ * being the first shader stage. This allows for stages to be split or added
+ * in the future. Defining 0 as invalid will very likely cause problems.
+ */
+/* Shader stage groupings used to index per-stage allocation arrays; vertex
+ * and geometry share a single allocation.
+ */
+enum pvr_stage_allocation {
+   PVR_STAGE_ALLOCATION_VERTEX_GEOMETRY,
+   PVR_STAGE_ALLOCATION_FRAGMENT,
+   PVR_STAGE_ALLOCATION_COMPUTE,
+   PVR_STAGE_ALLOCATION_COUNT
+};
+
+/* Scissor accumulation state defines
+ *  - Disabled: a clear has been detected, so scissor accumulation should
+ *    stop.
+ *  - Check for clear: there are no clear loadops, but a later clear call
+ *    could still be broken by scissoring.
+ *  - Enabled: a scissor has been set in the pipeline, so accumulation can
+ *    continue.
+ *
+ * See pvr_cmd_buffer_state::dynamic.scissor_accum_state.
+ */
+enum pvr_scissor_accum_state {
+   PVR_SCISSOR_ACCUM_INVALID = 0, /* Explicitly treat 0 as invalid */
+   PVR_SCISSOR_ACCUM_DISABLED,
+   PVR_SCISSOR_ACCUM_CHECK_FOR_CLEAR,
+   PVR_SCISSOR_ACCUM_ENABLED,
+};
+
+struct pvr_bo;
+struct pvr_compute_ctx;
+struct pvr_compute_pipeline;
+struct pvr_free_list;
+struct pvr_graphics_pipeline;
+struct pvr_instance;
+struct pvr_render_ctx;
+struct rogue_compiler;
+
+/* Per-shader-stage descriptor count limits. NOTE(review): presumably used to
+ * populate the reported VkPhysicalDeviceLimits — confirm against the device
+ * property code.
+ */
+struct pvr_descriptor_limits {
+   uint32_t max_per_stage_resources;
+   uint32_t max_per_stage_samplers;
+   uint32_t max_per_stage_uniform_buffers;
+   uint32_t max_per_stage_storage_buffers;
+   uint32_t max_per_stage_sampled_images;
+   uint32_t max_per_stage_storage_images;
+   uint32_t max_per_stage_input_attachments;
+};
+
+/* Driver representation of a VkPhysicalDevice. */
+struct pvr_physical_device {
+   struct vk_physical_device vk;
+
+   /* Back-pointer to instance */
+   struct pvr_instance *instance;
+
+   char *name;
+   /* fds and device paths — presumably DRM master and render nodes; confirm
+    * in the winsys code.
+    */
+   int master_fd;
+   int render_fd;
+   char *master_path;
+   char *render_path;
+
+   struct pvr_winsys *ws;
+   struct pvr_device_info dev_info;
+
+   VkPhysicalDeviceMemoryProperties memory;
+
+   /* UUID written into, and validated against, pipeline cache headers. */
+   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
+
+   struct wsi_device wsi_device;
+
+   struct rogue_compiler *compiler;
+};
+
+/* Driver representation of a VkInstance. Only a single physical device is
+ * currently supported (note the single embedded pvr_physical_device).
+ */
+struct pvr_instance {
+   struct vk_instance vk;
+
+   int physical_devices_count;
+   struct pvr_physical_device physical_device;
+};
+
+/* A VkQueue together with the render/compute contexts it submits through. */
+struct pvr_queue {
+   struct vk_queue vk;
+
+   struct pvr_device *device;
+
+   struct pvr_render_ctx *gfx_ctx;
+   struct pvr_compute_ctx *compute_ctx;
+
+   /* Per-job-type sync objects — presumably the completion sync of the last
+    * submission of each type; confirm at submit time.
+    */
+   struct pvr_winsys_syncobj *completion[PVR_JOB_TYPE_MAX];
+};
+
+/* VkSemaphore backed by a winsys sync object. */
+struct pvr_semaphore {
+   struct vk_object_base base;
+
+   struct pvr_winsys_syncobj *syncobj;
+};
+
+/* VkFence backed by a winsys sync object. */
+struct pvr_fence {
+   struct vk_object_base base;
+
+   struct pvr_winsys_syncobj *syncobj;
+};
+
+/* One bound vertex buffer, i.e. one vkCmdBindVertexBuffers() entry. */
+struct pvr_vertex_binding {
+   struct pvr_buffer *buffer;
+   VkDeviceSize offset;
+};
+
+/* A PDS program uploaded to a device buffer, addressed relative to the PDS
+ * heap.
+ */
+struct pvr_pds_upload {
+   struct pvr_bo *pvr_bo;
+   /* Offset from the pds heap base address. */
+   uint32_t data_offset;
+   /* Offset from the pds heap base address. */
+   uint32_t code_offset;
+
+   /* data_size + code_size = program_size. */
+   uint32_t data_size;
+   uint32_t code_size;
+};
+
+/* Driver representation of a VkDevice (logical device). */
+struct pvr_device {
+   struct vk_device vk;
+   struct pvr_instance *instance;
+   struct pvr_physical_device *pdevice;
+
+   int master_fd;
+   int render_fd;
+
+   struct pvr_winsys *ws;
+   struct pvr_winsys_heaps heaps;
+
+   struct pvr_free_list *global_free_list;
+
+   struct pvr_queue *queues;
+   uint32_t queue_count;
+
+   /* Running count of the number of job submissions across all queue. */
+   uint32_t global_queue_job_count;
+
+   /* Running count of the number of presentations across all queues. */
+   uint32_t global_queue_present_count;
+
+   uint32_t pixel_event_data_size_in_dwords;
+
+   /* PDS program used to fence compute work (see pds_compute_fence name —
+    * confirm exact semantics at the usage site).
+    */
+   struct pvr_pds_upload pds_compute_fence_program;
+
+   VkPhysicalDeviceFeatures features;
+};
+
+/* VkDeviceMemory: a thin wrapper over a winsys buffer object. */
+struct pvr_device_memory {
+   struct vk_object_base base;
+   struct pvr_winsys_bo *bo;
+};
+
+/* Layout of one mip level within a pvr_image. */
+struct pvr_mip_level {
+   /* Offset of the mip level in bytes */
+   uint32_t offset;
+
+   /* Aligned mip level size in bytes */
+   uint32_t size;
+
+   /* Aligned row length in bytes */
+   uint32_t pitch;
+
+   /* Aligned height in bytes */
+   uint32_t height_pitch;
+};
+
+/* VkImage implementation. */
+struct pvr_image {
+   struct vk_image vk;
+
+   /* vma this image is bound to */
+   struct pvr_winsys_vma *vma;
+
+   /* Device address the image is mapped to in device virtual address space */
+   pvr_dev_addr_t dev_addr;
+
+   /* Derived and other state */
+   VkExtent3D physical_extent;
+   enum pvr_memlayout memlayout;
+   VkDeviceSize layer_size;
+   VkDeviceSize size;
+
+   VkDeviceSize alignment;
+
+   /* NOTE(review): hard-coded maximum of 14 mip levels — presumably derived
+    * from the maximum supported image dimension; confirm and consider a
+    * named constant.
+    */
+   struct pvr_mip_level mip_levels[14];
+};
+
+/* VkBuffer implementation. */
+struct pvr_buffer {
+   struct vk_object_base base;
+
+   /* Saved information from pCreateInfo */
+   VkDeviceSize size;
+
+   /* Derived and other state */
+   uint32_t alignment;
+   /* vma this buffer is bound to */
+   struct pvr_winsys_vma *vma;
+   /* Device address the buffer is mapped to in device virtual address space */
+   pvr_dev_addr_t dev_addr;
+};
+
+/* VkImageView implementation. */
+struct pvr_image_view {
+   struct vk_image_view vk;
+
+   /* Saved information from pCreateInfo. */
+   const struct pvr_image *image;
+
+   /* Prepacked Texture Image dword 0 and 1. It will be copied to the
+    * descriptor info during pvr_UpdateDescriptorSets.
+    *
+    * We create separate texture states for sampling, storage and input
+    * attachment cases (indexed by enum pvr_texture_state).
+    */
+   uint64_t texture_state[PVR_TEXTURE_STATE_MAX_ENUM][2];
+};
+
+/* VkSampler: currently carries no driver state beyond the base object. */
+struct pvr_sampler {
+   struct vk_object_base base;
+};
+
+/* Size/alignment requirements for one descriptor type. NOTE(review): units
+ * are presumably dwords, matching pvr_descriptor_set_layout_mem_layout —
+ * confirm.
+ */
+struct pvr_descriptor_size_info {
+   /* Non-spillable size for storage in the common store. */
+   uint32_t primary;
+
+   /* Spillable size to accommodate limitation of the common store. */
+   uint32_t secondary;
+
+   uint32_t alignment;
+};
+
+/* One binding within a descriptor set layout. */
+struct pvr_descriptor_set_layout_binding {
+   VkDescriptorType type;
+
+   /* "M" in layout(set = N, binding = M)
+    * Can be used to index bindings in the descriptor_set_layout. Not the
+    * original user specified binding number as those might be non-contiguous.
+    */
+   uint32_t binding_number;
+
+   uint32_t descriptor_count;
+
+   /* Index into the flattened descriptor set */
+   uint16_t descriptor_index;
+
+   VkShaderStageFlags shader_stages;
+   /* Mask composed by shifted PVR_STAGE_ALLOCATION_...
+    * Makes it easier to check active shader stages by just shifting and
+    * ANDing instead of using VkShaderStageFlags and match the PVR_STAGE_...
+    */
+   uint32_t shader_stage_mask;
+
+   /* Offsets of this binding's descriptors within the set's primary and
+    * secondary regions, per stage allocation.
+    */
+   struct {
+      uint32_t primary;
+      uint32_t secondary;
+   } per_stage_offset_in_dwords[PVR_STAGE_ALLOCATION_COUNT];
+
+   /* Index at which the samplers can be found in the descriptor_set_layout.
+    * 0 when the samplers are at index 0 or no samplers are present.
+    * Check descriptor_count to differentiate. It will be 0 for 0 samplers.
+    */
+   uint32_t immutable_samplers_index;
+};
+
+/* Memory layout of a set's descriptors, split into primary and secondary
+ * regions (see struct pvr_descriptor_size_info for their meaning).
+ * All sizes are in dwords.
+ */
+struct pvr_descriptor_set_layout_mem_layout {
+   uint32_t primary_offset;
+   uint32_t primary_size;
+
+   uint32_t secondary_offset;
+   uint32_t secondary_size;
+
+   uint32_t primary_dynamic_size;
+   uint32_t secondary_dynamic_size;
+};
+
+/* VkDescriptorSetLayout implementation. */
+struct pvr_descriptor_set_layout {
+   struct vk_object_base base;
+
+   /* Total amount of descriptors contained in this set. */
+   uint32_t descriptor_count;
+
+   /* Count of dynamic buffers. */
+   uint32_t dynamic_buffer_count;
+
+   uint32_t binding_count;
+   struct pvr_descriptor_set_layout_binding *bindings;
+
+   uint32_t immutable_sampler_count;
+   struct pvr_sampler **immutable_samplers;
+
+   /* Shader stages requiring access to descriptors in this set. */
+   VkShaderStageFlags shader_stages;
+
+   /* Count of each VkDescriptorType per shader stage. Dynamically allocated
+    * arrays per stage as to not hard code the max descriptor type here.
+    *
+    * Note: when adding a new type, it might not numerically follow the
+    * previous type so a sparse array will be created. You might want to
+    * readjust how these arrays are created and accessed.
+    */
+   uint32_t *per_stage_descriptor_count[PVR_STAGE_ALLOCATION_COUNT];
+
+   uint32_t total_size_in_dwords;
+   struct pvr_descriptor_set_layout_mem_layout
+      memory_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT];
+};
+
+/* VkDescriptorPool implementation; pool capacity is tracked in dwords. */
+struct pvr_descriptor_pool {
+   struct vk_object_base base;
+
+   VkAllocationCallbacks alloc;
+
+   /* Saved information from pCreateInfo. */
+   uint32_t max_sets;
+
+   uint32_t total_size_in_dwords;
+   uint32_t current_size_in_dwords;
+
+   /* Derived and other state. */
+   /* List of the descriptor sets created using this pool. */
+   struct list_head descriptor_sets;
+};
+
+/* One written descriptor. Currently only buffer descriptors are represented
+ * (see the TODO below).
+ */
+struct pvr_descriptor {
+   VkDescriptorType type;
+
+   /* TODO: Follow anv_descriptor layout when adding support for
+    * other descriptor types.
+    */
+   pvr_dev_addr_t buffer_dev_addr;
+   VkDeviceSize buffer_desc_range;
+   VkDeviceSize buffer_create_info_size;
+};
+
+/* VkDescriptorSet implementation. */
+struct pvr_descriptor_set {
+   struct vk_object_base base;
+
+   const struct pvr_descriptor_set_layout *layout;
+   const struct pvr_descriptor_pool *pool;
+
+   struct pvr_bo *pvr_bo;
+
+   /* Links this descriptor set into pvr_descriptor_pool::descriptor_sets list.
+    */
+   struct list_head link;
+
+   /* Array of size layout::descriptor_count. */
+   struct pvr_descriptor descriptors[0];
+};
+
+/* Descriptor sets currently bound on one bind point. valid_mask presumably
+ * has bit i set when descriptor_sets[i] holds a bound set — confirm at the
+ * bind/flush sites.
+ */
+struct pvr_descriptor_state {
+   struct pvr_descriptor_set *descriptor_sets[PVR_MAX_DESCRIPTOR_SETS];
+   uint32_t valid_mask;
+};
+
+/* One recorded buffer-copy operation belonging to a transfer sub command. */
+struct pvr_transfer_cmd {
+   /* Node to link this cmd into the transfer_cmds list in
+    * pvr_sub_cmd::transfer structure.
+    */
+   struct list_head link;
+
+   struct pvr_buffer *src;
+   struct pvr_buffer *dst;
+   uint32_t region_count;
+   VkBufferCopy2 regions[0];
+};
+
+/* A sub command: one contiguous run of same-type work recorded into a command
+ * buffer. `type` selects which union member is valid.
+ */
+struct pvr_sub_cmd {
+   /* This links the subcommand in pvr_cmd_buffer:sub_cmds list. */
+   struct list_head link;
+
+   enum pvr_sub_cmd_type type;
+
+   union {
+      struct {
+         const struct pvr_framebuffer *framebuffer;
+
+         struct pvr_render_job job;
+
+         struct pvr_bo *depth_bias_bo;
+         struct pvr_bo *scissor_bo;
+
+         /* Tracking how the loaded depth/stencil values are being used. */
+         enum pvr_depth_stencil_usage depth_usage;
+         enum pvr_depth_stencil_usage stencil_usage;
+
+         /* Tracking whether the subcommand modifies depth/stencil. */
+         bool modifies_depth;
+         bool modifies_stencil;
+
+         /* Control stream builder object */
+         struct pvr_csb control_stream;
+
+         uint32_t hw_render_idx;
+
+         uint32_t max_tiles_in_flight;
+
+         bool empty_cmd;
+
+         /* True if any fragment shader used in this sub command uses atomic
+          * operations.
+          */
+         bool frag_uses_atomic_ops;
+
+         bool disable_compute_overlap;
+
+         /* True if any fragment shader used in this sub command has side
+          * effects.
+          */
+         bool frag_has_side_effects;
+
+         /* True if any vertex shader used in this sub command contains both
+          * texture reads and texture writes.
+          */
+         bool vertex_uses_texture_rw;
+
+         /* True if any fragment shader used in this sub command contains
+          * both texture reads and texture writes.
+          */
+         bool frag_uses_texture_rw;
+      } gfx;
+
+      struct {
+         /* Control stream builder object. */
+         struct pvr_csb control_stream;
+
+         struct pvr_winsys_compute_submit_info submit_info;
+
+         uint32_t num_shared_regs;
+
+         /* True if any shader used in this sub command uses atomic
+          * operations.
+          */
+         bool uses_atomic_ops;
+
+         bool uses_barrier;
+      } compute;
+
+      struct {
+         /* List of pvr_transfer_cmd type structures. */
+         struct list_head transfer_cmds;
+      } transfer;
+   };
+};
+
+/* State of the render pass instance currently being recorded. */
+struct pvr_render_pass_info {
+   const struct pvr_render_pass *pass;
+   struct pvr_framebuffer *framebuffer;
+
+   struct pvr_image_view **attachments;
+
+   uint32_t subpass_idx;
+   uint32_t current_hw_subpass;
+
+   VkRect2D render_area;
+
+   uint32_t clear_value_count;
+   VkClearValue *clear_values;
+
+   VkPipelineBindPoint pipeline_bind_point;
+
+   bool process_empty_tiles;
+   bool enable_bg_tag;
+   uint32_t userpass_spawn;
+
+   /* Have we had to scissor a depth/stencil clear because render area was not
+    * tile aligned?
+    */
+   bool scissor_ds_clear;
+};
+
+/* One flag per PPP state group; a set bit marks state that still needs to be
+ * emitted. Overlaid with emit_state_bits in pvr_cmd_buffer_state so all
+ * flags can be set or cleared at once.
+ */
+struct pvr_emit_state {
+   bool ppp_control : 1;
+   bool isp : 1;
+   bool isp_fb : 1;
+   bool isp_ba : 1;
+   bool isp_bb : 1;
+   bool isp_dbsc : 1;
+   bool pds_fragment_stateptr0 : 1;
+   bool pds_fragment_stateptr1 : 1;
+   bool pds_fragment_stateptr2 : 1;
+   bool pds_fragment_stateptr3 : 1;
+   bool region_clip : 1;
+   bool viewport : 1;
+   bool wclamp : 1;
+   bool output_selects : 1;
+   bool varying_word0 : 1;
+   bool varying_word1 : 1;
+   bool varying_word2 : 1;
+   bool stream_out : 1;
+};
+
+/* CPU-side shadow of the PPP state words — presumably emitted into the
+ * control stream when flagged dirty in pvr_emit_state; confirm at the
+ * emission site.
+ */
+struct pvr_ppp_state {
+   uint32_t header;
+
+   struct {
+      /* TODO: Can we get rid of the "control" field? */
+      struct pvr_cmd_struct(TA_STATE_ISPCTL) control_struct;
+      uint32_t control;
+
+      uint32_t front_a;
+      uint32_t front_b;
+      uint32_t back_a;
+      uint32_t back_b;
+   } isp;
+
+   struct {
+      uint16_t scissor_index;
+      uint16_t depthbias_index;
+   } depthbias_scissor_indices;
+
+   struct {
+      uint32_t pixel_shader_base;
+      uint32_t texture_uniform_code_base;
+      uint32_t size_info1;
+      uint32_t size_info2;
+      uint32_t varying_base;
+      uint32_t texture_state_data_base;
+      uint32_t uniform_state_data_base;
+   } pds;
+
+   struct {
+      uint32_t word0;
+      uint32_t word1;
+   } region_clipping;
+
+   struct {
+      uint32_t a0;
+      uint32_t m0;
+      uint32_t a1;
+      uint32_t m1;
+      uint32_t a2;
+      uint32_t m2;
+   } viewports[PVR_MAX_VIEWPORTS];
+
+   uint32_t viewport_count;
+
+   uint32_t output_selects;
+
+   uint32_t varying_word[2];
+
+   uint32_t ppp_control;
+};
+
+/* Bits for pvr_dynamic_state::mask, marking which pipeline state is dynamic.
+ * PVR_DYNAMIC_STATE_ALL_BITS relies on BLEND_CONSTANTS being the highest bit.
+ */
+#define PVR_DYNAMIC_STATE_BIT_VIEWPORT BITFIELD_BIT(0U)
+#define PVR_DYNAMIC_STATE_BIT_SCISSOR BITFIELD_BIT(1U)
+#define PVR_DYNAMIC_STATE_BIT_LINE_WIDTH BITFIELD_BIT(2U)
+#define PVR_DYNAMIC_STATE_BIT_DEPTH_BIAS BITFIELD_BIT(3U)
+#define PVR_DYNAMIC_STATE_BIT_STENCIL_COMPARE_MASK BITFIELD_BIT(4U)
+#define PVR_DYNAMIC_STATE_BIT_STENCIL_WRITE_MASK BITFIELD_BIT(5U)
+#define PVR_DYNAMIC_STATE_BIT_STENCIL_REFERENCE BITFIELD_BIT(6U)
+#define PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS BITFIELD_BIT(7U)
+
+#define PVR_DYNAMIC_STATE_ALL_BITS \
+   ((PVR_DYNAMIC_STATE_BIT_BLEND_CONSTANTS << 1U) - 1U)
+
+/* Pipeline dynamic state: static defaults from pipeline creation plus values
+ * set via vkCmdSet*().
+ */
+struct pvr_dynamic_state {
+   /* Identifies which pipeline state is static or dynamic.
+    * To test for dynamic: & PVR_DYNAMIC_STATE_BIT_...
+    */
+   uint32_t mask;
+
+   struct {
+      /* TODO: fixme in the original code - figure out what. */
+      uint32_t count;
+      VkViewport viewports[PVR_MAX_VIEWPORTS];
+   } viewport;
+
+   struct {
+      /* TODO: fixme in the original code - figure out what. */
+      uint32_t count;
+      VkRect2D scissors[PVR_MAX_VIEWPORTS];
+   } scissor;
+
+   /* Saved information from pCreateInfo. */
+   float line_width;
+
+   struct {
+      /* Saved information from pCreateInfo. */
+      float constant_factor;
+      float clamp;
+      float slope_factor;
+   } depth_bias;
+   float blend_constants[4];
+   /* Front/back stencil compare masks, write masks and reference values. */
+   struct {
+      uint32_t front;
+      uint32_t back;
+   } compare_mask;
+   struct {
+      uint32_t front;
+      uint32_t back;
+   } write_mask;
+   struct {
+      uint32_t front;
+      uint32_t back;
+   } reference;
+};
+
+/* Draw-call parameters tracked between draws so that PDS program variants
+ * can be reselected only when they actually change (see the
+ * dirty.draw_base_instance comment in pvr_cmd_buffer_state).
+ */
+struct pvr_cmd_buffer_draw_state {
+   uint32_t base_instance;
+   uint32_t base_vertex;
+   bool draw_indirect;
+   bool draw_indexed;
+};
+
+/* All mutable recording state of a command buffer. */
+struct pvr_cmd_buffer_state {
+   /* Sticky error status; recording failures land here. */
+   VkResult status;
+
+   /* Pipeline binding. */
+   const struct pvr_graphics_pipeline *gfx_pipeline;
+
+   const struct pvr_compute_pipeline *compute_pipeline;
+
+   struct pvr_render_pass_info render_pass_info;
+
+   struct pvr_sub_cmd *current_sub_cmd;
+
+   struct pvr_ppp_state ppp_state;
+
+   union {
+      struct pvr_emit_state emit_state;
+      /* This is intended to allow setting and clearing of all bits. This
+       * shouldn't be used to access specific bits of ppp_state.
+       */
+      uint32_t emit_state_bits;
+   };
+
+   struct {
+      /* FIXME: Check if we need a dirty state flag for the given scissor
+       * accumulation state.
+       * Check whether these members should be moved in the top level struct
+       * and this struct replaces with just pvr_dynamic_state "dynamic".
+       */
+      enum pvr_scissor_accum_state scissor_accum_state;
+      VkRect2D scissor_accum_bounds;
+
+      struct pvr_dynamic_state common;
+   } dynamic;
+
+   struct pvr_vertex_binding vertex_bindings[PVR_MAX_VERTEX_INPUT_BINDINGS];
+
+   struct {
+      struct pvr_buffer *buffer;
+      VkDeviceSize offset;
+      VkIndexType type;
+   } index_buffer_binding;
+
+   struct {
+      uint8_t data[PVR_MAX_PUSH_CONSTANTS_SIZE];
+      VkShaderStageFlags dirty_stages;
+   } push_constants;
+
+   /* Array size of barriers_needed is based on number of sync pipeline
+    * stages.
+    * NOTE(review): sized 4 while PVR_NUM_SYNC_PIPELINE_STAGES is 3U —
+    * confirm whether this should instead use the define.
+    */
+   uint32_t barriers_needed[4];
+
+   struct pvr_descriptor_state gfx_desc_state;
+   struct pvr_descriptor_state compute_desc_state;
+
+   VkFormat depth_format;
+
+   /* Dirty flags: set when the corresponding state changed since it was last
+    * flushed to the hardware state.
+    */
+   struct {
+      bool viewport : 1;
+      bool scissor : 1;
+
+      bool compute_pipeline_binding : 1;
+      bool compute_desc_dirty : 1;
+
+      bool gfx_pipeline_binding : 1;
+      bool gfx_desc_dirty : 1;
+
+      bool vertex_bindings : 1;
+      bool index_buffer_binding : 1;
+      bool vertex_descriptors : 1;
+      bool fragment_descriptors : 1;
+
+      bool line_width : 1;
+
+      bool depth_bias : 1;
+
+      bool blend_constants : 1;
+
+      bool compare_mask : 1;
+      bool write_mask : 1;
+      bool reference : 1;
+
+      bool userpass_spawn : 1;
+
+      /* Some draw state needs to be tracked for changes between draw calls
+       * i.e. if we get a draw with baseInstance=0, followed by a call with
+       * baseInstance=1 that needs to cause us to select a different PDS
+       * attrib program and update the BASE_INSTANCE PDS const. If only
+       * baseInstance changes then we just have to update the data section.
+       */
+      bool draw_base_instance : 1;
+      bool draw_variant : 1;
+   } dirty;
+
+   struct pvr_cmd_buffer_draw_state draw_state;
+
+   struct {
+      uint32_t code_offset;
+      const struct pvr_pds_info *info;
+   } pds_shader;
+
+   uint32_t max_shared_regs;
+
+   /* Address of data segment for vertex attrib upload program. */
+   uint32_t pds_vertex_attrib_offset;
+
+   uint32_t pds_fragment_uniform_data_offset;
+};
+
+/* emit_state_bits is unioned with emit_state so that all emit flags can be
+ * set/cleared in one store; it must therefore be at least as large as the
+ * bitfield struct it overlays.
+ */
+static_assert(
+   sizeof(((struct pvr_cmd_buffer_state *)(0))->emit_state) <=
+      sizeof(((struct pvr_cmd_buffer_state *)(0))->emit_state_bits),
+   "Size of emit_state_bits must be greater than or equal to emit_state.");
+
+/* VkCommandBuffer implementation. */
+struct pvr_cmd_buffer {
+   struct vk_command_buffer vk;
+
+   struct pvr_device *device;
+
+   /* Buffer status, invalid/initial/recording/executable */
+   enum pvr_cmd_buffer_status status;
+
+   /* Buffer usage flags */
+   VkCommandBufferUsageFlags usage_flags;
+
+   struct util_dynarray depth_bias_array;
+
+   struct util_dynarray scissor_array;
+   uint32_t scissor_words[2];
+
+   struct pvr_cmd_buffer_state state;
+
+   /* List of pvr_bo structs associated with this cmd buffer. */
+   struct list_head bo_list;
+
+   /* List of recorded pvr_sub_cmd structures. */
+   struct list_head sub_cmds;
+};
+
+/* VkPipelineLayout implementation: per-stage layout of all descriptor sets
+ * plus push constant usage.
+ */
+struct pvr_pipeline_layout {
+   struct vk_object_base base;
+
+   uint32_t set_count;
+   /* Contains set_count amount of descriptor set layouts. */
+   struct pvr_descriptor_set_layout *set_layout[PVR_MAX_DESCRIPTOR_SETS];
+
+   VkShaderStageFlags push_constants_shader_stages;
+
+   VkShaderStageFlags shader_stages;
+
+   /* Per stage masks indicating which set in the layout contains any
+    * descriptor of the appropriate types: VK..._{SAMPLER, SAMPLED_IMAGE,
+    * UNIFORM_TEXEL_BUFFER, UNIFORM_BUFFER, STORAGE_BUFFER}.
+    * Shift by the set's number to check the mask (1U << set_num).
+    */
+   uint32_t per_stage_descriptor_masks[PVR_STAGE_ALLOCATION_COUNT];
+
+   /* Array of descriptor offsets at which the set's descriptors' start, per
+    * stage, within all the sets in the pipeline layout per descriptor type.
+    * Note that we only store into for specific descriptor types
+    * VK_DESCRIPTOR_TYPE_{SAMPLER, SAMPLED_IMAGE, UNIFORM_TEXEL_BUFFER,
+    * UNIFORM_BUFFER, STORAGE_BUFFER}, the rest will be 0.
+    */
+   uint32_t
+      descriptor_offsets[PVR_MAX_DESCRIPTOR_SETS][PVR_STAGE_ALLOCATION_COUNT]
+                        [PVR_PIPELINE_LAYOUT_SUPPORTED_DESCRIPTOR_TYPE_COUNT];
+
+   /* There is no accounting for dynamics in here. They will be garbage values.
+    */
+   struct pvr_descriptor_set_layout_mem_layout
+      register_layout_in_dwords_per_stage[PVR_STAGE_ALLOCATION_COUNT]
+                                         [PVR_MAX_DESCRIPTOR_SETS];
+
+   /* All sizes in dwords. */
+   struct pvr_pipeline_layout_reg_info {
+      uint32_t primary_dynamic_size_in_dwords;
+      uint32_t secondary_dynamic_size_in_dwords;
+   } per_stage_reg_info[PVR_STAGE_ALLOCATION_COUNT];
+};
+
+/* VkPipelineCache implementation. Currently stores no cached data; see the
+ * TODOs in pvr_pipeline_cache.c.
+ */
+struct pvr_pipeline_cache {
+   struct vk_object_base base;
+
+   struct pvr_device *device;
+};
+
+/* Uploaded PDS uniform program and its layout info for one shader stage
+ * allocation.
+ */
+struct pvr_stage_allocation_uniform_state {
+   struct pvr_pds_upload pds_code;
+   /* Since we upload the code segment separately from the data segment
+    * pds_code->data_size might be 0 whilst
+    * pds_info->data_size_in_dwords might be >0 in the case of this struct
+    * referring to the code upload.
+    */
+   struct pvr_pds_info pds_info;
+};
+
+/* One PDS vertex-attribute upload program variant (see
+ * pvr_vertex_shader_state::pds_attrib_programs).
+ */
+struct pvr_pds_attrib_program {
+   struct pvr_pds_info info;
+   /* The uploaded PDS program stored here only contains the code segment,
+    * meaning the data size will be 0, unlike the data size stored in the
+    * 'info' member above.
+    */
+   struct pvr_pds_upload program;
+};
+
+/* Per-stage resource usage and behavior flags recorded for one shader stage
+ * of a pipeline.
+ */
+struct pvr_pipeline_stage_state {
+   /* Register/temp allocation for this stage. Units presumably registers/
+    * dwords — TODO confirm against the compiler output that fills these in.
+    */
+   uint32_t const_shared_reg_count;
+   uint32_t const_shared_reg_offset;
+   uint32_t temps_count;
+
+   uint32_t coefficient_size;
+
+   /* True if this shader uses any atomic operations. */
+   bool uses_atomic_ops;
+
+   /* True if this shader uses both texture reads and texture writes. */
+   bool uses_texture_rw;
+
+   /* Only used for compute stage. */
+   bool uses_barrier;
+
+   /* True if this shader has side effects */
+   bool has_side_effects;
+
+   /* True if this shader is simply a nop.end. */
+   bool empty_program;
+};
+
+/* Vertex-stage state of a graphics pipeline: the uploaded shader binary,
+ * its PDS attribute programs, and derived sizing/select state.
+ */
+struct pvr_vertex_shader_state {
+   /* Pointer to a buffer object that contains the shader binary. */
+   struct pvr_bo *bo;
+   uint32_t entry_offset;
+
+   /* 2 since we only need STATE_VARYING{0,1} state words. */
+   uint32_t varying[2];
+
+   struct pvr_pds_attrib_program
+      pds_attrib_programs[PVR_PDS_VERTEX_ATTRIB_PROGRAM_COUNT];
+
+   struct pvr_pipeline_stage_state stage_state;
+   /* FIXME: Move this into stage_state? */
+   struct pvr_stage_allocation_uniform_state uniform_state;
+   uint32_t vertex_input_size;
+   uint32_t vertex_output_size;
+   uint32_t output_selects;
+   uint32_t user_clip_planes_mask;
+};
+
+/* Fragment-stage state of a graphics pipeline: the uploaded shader binary
+ * plus its PDS coefficient and fragment programs.
+ */
+struct pvr_fragment_shader_state {
+   /* Pointer to a buffer object that contains the shader binary. */
+   struct pvr_bo *bo;
+   uint32_t entry_offset;
+
+   struct pvr_pipeline_stage_state stage_state;
+   /* FIXME: Move this into stage_state? */
+   struct pvr_stage_allocation_uniform_state uniform_state;
+   uint32_t pass_type;
+
+   struct pvr_pds_upload pds_coeff_program;
+   struct pvr_pds_upload pds_fragment_program;
+};
+
+/* Base pipeline object; embedded as 'base' by pvr_compute_pipeline and
+ * pvr_graphics_pipeline, with 'type' discriminating the downcast (see
+ * to_pvr_*_pipeline()).
+ */
+struct pvr_pipeline {
+   struct vk_object_base base;
+
+   enum pvr_pipeline_type type;
+
+   /* Saved information from pCreateInfo. */
+   struct pvr_pipeline_layout *layout;
+};
+
+/* Compute pipeline: base object plus the compiled shader, its PDS primary
+ * program, and an optional variant that patches in a base workgroup id.
+ */
+struct pvr_compute_pipeline {
+   struct pvr_pipeline base;
+
+   struct {
+      /* Pointer to a buffer object that contains the shader binary. */
+      struct pvr_bo *bo;
+
+      struct {
+         uint32_t base_workgroup : 1;
+      } flags;
+
+      struct pvr_stage_allocation_uniform_state uniform;
+
+      struct pvr_pds_upload primary_program;
+      struct pvr_pds_info primary_program_info;
+
+      struct pvr_pds_upload primary_program_base_workgroup_variant;
+      struct pvr_pds_info primary_program_base_workgroup_variant_info;
+      /* Offset within the PDS data section at which the base workgroup id
+       * resides.
+       */
+      uint32_t base_workgroup_ids_dword_offset;
+   } state;
+};
+
+/* Graphics pipeline: base object plus fixed-function state captured from
+ * pCreateInfo and the per-stage shader state.
+ */
+struct pvr_graphics_pipeline {
+   struct pvr_pipeline base;
+
+   VkSampleCountFlagBits rasterization_samples;
+   struct pvr_raster_state {
+      /* Derived and other state. */
+      /* Indicates whether primitives are discarded immediately before the
+       * rasterization stage.
+       */
+      bool discard_enable;
+      VkCullModeFlags cull_mode;
+      VkFrontFace front_face;
+      bool depth_bias_enable;
+      bool depth_clamp_enable;
+   } raster_state;
+   struct {
+      VkPrimitiveTopology topology;
+      bool primitive_restart;
+   } input_asm_state;
+   uint32_t sample_mask;
+
+   struct pvr_dynamic_state dynamic_state;
+
+   VkCompareOp depth_compare_op;
+   bool depth_write_disable;
+
+   /* Stencil state, front and back faces. The SOPn comments name the
+    * hardware stencil-op slots each Vulkan op maps to.
+    */
+   struct {
+      VkCompareOp compare_op;
+      /* SOP1 */
+      VkStencilOp fail_op;
+      /* SOP2 */
+      VkStencilOp depth_fail_op;
+      /* SOP3 */
+      VkStencilOp pass_op;
+   } stencil_front, stencil_back;
+
+   /* Derived and other state */
+   size_t stage_indices[MESA_SHADER_FRAGMENT + 1];
+
+   struct pvr_vertex_shader_state vertex_shader_state;
+   struct pvr_fragment_shader_state fragment_shader_state;
+};
+
+/* One render target of a framebuffer, wrapping a hardware RT dataset.
+ * NOTE(review): the mutex presumably guards lazy creation/validation of
+ * rt_dataset ('valid' flag) — confirm against the users of this struct.
+ */
+struct pvr_render_target {
+   struct pvr_rt_dataset *rt_dataset;
+
+   pthread_mutex_t mutex;
+
+   bool valid;
+};
+
+/* Backs VkFramebuffer: attachment views plus pre-baked PPP state and the
+ * render targets used when submitting jobs against this framebuffer.
+ */
+struct pvr_framebuffer {
+   struct vk_object_base base;
+
+   /* Saved information from pCreateInfo. */
+   uint32_t width;
+   uint32_t height;
+   uint32_t layers;
+
+   uint32_t attachment_count;
+   struct pvr_image_view **attachments;
+
+   /* Derived and other state. */
+   struct pvr_bo *ppp_state_bo;
+   /* PPP state size in dwords. */
+   size_t ppp_state_size;
+
+   uint32_t render_targets_count;
+   struct pvr_render_target *render_targets;
+};
+
+/* One attachment description of a render pass, saved from pCreateInfo with
+ * some derived per-attachment properties.
+ */
+struct pvr_render_pass_attachment {
+   /* Saved information from pCreateInfo. */
+   VkAttachmentLoadOp load_op;
+
+   VkAttachmentStoreOp store_op;
+
+   VkAttachmentLoadOp stencil_load_op;
+
+   VkAttachmentStoreOp stencil_store_op;
+
+   VkFormat vk_format;
+   uint32_t sample_count;
+   VkImageLayout initial_layout;
+
+   /*  Derived and other state. */
+   /* True if the attachment format includes a stencil component. */
+   bool has_stencil;
+
+   /* Can this surface be resolved by the PBE. */
+   bool is_pbe_downscalable;
+
+   /* Index of this attachment within the pass's attachments array. */
+   uint32_t index;
+};
+
+/* One subpass of a render pass: attachment references saved from
+ * pCreateInfo plus derived dependency information.
+ */
+struct pvr_render_subpass {
+   /* Saved information from pCreateInfo. */
+   /* The number of samples per color attachment (or depth attachment if
+    * z-only).
+    */
+   /* FIXME: rename to 'samples' to match struct pvr_image */
+   uint32_t sample_count;
+
+   uint32_t color_count;
+   uint32_t *color_attachments;
+   uint32_t *resolve_attachments;
+
+   uint32_t input_count;
+   uint32_t *input_attachments;
+
+   uint32_t *depth_stencil_attachment;
+
+   /*  Derived and other state. */
+   uint32_t dep_count;
+   uint32_t *dep_list;
+
+   /* Array with dep_count elements. flush_on_dep[x] is true if this subpass
+    * and the subpass dep_list[x] can't be in the same hardware render.
+    */
+   bool *flush_on_dep;
+
+   /* Index of this subpass within the pass's subpasses array. */
+   uint32_t index;
+
+   uint32_t userpass_spawn;
+
+   VkPipelineBindPoint pipeline_bind_point;
+};
+
+/* Backs VkRenderPass: the saved attachment/subpass descriptions plus the
+ * hardware render setup derived from them.
+ */
+struct pvr_render_pass {
+   struct vk_object_base base;
+
+   /* Saved information from pCreateInfo. */
+   uint32_t attachment_count;
+
+   struct pvr_render_pass_attachment *attachments;
+
+   uint32_t subpass_count;
+
+   struct pvr_render_subpass *subpasses;
+
+   struct pvr_renderpass_hwsetup *hw_setup;
+
+   /*  Derived and other state. */
+   /* FIXME: rename to 'max_samples' as we use 'samples' elsewhere */
+   uint32_t max_sample_count;
+
+   /* The maximum number of tile buffers to use in any subpass. */
+   uint32_t max_tilebuffer_count;
+};
+
+/* Programs used to implement attachment load ops: a USC fragment program
+ * plus the PDS programs that launch it and set up its texture state.
+ */
+struct pvr_load_op {
+   bool is_hw_object;
+
+   /* Mask of attachments cleared by this load op. */
+   uint32_t clear_mask;
+
+   struct pvr_bo *usc_frag_prog_bo;
+   uint32_t const_shareds_count;
+   uint32_t shareds_dest_offset;
+   uint32_t shareds_count;
+
+   struct pvr_pds_upload pds_frag_prog;
+
+   struct pvr_pds_upload pds_tex_state_prog;
+   uint32_t temps_count;
+};
+
+/* Window-system integration setup/teardown. */
+VkResult pvr_wsi_init(struct pvr_physical_device *pdevice);
+void pvr_wsi_finish(struct pvr_physical_device *pdevice);
+
+/* Device queue creation/destruction (implemented in pvr_queue.c). */
+VkResult pvr_queues_create(struct pvr_device *device,
+                           const VkDeviceCreateInfo *pCreateInfo);
+void pvr_queues_destroy(struct pvr_device *device);
+
+/* Bind/unbind device memory into the GPU address space. pvr_bind_memory()
+ * returns the winsys VMA and device address through the out parameters.
+ */
+VkResult pvr_bind_memory(struct pvr_device *device,
+                         struct pvr_device_memory *mem,
+                         VkDeviceSize offset,
+                         VkDeviceSize size,
+                         VkDeviceSize alignment,
+                         struct pvr_winsys_vma **const vma_out,
+                         pvr_dev_addr_t *const dev_addr_out);
+void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma);
+
+/* Upload helpers. Each returns a newly created buffer object (or, for PDS,
+ * a pvr_pds_upload) owned by the caller.
+ */
+VkResult pvr_gpu_upload(struct pvr_device *device,
+                        struct pvr_winsys_heap *heap,
+                        const void *data,
+                        size_t size,
+                        uint64_t alignment,
+                        struct pvr_bo **const pvr_bo_out);
+VkResult pvr_gpu_upload_pds(struct pvr_device *device,
+                            const uint32_t *data,
+                            uint32_t data_size_dwords,
+                            uint32_t data_alignment,
+                            const uint32_t *code,
+                            uint32_t code_size_dwords,
+                            uint32_t code_alignment,
+                            uint64_t min_alignment,
+                            struct pvr_pds_upload *const pds_upload_out);
+
+VkResult pvr_gpu_upload_usc(struct pvr_device *device,
+                            const void *code,
+                            size_t code_size,
+                            uint64_t code_alignment,
+                            struct pvr_bo **const pvr_bo_out);
+
+/* Command buffer helpers. */
+VkResult pvr_cmd_buffer_add_transfer_cmd(struct pvr_cmd_buffer *cmd_buffer,
+                                         struct pvr_transfer_cmd *transfer_cmd);
+
+VkResult pvr_cmd_buffer_alloc_mem(struct pvr_cmd_buffer *cmd_buffer,
+                                  struct pvr_winsys_heap *heap,
+                                  uint64_t size,
+                                  uint32_t flags,
+                                  struct pvr_bo **const pvr_bo_out);
+
+/* Downcast a base pipeline to its compute variant. Only valid when the
+ * pipeline was created as a compute pipeline.
+ */
+static inline struct pvr_compute_pipeline *
+to_pvr_compute_pipeline(struct pvr_pipeline *pipeline)
+{
+   struct pvr_compute_pipeline *const compute_pipeline =
+      container_of(pipeline, struct pvr_compute_pipeline, base);
+
+   assert(pipeline->type == PVR_PIPELINE_TYPE_COMPUTE);
+
+   return compute_pipeline;
+}
+
+/* Downcast a base pipeline to its graphics variant. Only valid when the
+ * pipeline was created as a graphics pipeline.
+ */
+static inline struct pvr_graphics_pipeline *
+to_pvr_graphics_pipeline(struct pvr_pipeline *pipeline)
+{
+   struct pvr_graphics_pipeline *const graphics_pipeline =
+      container_of(pipeline, struct pvr_graphics_pipeline, base);
+
+   assert(pipeline->type == PVR_PIPELINE_TYPE_GRAPHICS);
+
+   return graphics_pipeline;
+}
+
+/* FIXME: Place this in USC specific header? */
+/* Map a USC MSAA mode to the corresponding PDS DOUTU sample-rate field. */
+/* clang-format off */
+static inline enum PVRX(PDSINST_DOUTU_SAMPLE_RATE)
+pvr_sample_rate_from_usc_msaa_mode(enum rogue_msaa_mode msaa_mode)
+/* clang-format on */
+{
+   switch (msaa_mode) {
+   case ROGUE_MSAA_MODE_PIXEL:
+      return PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE);
+   case ROGUE_MSAA_MODE_SELECTIVE:
+      return PVRX(PDSINST_DOUTU_SAMPLE_RATE_SELECTIVE);
+   case ROGUE_MSAA_MODE_FULL:
+      return PVRX(PDSINST_DOUTU_SAMPLE_RATE_FULL);
+   default:
+      unreachable("Undefined MSAA mode.");
+   }
+}
+
+/* Create and upload the PDS program for launching the given fragment shader;
+ * the resulting upload is returned through pds_upload_out and owned by the
+ * caller.
+ */
+VkResult pvr_pds_fragment_program_create_and_upload(
+   struct pvr_device *device,
+   const VkAllocationCallbacks *allocator,
+   const struct pvr_bo *fragment_shader_bo,
+   uint32_t fragment_temp_count,
+   enum rogue_msaa_mode msaa_mode,
+   bool has_phase_rate_change,
+   struct pvr_pds_upload *const pds_upload_out);
+
+/* Handle <-> driver-struct conversion boilerplate. The second argument to
+ * each cast macro is the path of the vk_object_base member inside the
+ * corresponding driver struct.
+ */
+#define PVR_FROM_HANDLE(__pvr_type, __name, __handle) \
+   VK_FROM_HANDLE(__pvr_type, __name, __handle)
+
+VK_DEFINE_HANDLE_CASTS(pvr_cmd_buffer,
+                       vk.base,
+                       VkCommandBuffer,
+                       VK_OBJECT_TYPE_COMMAND_BUFFER)
+VK_DEFINE_HANDLE_CASTS(pvr_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
+VK_DEFINE_HANDLE_CASTS(pvr_instance,
+                       vk.base,
+                       VkInstance,
+                       VK_OBJECT_TYPE_INSTANCE)
+VK_DEFINE_HANDLE_CASTS(pvr_physical_device,
+                       vk.base,
+                       VkPhysicalDevice,
+                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
+VK_DEFINE_HANDLE_CASTS(pvr_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
+
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_device_memory,
+                               base,
+                               VkDeviceMemory,
+                               VK_OBJECT_TYPE_DEVICE_MEMORY)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image, vk.base, VkImage, VK_OBJECT_TYPE_IMAGE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_cache,
+                               base,
+                               VkPipelineCache,
+                               VK_OBJECT_TYPE_PIPELINE_CACHE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_buffer, base, VkBuffer, VK_OBJECT_TYPE_BUFFER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_image_view,
+                               vk.base,
+                               VkImageView,
+                               VK_OBJECT_TYPE_IMAGE_VIEW)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set_layout,
+                               base,
+                               VkDescriptorSetLayout,
+                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_set,
+                               base,
+                               VkDescriptorSet,
+                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_descriptor_pool,
+                               base,
+                               VkDescriptorPool,
+                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_sampler,
+                               base,
+                               VkSampler,
+                               VK_OBJECT_TYPE_SAMPLER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_semaphore,
+                               base,
+                               VkSemaphore,
+                               VK_OBJECT_TYPE_SEMAPHORE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline_layout,
+                               base,
+                               VkPipelineLayout,
+                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_pipeline,
+                               base,
+                               VkPipeline,
+                               VK_OBJECT_TYPE_PIPELINE)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_framebuffer,
+                               base,
+                               VkFramebuffer,
+                               VK_OBJECT_TYPE_FRAMEBUFFER)
+VK_DEFINE_NONDISP_HANDLE_CASTS(pvr_render_pass,
+                               base,
+                               VkRenderPass,
+                               VK_OBJECT_TYPE_RENDER_PASS)
+
+/**
+ * Warn on ignored extension structs.
+ *
+ * The Vulkan spec requires us to ignore unsupported or unknown structs in
+ * a pNext chain. In debug mode, emitting warnings for ignored structs may
+ * help us discover structs that we should not have ignored.
+ *
+ * Uses mesa_logd(), so the message only appears at debug log level.
+ *
+ * From the Vulkan 1.0.38 spec:
+ *
+ *    Any component of the implementation (the loader, any enabled layers,
+ *    and drivers) must skip over, without processing (other than reading the
+ *    sType and pNext members) any chained structures with sType values not
+ *    defined by extensions supported by that component.
+ */
+#define pvr_debug_ignored_stype(sType) \
+   mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))
+
+/* Debug helper macros. */
+/* Bail out of the enclosing function unless the command buffer is recording
+ * and has no previously stored build error. Note the bare 'return': this is
+ * only usable in functions returning void (the vkCmd* entry points).
+ */
+#define PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer)         \
+   do {                                                             \
+      struct pvr_cmd_buffer *const _cmd_buffer = (cmd_buffer);      \
+      if (_cmd_buffer->status != PVR_CMD_BUFFER_STATUS_RECORDING) { \
+         vk_errorf(_cmd_buffer,                                     \
+                   VK_ERROR_OUT_OF_DEVICE_MEMORY,                   \
+                   "Command buffer is not in recording state");     \
+         return;                                                    \
+      } else if (_cmd_buffer->state.status < VK_SUCCESS) {          \
+         vk_errorf(_cmd_buffer,                                     \
+                   _cmd_buffer->state.status,                       \
+                   "Skipping function as command buffer has "       \
+                   "previous build error");                         \
+         return;                                                    \
+      }                                                             \
+   } while (0)
+
+/**
+ * Print a FINISHME message, including its source location.
+ *
+ * The static guard means each call site reports at most once per process.
+ */
+#define pvr_finishme(format, ...)              \
+   do {                                        \
+      static bool reported = false;            \
+      if (!reported) {                         \
+         mesa_logw("%s:%d: FINISHME: " format, \
+                   __FILE__,                   \
+                   __LINE__,                   \
+                   ##__VA_ARGS__);             \
+         reported = true;                      \
+      }                                        \
+   } while (false)
+
+/* A non-fatal assert. Useful for debugging. Logs the failed condition
+ * instead of aborting, and compiles to nothing in non-DEBUG builds.
+ */
+#ifdef DEBUG
+#   define pvr_assert(x)                                           \
+      ({                                                           \
+         if (unlikely(!(x)))                                       \
+            mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
+      })
+#else
+#   define pvr_assert(x)
+#endif
+
+#endif /* PVR_PRIVATE_H */
diff --git a/src/imagination/vulkan/pvr_query.c b/src/imagination/vulkan/pvr_query.c
new file mode 100644 (file)
index 0000000..0c63007
--- /dev/null
@@ -0,0 +1,88 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "pvr_private.h"
+
+/* vkCreateQueryPool() - unimplemented stub.
+ * NOTE: in release builds the assert compiles out and this returns
+ * VK_SUCCESS without writing *pQueryPool.
+ */
+VkResult pvr_CreateQueryPool(VkDevice _device,
+                             const VkQueryPoolCreateInfo *pCreateInfo,
+                             const VkAllocationCallbacks *pAllocator,
+                             VkQueryPool *pQueryPool)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* vkDestroyQueryPool() - unimplemented stub (debug-only assert). */
+void pvr_DestroyQueryPool(VkDevice _device,
+                          VkQueryPool queryPool,
+                          const VkAllocationCallbacks *pAllocator)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkGetQueryPoolResults() - unimplemented stub.
+ * NOTE: in release builds the assert compiles out and this returns
+ * VK_SUCCESS without writing any results to pData.
+ */
+VkResult pvr_GetQueryPoolResults(VkDevice _device,
+                                 VkQueryPool queryPool,
+                                 uint32_t firstQuery,
+                                 uint32_t queryCount,
+                                 size_t dataSize,
+                                 void *pData,
+                                 VkDeviceSize stride,
+                                 VkQueryResultFlags flags)
+{
+   assert(!"Unimplemented");
+   return VK_SUCCESS;
+}
+
+/* vkCmdResetQueryPool() - unimplemented stub (debug-only assert). */
+void pvr_CmdResetQueryPool(VkCommandBuffer commandBuffer,
+                           VkQueryPool queryPool,
+                           uint32_t firstQuery,
+                           uint32_t queryCount)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdCopyQueryPoolResults() - unimplemented stub (debug-only assert). */
+void pvr_CmdCopyQueryPoolResults(VkCommandBuffer commandBuffer,
+                                 VkQueryPool queryPool,
+                                 uint32_t firstQuery,
+                                 uint32_t queryCount,
+                                 VkBuffer dstBuffer,
+                                 VkDeviceSize dstOffset,
+                                 VkDeviceSize stride,
+                                 VkQueryResultFlags flags)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdBeginQuery() - unimplemented stub (debug-only assert). */
+void pvr_CmdBeginQuery(VkCommandBuffer commandBuffer,
+                       VkQueryPool queryPool,
+                       uint32_t query,
+                       VkQueryControlFlags flags)
+{
+   assert(!"Unimplemented");
+}
+
+/* vkCmdEndQuery() - unimplemented stub (debug-only assert). */
+void pvr_CmdEndQuery(VkCommandBuffer commandBuffer,
+                     VkQueryPool queryPool,
+                     uint32_t query)
+{
+   assert(!"Unimplemented");
+}
diff --git a/src/imagination/vulkan/pvr_queue.c b/src/imagination/vulkan/pvr_queue.c
new file mode 100644 (file)
index 0000000..00b7575
--- /dev/null
@@ -0,0 +1,773 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on radv driver which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/**
+ * This file implements VkQueue, VkFence, and VkSemaphore
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_job_compute.h"
+#include "pvr_job_context.h"
+#include "pvr_job_render.h"
+#include "pvr_limits.h"
+#include "pvr_private.h"
+#include "util/macros.h"
+#include "util/u_atomic.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_object.h"
+#include "vk_queue.h"
+#include "vk_util.h"
+
+/* Initialize a single pvr_queue: the vk_queue base plus one compute and one
+ * render context, both at medium priority. On failure everything created so
+ * far is torn down and the error is returned.
+ */
+static VkResult pvr_queue_init(struct pvr_device *device,
+                               struct pvr_queue *queue,
+                               const VkDeviceQueueCreateInfo *pCreateInfo,
+                               uint32_t index_in_family)
+{
+   struct pvr_render_ctx *gctx;
+   struct pvr_compute_ctx *cctx;
+   VkResult result;
+
+   result =
+      vk_queue_init(&queue->vk, &device->vk, pCreateInfo, index_in_family);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result =
+      pvr_compute_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &cctx);
+   if (result != VK_SUCCESS)
+      goto fail_queue_finish;
+
+   result =
+      pvr_render_ctx_create(device, PVR_WINSYS_CTX_PRIORITY_MEDIUM, &gctx);
+   if (result != VK_SUCCESS)
+      goto fail_compute_ctx_destroy;
+
+   queue->device = device;
+   queue->compute_ctx = cctx;
+   queue->gfx_ctx = gctx;
+
+   /* Nothing submitted yet, so no per-job-type completion sync objects. */
+   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++)
+      queue->completion[i] = NULL;
+
+   return VK_SUCCESS;
+
+fail_compute_ctx_destroy:
+   pvr_compute_ctx_destroy(cctx);
+
+fail_queue_finish:
+   vk_queue_finish(&queue->vk);
+
+   return result;
+}
+
+/**
+ * Create the device queues requested in pCreateInfo.
+ *
+ * Only one queue family (index 0) with up to PVR_MAX_QUEUES queues is
+ * supported. On failure any queues initialized so far are torn down via
+ * pvr_queues_destroy().
+ */
+VkResult pvr_queues_create(struct pvr_device *device,
+                           const VkDeviceCreateInfo *pCreateInfo)
+{
+   VkResult result;
+
+   /* Check requested queue families and queues */
+   assert(pCreateInfo->queueCreateInfoCount == 1);
+   assert(pCreateInfo->pQueueCreateInfos[0].queueFamilyIndex == 0);
+   assert(pCreateInfo->pQueueCreateInfos[0].queueCount <= PVR_MAX_QUEUES);
+
+   /* Fixed: previously this read "queue_create = queue_create = ...", a
+    * duplicated self-assignment; a single initialization is intended.
+    */
+   const VkDeviceQueueCreateInfo *queue_create =
+      &pCreateInfo->pQueueCreateInfos[0];
+
+   device->queues = vk_alloc(&device->vk.alloc,
+                             queue_create->queueCount * sizeof(*device->queues),
+                             8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!device->queues)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   device->queue_count = 0;
+
+   for (uint32_t i = 0; i < queue_create->queueCount; i++) {
+      result = pvr_queue_init(device, &device->queues[i], queue_create, i);
+      if (result != VK_SUCCESS)
+         goto err_queues_finish;
+
+      /* Only count fully initialized queues so cleanup is exact. */
+      device->queue_count++;
+   }
+
+   return VK_SUCCESS;
+
+err_queues_finish:
+   pvr_queues_destroy(device);
+   return result;
+}
+
+/* Tear down one queue: drop outstanding completion sync objects, destroy
+ * both contexts, then finish the vk_queue base.
+ */
+static void pvr_queue_finish(struct pvr_queue *queue)
+{
+   for (uint32_t i = 0; i < ARRAY_SIZE(queue->completion); i++) {
+      struct pvr_winsys_syncobj *syncobj = queue->completion[i];
+
+      if (syncobj)
+         queue->device->ws->ops->syncobj_destroy(syncobj);
+   }
+
+   pvr_render_ctx_destroy(queue->gfx_ctx);
+   pvr_compute_ctx_destroy(queue->compute_ctx);
+
+   vk_queue_finish(&queue->vk);
+}
+
+/* Finish every created queue and release the queue array itself. */
+void pvr_queues_destroy(struct pvr_device *device)
+{
+   for (uint32_t i = 0; i < device->queue_count; i++)
+      pvr_queue_finish(&device->queues[i]);
+
+   vk_free(&device->vk.alloc, device->queues);
+}
+
+/* vkQueueWaitIdle() - block until every per-job-type completion sync object
+ * of the queue has signaled (no timeout).
+ */
+VkResult pvr_QueueWaitIdle(VkQueue _queue)
+{
+   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
+   struct pvr_winsys *ws = queue->device->ws;
+
+   return ws->ops->syncobjs_wait(ws,
+                                 queue->completion,
+                                 ARRAY_SIZE(queue->completion),
+                                 true,
+                                 UINT64_MAX);
+}
+
+/* vkCreateFence() - allocate the fence object. A backing winsys syncobj is
+ * only created up front when the fence must start signaled; otherwise the
+ * syncobj stays NULL until first use.
+ */
+VkResult pvr_CreateFence(VkDevice _device,
+                         const VkFenceCreateInfo *pCreateInfo,
+                         const VkAllocationCallbacks *pAllocator,
+                         VkFence *pFence)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   const bool signaled = pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT;
+   struct pvr_fence *fence;
+
+   fence = vk_object_alloc(&device->vk,
+                           pAllocator,
+                           sizeof(*fence),
+                           VK_OBJECT_TYPE_FENCE);
+   if (!fence)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   fence->syncobj = NULL;
+
+   if (signaled) {
+      VkResult result =
+         device->ws->ops->syncobj_create(device->ws, true, &fence->syncobj);
+      if (result != VK_SUCCESS) {
+         vk_object_free(&device->vk, pAllocator, fence);
+         return result;
+      }
+   }
+
+   *pFence = pvr_fence_to_handle(fence);
+
+   return VK_SUCCESS;
+}
+
+/* vkDestroyFence() - destroy the backing syncobj (if any) and free the
+ * object. Destroying VK_NULL_HANDLE is a no-op per the Vulkan spec.
+ */
+void pvr_DestroyFence(VkDevice _device,
+                      VkFence _fence,
+                      const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_fence, fence, _fence);
+
+   if (!fence)
+      return;
+
+   if (fence->syncobj)
+      device->ws->ops->syncobj_destroy(fence->syncobj);
+
+   vk_object_free(&device->vk, pAllocator, fence);
+}
+
+/* vkResetFences() - reset the backing syncobjs of all given fences.
+ *
+ * Uses STACK_ARRAY instead of a VLA so an arbitrarily large fenceCount from
+ * the application cannot overflow the stack (consistent with
+ * pvr_process_graphics_cmd()).
+ */
+VkResult
+pvr_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   VkResult result;
+
+   STACK_ARRAY(struct pvr_winsys_syncobj *, syncobjs, fenceCount);
+   if (!syncobjs)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   for (uint32_t i = 0; i < fenceCount; i++) {
+      PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]);
+
+      syncobjs[i] = fence->syncobj;
+   }
+
+   result = device->ws->ops->syncobjs_reset(device->ws, syncobjs, fenceCount);
+
+   STACK_ARRAY_FINISH(syncobjs);
+
+   return result;
+}
+
+/* vkGetFenceStatus() - non-blocking poll implemented as a zero-timeout wait
+ * on the fence's backing syncobj; a winsys VK_TIMEOUT maps to VK_NOT_READY.
+ *
+ * NOTE(review): fence->syncobj is NULL for a fence created unsignaled and
+ * not yet submitted — presumably the winsys wait treats a NULL syncobj as
+ * unsignaled; verify against the winsys implementation.
+ */
+VkResult pvr_GetFenceStatus(VkDevice _device, VkFence _fence)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_fence, fence, _fence);
+   VkResult result;
+
+   result =
+      device->ws->ops->syncobjs_wait(device->ws, &fence->syncobj, 1U, true, 0U);
+   if (result == VK_TIMEOUT)
+      return VK_NOT_READY;
+
+   return result;
+}
+
+/* vkWaitForFences() - wait (for any or all, per waitAll) on the backing
+ * syncobjs of the given fences.
+ *
+ * Uses STACK_ARRAY instead of a VLA so an arbitrarily large fenceCount from
+ * the application cannot overflow the stack (consistent with
+ * pvr_process_graphics_cmd()).
+ */
+VkResult pvr_WaitForFences(VkDevice _device,
+                           uint32_t fenceCount,
+                           const VkFence *pFences,
+                           VkBool32 waitAll,
+                           uint64_t timeout)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   VkResult result;
+
+   STACK_ARRAY(struct pvr_winsys_syncobj *, syncobjs, fenceCount);
+   if (!syncobjs)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   for (uint32_t i = 0; i < fenceCount; i++) {
+      PVR_FROM_HANDLE(pvr_fence, fence, pFences[i]);
+
+      syncobjs[i] = fence->syncobj;
+   }
+
+   result = device->ws->ops->syncobjs_wait(device->ws,
+                                           syncobjs,
+                                           fenceCount,
+                                           !!waitAll,
+                                           timeout);
+
+   STACK_ARRAY_FINISH(syncobjs);
+
+   return result;
+}
+
+/* vkCreateSemaphore() - allocate the semaphore object. No backing winsys
+ * syncobj is created here; it starts NULL and is set up elsewhere when the
+ * semaphore is first used.
+ */
+VkResult pvr_CreateSemaphore(VkDevice _device,
+                             const VkSemaphoreCreateInfo *pCreateInfo,
+                             const VkAllocationCallbacks *pAllocator,
+                             VkSemaphore *pSemaphore)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_semaphore *semaphore =
+      vk_object_alloc(&device->vk,
+                      pAllocator,
+                      sizeof(*semaphore),
+                      VK_OBJECT_TYPE_SEMAPHORE);
+
+   if (!semaphore)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   semaphore->syncobj = NULL;
+
+   *pSemaphore = pvr_semaphore_to_handle(semaphore);
+
+   return VK_SUCCESS;
+}
+
+/* vkDestroySemaphore() - destroy the backing syncobj (if any) and free the
+ * object.
+ */
+void pvr_DestroySemaphore(VkDevice _device,
+                          VkSemaphore _semaphore,
+                          const VkAllocationCallbacks *pAllocator)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   PVR_FROM_HANDLE(pvr_semaphore, semaphore, _semaphore);
+
+   /* The Vulkan spec allows semaphore to be VK_NULL_HANDLE, in which case
+    * this call must be a no-op. Previously this dereferenced a NULL pointer
+    * (cf. pvr_DestroyFence(), which already has this guard).
+    */
+   if (!semaphore)
+      return;
+
+   if (semaphore->syncobj)
+      device->ws->ops->syncobj_destroy(semaphore->syncobj);
+
+   vk_object_free(&device->vk, pAllocator, semaphore);
+}
+
+/* Translate a Vulkan pipeline stage mask into the driver's internal stage
+ * bits. Top-of-pipe or all-commands short-circuits to every stage.
+ */
+static enum pvr_pipeline_stage_bits
+pvr_convert_stage_mask(VkPipelineStageFlags stage_mask)
+{
+   const VkPipelineStageFlags geom_stages =
+      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+      VK_PIPELINE_STAGE_VERTEX_INPUT_BIT |
+      VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
+      VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
+      VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
+      VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT;
+   const VkPipelineStageFlags frag_stages =
+      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
+      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
+      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+   const VkPipelineStageFlags compute_stages =
+      VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT |
+      VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
+   enum pvr_pipeline_stage_bits result = 0;
+
+   if (stage_mask & (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT |
+                     VK_PIPELINE_STAGE_ALL_COMMANDS_BIT))
+      return PVR_PIPELINE_STAGE_ALL_BITS;
+
+   if (stage_mask & VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT)
+      result |= PVR_PIPELINE_STAGE_ALL_GRAPHICS_BITS;
+
+   if (stage_mask & geom_stages)
+      result |= PVR_PIPELINE_STAGE_GEOM_BIT;
+
+   if (stage_mask & frag_stages)
+      result |= PVR_PIPELINE_STAGE_FRAG_BIT;
+
+   /* Compute stage synchronization is not implemented yet. */
+   if (stage_mask & compute_stages)
+      assert(!"Unimplemented");
+
+   if (stage_mask & VK_PIPELINE_STAGE_TRANSFER_BIT)
+      result |= PVR_PIPELINE_STAGE_TRANSFER_BIT;
+
+   return result;
+}
+
+/* Submit one graphics sub-command: collects imported framebuffer attachment
+ * BOs, submits the render job, and on success swaps the new geometry/fragment
+ * completion syncobjs into 'completions' (destroying the ones they replace).
+ *
+ * NOTE(review): 'cmd_buffer' is unused in this function body as written.
+ */
+static VkResult pvr_process_graphics_cmd(
+   struct pvr_device *device,
+   struct pvr_queue *queue,
+   struct pvr_cmd_buffer *cmd_buffer,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   const struct pvr_framebuffer *framebuffer = sub_cmd->gfx.framebuffer;
+   struct pvr_winsys_syncobj *syncobj_geom = NULL;
+   struct pvr_winsys_syncobj *syncobj_frag = NULL;
+   uint32_t bo_count = 0;
+   VkResult result;
+
+   STACK_ARRAY(struct pvr_winsys_job_bo, bos, framebuffer->attachment_count);
+   if (!bos)
+      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* FIXME: DoShadowLoadOrStore() */
+
+   /* FIXME: If the framebuffer being rendered to has multiple layers then we
+    * need to split submissions that run a fragment job into two.
+    */
+   if (sub_cmd->gfx.job.run_frag && framebuffer->layers > 1)
+      pvr_finishme("Split job submission for framebuffers with > 1 layers");
+
+   /* Get any imported buffers used in framebuffer attachments. */
+   for (uint32_t i = 0U; i < framebuffer->attachment_count; i++) {
+      if (!framebuffer->attachments[i]->image->vma->bo->is_imported)
+         continue;
+
+      bos[bo_count].bo = framebuffer->attachments[i]->image->vma->bo;
+      bos[bo_count].flags = PVR_WINSYS_JOB_BO_FLAG_WRITE;
+      bo_count++;
+   }
+
+   /* This passes ownership of the wait fences to pvr_render_job_submit(). */
+   result = pvr_render_job_submit(queue->gfx_ctx,
+                                  &sub_cmd->gfx.job,
+                                  bos,
+                                  bo_count,
+                                  semaphores,
+                                  semaphore_count,
+                                  stage_flags,
+                                  &syncobj_geom,
+                                  &syncobj_frag);
+   STACK_ARRAY_FINISH(bos);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Replace the completion fences. */
+   if (syncobj_geom) {
+      if (completions[PVR_JOB_TYPE_GEOM])
+         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_GEOM]);
+
+      completions[PVR_JOB_TYPE_GEOM] = syncobj_geom;
+   }
+
+   if (syncobj_frag) {
+      if (completions[PVR_JOB_TYPE_FRAG])
+         device->ws->ops->syncobj_destroy(completions[PVR_JOB_TYPE_FRAG]);
+
+      completions[PVR_JOB_TYPE_FRAG] = syncobj_frag;
+   }
+
+   /* FIXME: DoShadowLoadOrStore() */
+
+   return result;
+}
+
+/* Submits a compute sub-command to the compute context.
+ *
+ * Ownership of the wait fences moves to pvr_compute_job_submit(). On
+ * success, a newly returned completion syncobj replaces the COMPUTE entry
+ * of \p completions (the previous entry, if any, is destroyed).
+ */
+static VkResult pvr_process_compute_cmd(
+   struct pvr_device *device,
+   struct pvr_queue *queue,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   struct pvr_winsys_syncobj *compute_done = NULL;
+   VkResult result;
+
+   /* This passes ownership of the wait fences to pvr_compute_job_submit(). */
+   result = pvr_compute_job_submit(queue->compute_ctx,
+                                   sub_cmd,
+                                   semaphores,
+                                   semaphore_count,
+                                   stage_flags,
+                                   &compute_done);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (compute_done != NULL) {
+      /* A fresh completion fence supersedes any previous one. */
+      struct pvr_winsys_syncobj *old = completions[PVR_JOB_TYPE_COMPUTE];
+
+      if (old)
+         device->ws->ops->syncobj_destroy(old);
+
+      completions[PVR_JOB_TYPE_COMPUTE] = compute_done;
+   }
+
+   return result;
+}
+
+/* FIXME: Implement gpu based transfer support. */
+/* Executes a transfer sub-command on the CPU.
+ *
+ * Waits for (and releases) any transfer-stage wait semaphores, then
+ * memcpy()s every region of each queued transfer command from the src to
+ * the dst buffer, mapping and unmapping the bos as needed.
+ */
+static VkResult pvr_process_transfer_cmds(
+   struct pvr_device *device,
+   struct pvr_sub_cmd *sub_cmd,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   /* Wait for transfer semaphores here before doing any transfers. */
+   for (uint32_t i = 0; i < semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, sem, semaphores[i]);
+
+      if (sem->syncobj && stage_flags[i] & PVR_PIPELINE_STAGE_TRANSFER_BIT) {
+         VkResult result = device->ws->ops->syncobjs_wait(device->ws,
+                                                          &sem->syncobj,
+                                                          1,
+                                                          true,
+                                                          UINT64_MAX);
+         if (result != VK_SUCCESS)
+            return result;
+
+         /* The transfer-stage wait is satisfied; drop the syncobj once no
+          * other stage still references it.
+          */
+         stage_flags[i] &= ~PVR_PIPELINE_STAGE_TRANSFER_BIT;
+         if (stage_flags[i] == 0) {
+            device->ws->ops->syncobj_destroy(sem->syncobj);
+            sem->syncobj = NULL;
+         }
+      }
+   }
+
+   list_for_each_entry_safe (struct pvr_transfer_cmd,
+                             transfer_cmd,
+                             &sub_cmd->transfer.transfer_cmds,
+                             link) {
+      bool src_mapped = false;
+      bool dst_mapped = false;
+      void *src_addr;
+      void *dst_addr;
+      void *ret_ptr;
+
+      /* Map if bo is not mapped. */
+      if (!transfer_cmd->src->vma->bo->map) {
+         src_mapped = true;
+         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->src->vma->bo);
+         if (!ret_ptr)
+            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+      }
+
+      if (!transfer_cmd->dst->vma->bo->map) {
+         dst_mapped = true;
+         ret_ptr = device->ws->ops->buffer_map(transfer_cmd->dst->vma->bo);
+         if (!ret_ptr) {
+            /* Don't leak the src mapping established above. */
+            if (src_mapped)
+               device->ws->ops->buffer_unmap(transfer_cmd->src->vma->bo);
+
+            return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
+         }
+      }
+
+      src_addr =
+         transfer_cmd->src->vma->bo->map + transfer_cmd->src->vma->bo_offset;
+      dst_addr =
+         transfer_cmd->dst->vma->bo->map + transfer_cmd->dst->vma->bo_offset;
+
+      for (uint32_t i = 0; i < transfer_cmd->region_count; i++) {
+         VkBufferCopy2 *region = &transfer_cmd->regions[i];
+
+         memcpy(dst_addr + region->dstOffset,
+                src_addr + region->srcOffset,
+                region->size);
+      }
+
+      /* Only undo the mappings we created ourselves. */
+      if (src_mapped)
+         device->ws->ops->buffer_unmap(transfer_cmd->src->vma->bo);
+
+      if (dst_mapped)
+         device->ws->ops->buffer_unmap(transfer_cmd->dst->vma->bo);
+   }
+
+   /* Given we are doing a CPU based copy, the completion fence should always
+    * be NULL. This should be fixed when GPU based copy is implemented.
+    */
+   assert(!completions[PVR_JOB_TYPE_TRANSFER]);
+
+   return VK_SUCCESS;
+}
+
+/* Merges all per-job completion syncobjs into a single payload and stores a
+ * duplicate of it in every signal semaphore.
+ *
+ * Each semaphore gets its own copy via syncobjs_merge(syncobj, NULL, ...);
+ * any payload a semaphore previously held is destroyed. On success, control
+ * deliberately falls through to err_destroy_syncobj so that the intermediate
+ * merged syncobj is released on every path.
+ */
+static VkResult pvr_set_semaphore_payloads(
+   struct pvr_device *device,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count)
+{
+   struct pvr_winsys_syncobj *syncobj = NULL;
+   VkResult result;
+
+   /* Nothing to signal; `result` is only set inside the loops below, which
+    * are guaranteed to run when semaphore_count > 0.
+    */
+   if (!semaphore_count)
+      return VK_SUCCESS;
+
+   for (uint32_t i = 0; i < PVR_JOB_TYPE_MAX; i++) {
+      if (completions[i]) {
+         result =
+            device->ws->ops->syncobjs_merge(completions[i], syncobj, &syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_syncobj;
+      }
+   }
+
+   for (uint32_t i = 0; i < semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);
+      struct pvr_winsys_syncobj *dup_signal_fence;
+
+      /* Duplicate signal_fence and store it in each signal semaphore. */
+      result =
+         device->ws->ops->syncobjs_merge(syncobj, NULL, &dup_signal_fence);
+      if (result != VK_SUCCESS)
+         goto err_destroy_syncobj;
+
+      if (semaphore->syncobj)
+         device->ws->ops->syncobj_destroy(semaphore->syncobj);
+      semaphore->syncobj = dup_signal_fence;
+   }
+
+   /* Intentional fallthrough: also reached on success to free the merged
+    * intermediate syncobj.
+    */
+err_destroy_syncobj:
+   if (syncobj)
+      device->ws->ops->syncobj_destroy(syncobj);
+
+   return result;
+}
+
+/* Merges all per-job completion syncobjs into one payload and hands
+ * ownership of it to the given fence, dropping any payload the fence
+ * previously held.
+ */
+static VkResult pvr_set_fence_payload(
+   struct pvr_device *device,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX],
+   VkFence _fence)
+{
+   PVR_FROM_HANDLE(pvr_fence, fence, _fence);
+   struct pvr_winsys_syncobj *merged = NULL;
+
+   /* Fold every per-job completion syncobj into a single one. */
+   for (uint32_t job = 0; job < PVR_JOB_TYPE_MAX; job++) {
+      VkResult result;
+
+      if (!completions[job])
+         continue;
+
+      result =
+         device->ws->ops->syncobjs_merge(completions[job], merged, &merged);
+      if (result != VK_SUCCESS) {
+         device->ws->ops->syncobj_destroy(merged);
+         return result;
+      }
+   }
+
+   /* The fence takes ownership of the merged syncobj. */
+   if (fence->syncobj)
+      device->ws->ops->syncobj_destroy(fence->syncobj);
+   fence->syncobj = merged;
+
+   return VK_SUCCESS;
+}
+
+/* Walks the sub-commands of an executable command buffer and submits each
+ * one to the matching backend (graphics, compute, or CPU transfer).
+ *
+ * Per-job completion syncobjs are accumulated into \p completions. On the
+ * first failing sub-command the command buffer is marked invalid and the
+ * error is returned; remaining sub-commands are not processed.
+ */
+static VkResult pvr_process_cmd_buffer(
+   struct pvr_device *device,
+   struct pvr_queue *queue,
+   VkCommandBuffer commandBuffer,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
+   VkResult result;
+
+   assert(cmd_buffer->status == PVR_CMD_BUFFER_STATUS_EXECUTABLE);
+
+   list_for_each_entry_safe (struct pvr_sub_cmd,
+                             sub_cmd,
+                             &cmd_buffer->sub_cmds,
+                             link) {
+      switch (sub_cmd->type) {
+      case PVR_SUB_CMD_TYPE_GRAPHICS:
+         result = pvr_process_graphics_cmd(device,
+                                           queue,
+                                           cmd_buffer,
+                                           sub_cmd,
+                                           semaphores,
+                                           semaphore_count,
+                                           stage_flags,
+                                           completions);
+         break;
+
+      case PVR_SUB_CMD_TYPE_COMPUTE:
+         result = pvr_process_compute_cmd(device,
+                                          queue,
+                                          sub_cmd,
+                                          semaphores,
+                                          semaphore_count,
+                                          stage_flags,
+                                          completions);
+         break;
+
+      case PVR_SUB_CMD_TYPE_TRANSFER:
+         result = pvr_process_transfer_cmds(device,
+                                            sub_cmd,
+                                            semaphores,
+                                            semaphore_count,
+                                            stage_flags,
+                                            completions);
+         break;
+
+      default:
+         pvr_finishme("Unsupported sub-command type %d", sub_cmd->type);
+         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+
+      if (result != VK_SUCCESS) {
+         cmd_buffer->status = PVR_CMD_BUFFER_STATUS_INVALID;
+         return result;
+      }
+
+      /* Count this job towards the device-wide submitted-job total. */
+      p_atomic_inc(&device->global_queue_job_count);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Handles a submit that contains no command buffers.
+ *
+ * Each wait semaphore's payload is merged into the completion syncobj of
+ * every pipeline stage the semaphore applies to (one bit per stage in
+ * stage_flags[i]), so subsequent signal semaphores / fences still chain
+ * correctly. The semaphore's payload is then consumed (destroyed).
+ */
+static VkResult pvr_process_empty_job(
+   struct pvr_device *device,
+   const VkSemaphore *semaphores,
+   uint32_t semaphore_count,
+   uint32_t *stage_flags,
+   struct pvr_winsys_syncobj *completions[static PVR_JOB_TYPE_MAX])
+{
+   for (uint32_t i = 0; i < semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, semaphore, semaphores[i]);
+
+      if (!semaphore->syncobj)
+         continue;
+
+      /* Fold the semaphore into each stage's completion syncobj. */
+      for (uint32_t j = 0; j < PVR_NUM_SYNC_PIPELINE_STAGES; j++) {
+         if (stage_flags[i] & (1U << j)) {
+            VkResult result =
+               device->ws->ops->syncobjs_merge(semaphore->syncobj,
+                                               completions[j],
+                                               &completions[j]);
+            if (result != VK_SUCCESS)
+               return result;
+         }
+      }
+
+      device->ws->ops->syncobj_destroy(semaphore->syncobj);
+      semaphore->syncobj = NULL;
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Moves each per-job syncobj from src into dst, releasing whatever dst
+ * previously held for that job type. Entries that are NULL in src leave
+ * dst untouched.
+ */
+static void
+pvr_update_syncobjs(struct pvr_device *device,
+                    struct pvr_winsys_syncobj *src[static PVR_JOB_TYPE_MAX],
+                    struct pvr_winsys_syncobj *dst[static PVR_JOB_TYPE_MAX])
+{
+   for (uint32_t job = 0; job < PVR_JOB_TYPE_MAX; job++) {
+      if (!src[job])
+         continue;
+
+      if (dst[job])
+         device->ws->ops->syncobj_destroy(dst[job]);
+
+      dst[job] = src[job];
+   }
+}
+
+/* Implementation of vkQueueSubmit().
+ *
+ * For each VkSubmitInfo: the wait semaphores' dst stage masks are converted
+ * to driver pipeline-stage flags, the command buffers (or an empty job) are
+ * processed, signal semaphores receive the merged per-submit completion
+ * payload, and the per-submit completions are folded into the queue-level
+ * accumulator. The optional fence receives the merge of all submits'
+ * completions.
+ */
+VkResult pvr_QueueSubmit(VkQueue _queue,
+                         uint32_t submitCount,
+                         const VkSubmitInfo *pSubmits,
+                         VkFence fence)
+{
+   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
+   struct pvr_winsys_syncobj *completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
+   struct pvr_device *device = queue->device;
+   VkResult result;
+
+   for (uint32_t i = 0; i < submitCount; i++) {
+      struct pvr_winsys_syncobj
+         *per_submit_completion_syncobjs[PVR_JOB_TYPE_MAX] = {};
+      const VkSubmitInfo *desc = &pSubmits[i];
+      /* NOTE(review): zero-length VLA when waitSemaphoreCount == 0 — relies
+       * on compiler tolerance; confirm this is acceptable here.
+       */
+      uint32_t stage_flags[desc->waitSemaphoreCount];
+
+      for (uint32_t j = 0; j < desc->waitSemaphoreCount; j++)
+         stage_flags[j] = pvr_convert_stage_mask(desc->pWaitDstStageMask[j]);
+
+      if (desc->commandBufferCount > 0U) {
+         for (uint32_t j = 0U; j < desc->commandBufferCount; j++) {
+            result = pvr_process_cmd_buffer(device,
+                                            queue,
+                                            desc->pCommandBuffers[j],
+                                            desc->pWaitSemaphores,
+                                            desc->waitSemaphoreCount,
+                                            stage_flags,
+                                            per_submit_completion_syncobjs);
+            if (result != VK_SUCCESS)
+               return result;
+         }
+      } else {
+         result = pvr_process_empty_job(device,
+                                        desc->pWaitSemaphores,
+                                        desc->waitSemaphoreCount,
+                                        stage_flags,
+                                        per_submit_completion_syncobjs);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      if (desc->signalSemaphoreCount) {
+         result = pvr_set_semaphore_payloads(device,
+                                             per_submit_completion_syncobjs,
+                                             desc->pSignalSemaphores,
+                                             desc->signalSemaphoreCount);
+         if (result != VK_SUCCESS)
+            return result;
+      }
+
+      /* Fold this submit's completions into the queue-level accumulator. */
+      pvr_update_syncobjs(device,
+                          per_submit_completion_syncobjs,
+                          completion_syncobjs);
+   }
+
+   if (fence) {
+      result = pvr_set_fence_payload(device, completion_syncobjs, fence);
+      if (result != VK_SUCCESS)
+         return result;
+   }
+
+   pvr_update_syncobjs(device, completion_syncobjs, queue->completion);
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/pvr_shader.c b/src/imagination/vulkan/pvr_shader.c
new file mode 100644 (file)
index 0000000..a0063d8
--- /dev/null
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <vulkan/vulkan.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "nir/nir_builder.h"
+#include "pvr_private.h"
+#include "pvr_shader.h"
+#include "rogue/rogue.h"
+#include "rogue/rogue_shader.h"
+#include "spirv/nir_spirv.h"
+#include "vk_format.h"
+#include "vk_shader_module.h"
+#include "vk_util.h"
+
+/**
+ * \file pvr_shader.c
+ *
+ * \brief Contains top-level functions to compile SPIR-V -> NIR -> Rogue, and
+ * interfaces with the compiler.
+ */
+
+/**
+ * \brief Converts a SPIR-V shader to NIR.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] stage Shader stage.
+ * \param[in] create_info Shader creation info from Vulkan pipeline.
+ * \return A nir_shader* if successful, or NULL if unsuccessful.
+ */
+nir_shader *pvr_spirv_to_nir(struct rogue_build_ctx *ctx,
+                             gl_shader_stage stage,
+                             const VkPipelineShaderStageCreateInfo *create_info)
+{
+   VK_FROM_HANDLE(vk_shader_module, module, create_info->module);
+   const uint32_t *spirv_words = (uint32_t *)module->data;
+   unsigned num_specs = 0;
+   struct nir_spirv_specialization *specs =
+      vk_spec_info_to_nir_spirv(create_info->pSpecializationInfo, &num_specs);
+   /* SPIR-V is a stream of 32-bit words; module->size is in bytes. */
+   nir_shader *nir = rogue_spirv_to_nir(ctx,
+                                        stage,
+                                        create_info->pName,
+                                        module->size / sizeof(uint32_t),
+                                        spirv_words,
+                                        num_specs,
+                                        specs);
+
+   free(specs);
+
+   return nir;
+}
+
+/**
+ * \brief Converts a NIR shader to Rogue.
+ *
+ * Thin wrapper around the compiler entry point.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] nir NIR shader.
+ * \return A rogue_shader* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader *pvr_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                      nir_shader *nir)
+{
+   struct rogue_shader *shader = rogue_nir_to_rogue(ctx, nir);
+
+   return shader;
+}
+
+/**
+ * \brief Converts a Rogue shader to binary.
+ *
+ * Thin wrapper around the compiler entry point.
+ *
+ * \param[in] ctx Shared multi-stage build context.
+ * \param[in] shader Rogue shader.
+ * \return A rogue_shader_binary* if successful, or NULL if unsuccessful.
+ */
+struct rogue_shader_binary *pvr_rogue_to_binary(struct rogue_build_ctx *ctx,
+                                                struct rogue_shader *shader)
+{
+   struct rogue_shader_binary *binary = rogue_to_binary(ctx, shader);
+
+   return binary;
+}
diff --git a/src/imagination/vulkan/pvr_shader.h b/src/imagination/vulkan/pvr_shader.h
new file mode 100644 (file)
index 0000000..c522654
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SHADER_H
+#define PVR_SHADER_H
+
+#include <stddef.h>
+
+#include "compiler/shader_enums.h"
+#include "nir/nir.h"
+#include "vulkan/vulkan.h"
+
+struct rogue_build_ctx;
+struct rogue_compiler;
+struct rogue_shader;
+
+nir_shader *
+pvr_spirv_to_nir(struct rogue_build_ctx *ctx,
+                 gl_shader_stage stage,
+                 const VkPipelineShaderStageCreateInfo *create_info);
+
+struct rogue_shader *pvr_nir_to_rogue(struct rogue_build_ctx *ctx,
+                                      nir_shader *nir);
+
+struct rogue_shader_binary *pvr_rogue_to_binary(struct rogue_build_ctx *ctx,
+                                                struct rogue_shader *rogue);
+
+#endif /* PVR_SHADER_H */
diff --git a/src/imagination/vulkan/pvr_tex_state.c b/src/imagination/vulkan/pvr_tex_state.c
new file mode 100644 (file)
index 0000000..4ad37ee
--- /dev/null
@@ -0,0 +1,208 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_formats.h"
+#include "pvr_private.h"
+#include "pvr_tex_state.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_format.h"
+#include "vk_log.h"
+
+/* Maps a gallium swizzle selector onto the HW texture-state encoding.
+ *
+ * For PIPE_SWIZZLE_NONE, the alpha component reads as one and all other
+ * components read as zero.
+ */
+static enum ROGUE_TEXSTATE_SWIZ pvr_get_hw_swizzle(VkComponentSwizzle comp,
+                                                   enum pipe_swizzle swz)
+{
+   if (swz == PIPE_SWIZZLE_X)
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_0;
+   if (swz == PIPE_SWIZZLE_Y)
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_1;
+   if (swz == PIPE_SWIZZLE_Z)
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_2;
+   if (swz == PIPE_SWIZZLE_W)
+      return ROGUE_TEXSTATE_SWIZ_SRCCHAN_3;
+   if (swz == PIPE_SWIZZLE_0)
+      return ROGUE_TEXSTATE_SWIZ_SRC_ZERO;
+   if (swz == PIPE_SWIZZLE_1)
+      return ROGUE_TEXSTATE_SWIZ_SRC_ONE;
+
+   if (swz == PIPE_SWIZZLE_NONE) {
+      return comp == VK_COMPONENT_SWIZZLE_A ? ROGUE_TEXSTATE_SWIZ_SRC_ONE
+                                            : ROGUE_TEXSTATE_SWIZ_SRC_ZERO;
+   }
+
+   unreachable("Unknown enum pipe_swizzle");
+}
+
+/* Packs the two HW TEXSTATE image words for a texture/image view.
+ *
+ * Word 0 holds texture type, format, sample count, swizzle, gamma and
+ * dimensions; word 1 is layout-specific (strided vs. twiddled/3D) and holds
+ * mip, stride and address information.
+ *
+ * \return VK_SUCCESS, or VK_ERROR_FORMAT_NOT_SUPPORTED for memory-layout /
+ *         view-type combinations that cannot be expressed in HW state.
+ */
+VkResult
+pvr_pack_tex_state(struct pvr_device *device,
+                   struct pvr_texture_state_info *info,
+                   uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS])
+{
+   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
+   uint32_t texture_type;
+
+   pvr_csb_pack (&state[0], TEXSTATE_IMAGE_WORD0, word0) {
+      /* Determine texture type */
+      if (info->is_cube && info->tex_state_type == PVR_TEXTURE_STATE_SAMPLE) {
+         word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_CUBE);
+      } else if (info->mem_layout == PVR_MEMLAYOUT_TWIDDLED ||
+                 info->mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
+         if (info->type == VK_IMAGE_VIEW_TYPE_3D) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_3D);
+         } else if (info->type == VK_IMAGE_VIEW_TYPE_1D ||
+                    info->type == VK_IMAGE_VIEW_TYPE_1D_ARRAY) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_1D);
+         } else if (info->type == VK_IMAGE_VIEW_TYPE_2D ||
+                    info->type == VK_IMAGE_VIEW_TYPE_2D_ARRAY) {
+            word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_2D);
+         } else {
+            return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+         }
+      } else if (info->mem_layout == PVR_MEMLAYOUT_LINEAR) {
+         word0.textype = texture_type = PVRX(TEXSTATE_TEXTYPE_STRIDE);
+      } else {
+         return vk_error(device, VK_ERROR_FORMAT_NOT_SUPPORTED);
+      }
+
+      word0.texformat = pvr_get_tex_format(info->format);
+      word0.smpcnt = util_logbase2(info->sample_count);
+      word0.swiz0 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_R, info->swizzle[0]);
+      word0.swiz1 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_G, info->swizzle[1]);
+      word0.swiz2 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_B, info->swizzle[2]);
+      word0.swiz3 =
+         pvr_get_hw_swizzle(VK_COMPONENT_SWIZZLE_A, info->swizzle[3]);
+
+      /* Gamma */
+      if (vk_format_is_srgb(info->format)) {
+         /* Gamma for 2 Component Formats has to be handled differently. */
+         if (vk_format_get_nr_components(info->format) == 2) {
+            /* Enable Gamma only for Channel 0 if Channel 1 is an Alpha
+             * Channel.
+             */
+            if (vk_format_has_alpha(info->format)) {
+               word0.twocomp_gamma = PVRX(TEXSTATE_TWOCOMP_GAMMA_R);
+            } else {
+               /* Otherwise Enable Gamma for both the Channels. */
+               word0.twocomp_gamma = PVRX(TEXSTATE_TWOCOMP_GAMMA_RG);
+
+               /* If Channel 0 happens to be the Alpha Channel, the
+                * ALPHA_MSB bit would not be set thereby disabling Gamma
+                * for Channel 0.
+                */
+            }
+         } else {
+            word0.gamma = PVRX(TEXSTATE_GAMMA_ON);
+         }
+      }
+
+      word0.width = info->extent.width - 1;
+
+      /* 1D views have no height. Note: this previously used '||', which is
+       * always true and so also programmed height for 1D textures.
+       */
+      if (info->type != VK_IMAGE_VIEW_TYPE_1D &&
+          info->type != VK_IMAGE_VIEW_TYPE_1D_ARRAY)
+         word0.height = info->extent.height - 1;
+   }
+
+   /* Texture type specific stuff (word 1) */
+   if (texture_type == PVRX(TEXSTATE_TEXTYPE_STRIDE)) {
+      pvr_csb_pack (&state[1], TEXSTATE_STRIDE_IMAGE_WORD1, word1) {
+         word1.stride = info->stride;
+         word1.num_mip_levels = info->mip_levels;
+         word1.mipmaps_present = info->mipmaps_present;
+
+         word1.texaddr = info->addr;
+         word1.texaddr.addr += info->offset;
+
+         if (vk_format_is_alpha_on_msb(info->format))
+            word1.alpha_msb = true;
+
+         if (!PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
+             !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2)) {
+            if (info->flags & PVR_TEXFLAGS_INDEX_LOOKUP ||
+                info->flags & PVR_TEXFLAGS_BUFFER)
+               word1.index_lookup = true;
+         }
+
+         if (info->flags & PVR_TEXFLAGS_BUFFER)
+            word1.mipmaps_present = false;
+
+         if (PVR_HAS_FEATURE(dev_info, tpu_image_state_v2) &&
+             vk_format_is_compressed(info->format))
+            word1.tpu_image_state_v2_compression_mode =
+               PVRX(TEXSTATE_COMPRESSION_MODE_TPU);
+      }
+   } else {
+      pvr_csb_pack (&state[1], TEXSTATE_IMAGE_WORD1, word1) {
+         word1.num_mip_levels = info->mip_levels;
+         word1.mipmaps_present = info->mipmaps_present;
+         word1.baselevel = info->base_level;
+
+         if (info->extent.depth > 0) {
+            word1.depth = info->extent.depth - 1;
+         } else if (PVR_HAS_FEATURE(dev_info, tpu_array_textures)) {
+            uint32_t array_layers = info->array_size;
+
+            /* Cube array depth is in cubes, not faces. */
+            if (info->type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY &&
+                info->tex_state_type == PVR_TEXTURE_STATE_SAMPLE)
+               array_layers /= 6;
+
+            word1.depth = array_layers - 1;
+         }
+
+         word1.texaddr = info->addr;
+         word1.texaddr.addr += info->offset;
+
+         if (!PVR_HAS_FEATURE(dev_info, tpu_extended_integer_lookup) &&
+             !PVR_HAS_FEATURE(dev_info, tpu_image_state_v2)) {
+            if (info->flags & PVR_TEXFLAGS_INDEX_LOOKUP ||
+                info->flags & PVR_TEXFLAGS_BUFFER)
+               word1.index_lookup = true;
+         }
+
+         if (info->flags & PVR_TEXFLAGS_BUFFER)
+            word1.mipmaps_present = false;
+
+         if (info->flags & PVR_TEXFLAGS_BORDER)
+            word1.border = true;
+
+         if (vk_format_is_alpha_on_msb(info->format))
+            word1.alpha_msb = true;
+
+         if (PVR_HAS_FEATURE(dev_info, tpu_image_state_v2) &&
+             vk_format_is_compressed(info->format))
+            word1.tpu_image_state_v2_compression_mode =
+               PVRX(TEXSTATE_COMPRESSION_MODE_TPU);
+      }
+   }
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/pvr_tex_state.h b/src/imagination/vulkan/pvr_tex_state.h
new file mode 100644 (file)
index 0000000..f6f18d9
--- /dev/null
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_TEX_STATE_H
+#define PVR_TEX_STATE_H
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_private.h"
+#include "util/macros.h"
+
+/**
+ * Texture requires 32bit index lookups instead of texture coordinate access.
+ */
+#define PVR_TEXFLAGS_INDEX_LOOKUP BITFIELD_BIT(0U)
+
+/** Texture has border texels present. */
+#define PVR_TEXFLAGS_BORDER BITFIELD_BIT(1U)
+
+/**
+ * Resource is actually a buffer, not a texture, and therefore LOD is ignored.
+ * Coordinates are integers.
+ */
+#define PVR_TEXFLAGS_BUFFER BITFIELD_BIT(2U)
+
+/** Parameters for #pvr_pack_tex_state(). */
+struct pvr_texture_state_info {
+   VkFormat format;
+   enum pvr_memlayout mem_layout;
+   uint32_t flags; /**< Combination of PVR_TEXFLAGS_* bits. */
+   VkImageViewType type;
+   bool is_cube;
+   enum pvr_texture_state tex_state_type;
+   VkExtent3D extent;
+
+   /**
+    * For array textures, this holds the array dimension, in elements. This can
+    * be zero if texture is not an array.
+    */
+   uint32_t array_size;
+
+   /** Base mipmap level. This is the miplevel you want as the top level. */
+   uint32_t base_level;
+
+   /**
+    * Number of mipmap levels that should be accessed by HW. This is not
+    * necessarily the number of levels that are in memory. (See
+    * mipmaps_present)
+    */
+   uint32_t mip_levels;
+
+   /**
+    * True if the texture is mipmapped.
+    * Note: This is based on the number of mip levels the texture contains, not
+    * on the mip levels that are being used i.e. mip_levels.
+    */
+   bool mipmaps_present;
+
+   /**
+    * Number of samples per texel for multisampling. This should be 1 for
+    * non-multisampled textures.
+    */
+   uint32_t sample_count;
+
+   /** Stride, in pixels. Only valid if mem_layout is stride or tiled. */
+   uint32_t stride;
+
+   /**
+    * For buffers, where TPU_BUFFER_LOOKUP is present, this defines
+    * the offset for the buffer, in texels.
+    */
+   uint32_t offset;
+
+   /**
+    * Precomputed (composed from createinfo->components and format swizzle)
+    * swizzles to pass in to the texture state.
+    */
+   uint8_t swizzle[4];
+
+   /** Address of texture, which must be aligned to at least 32bits. */
+   pvr_dev_addr_t addr;
+};
+
+VkResult
+pvr_pack_tex_state(struct pvr_device *device,
+                   struct pvr_texture_state_info *info,
+                   uint64_t state[static const ROGUE_NUM_TEXSTATE_IMAGE_WORDS]);
+
+#endif /* PVR_TEX_STATE_H */
diff --git a/src/imagination/vulkan/pvr_wsi.c b/src/imagination/vulkan/pvr_wsi.c
new file mode 100644 (file)
index 0000000..aadf186
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based on intel anv code:
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "util/u_atomic.h"
+#include "wsi_common.h"
+
+/* WSI callback: resolves driver entrypoints through the common vk_instance. */
+static PFN_vkVoidFunction pvr_wsi_proc_addr(VkPhysicalDevice physicalDevice,
+                                            const char *pName)
+{
+   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
+   struct vk_instance *vk_instance = &pdevice->instance->vk;
+
+   return vk_instance_get_proc_addr_unchecked(vk_instance, pName);
+}
+
+/**
+ * Initializes window-system-integration support for the physical device.
+ * Counterpart to pvr_wsi_finish().
+ */
+VkResult pvr_wsi_init(struct pvr_physical_device *pdevice)
+{
+   VkResult result;
+
+   result = wsi_device_init(&pdevice->wsi_device,
+                            pvr_physical_device_to_handle(pdevice),
+                            pvr_wsi_proc_addr,
+                            &pdevice->vk.instance->alloc,
+                            pdevice->master_fd,
+                            NULL,
+                            false);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* Advertise DRM format modifier support to the common WSI code, then
+    * publish the WSI device so the common Vulkan runtime can find it.
+    */
+   pdevice->wsi_device.supports_modifiers = true;
+   pdevice->vk.wsi_device = &pdevice->wsi_device;
+
+   return VK_SUCCESS;
+}
+
+/** Tears down WSI support; counterpart to pvr_wsi_init(). */
+void pvr_wsi_finish(struct pvr_physical_device *pdevice)
+{
+   /* Unpublish the WSI device before destroying it so the common runtime
+    * never sees a dangling pointer.
+    */
+   pdevice->vk.wsi_device = NULL;
+   wsi_device_finish(&pdevice->wsi_device, &pdevice->vk.instance->alloc);
+}
+
+/* Presents via the common WSI code and counts successful presents. */
+VkResult pvr_QueuePresentKHR(VkQueue _queue,
+                             const VkPresentInfoKHR *pPresentInfo)
+{
+   PVR_FROM_HANDLE(pvr_queue, queue, _queue);
+   struct pvr_device *device = queue->device;
+
+   const VkResult result =
+      wsi_common_queue_present(&device->pdevice->wsi_device,
+                               pvr_device_to_handle(device),
+                               _queue,
+                               0,
+                               pPresentInfo);
+
+   /* Only successful presents are counted. */
+   if (result == VK_SUCCESS)
+      p_atomic_inc(&device->global_queue_present_count);
+
+   return result;
+}
+
+/**
+ * Acquires the next presentable image through the common WSI code, then
+ * signals the caller-provided fence and/or semaphore sync objects.
+ */
+VkResult pvr_AcquireNextImage2KHR(VkDevice _device,
+                                  const VkAcquireNextImageInfoKHR *pAcquireInfo,
+                                  uint32_t *pImageIndex)
+{
+   PVR_FROM_HANDLE(pvr_device, device, _device);
+   struct pvr_winsys_syncobj *handles[2];
+   uint32_t count = 0U;
+   VkResult result;
+   VkResult ret;
+
+   result = wsi_common_acquire_next_image2(&device->pdevice->wsi_device,
+                                           _device,
+                                           pAcquireInfo,
+                                           pImageIndex);
+   /* VK_SUBOPTIMAL_KHR is a success code: continue so the caller's sync
+    * objects still get signalled.
+    */
+   if (result != VK_SUCCESS && result != VK_SUBOPTIMAL_KHR)
+      return result;
+
+   if (pAcquireInfo->fence) {
+      PVR_FROM_HANDLE(pvr_fence, fence, pAcquireInfo->fence);
+      handles[count++] = fence->syncobj;
+   }
+
+   if (pAcquireInfo->semaphore) {
+      PVR_FROM_HANDLE(pvr_semaphore, semaphore, pAcquireInfo->semaphore);
+      handles[count++] = semaphore->syncobj;
+   }
+
+   /* No sync objects to signal; report the acquire status as-is. */
+   if (count == 0U)
+      return result;
+
+   /* We need to preserve VK_SUBOPTIMAL_KHR status. */
+   ret = device->ws->ops->syncobjs_signal(device->ws, handles, count);
+   if (ret != VK_SUCCESS)
+      return ret;
+
+   return result;
+}
diff --git a/src/imagination/vulkan/usc/programs/pvr_cdm_load_sr.h b/src/imagination/vulkan/usc/programs/pvr_cdm_load_sr.h
new file mode 100644 (file)
index 0000000..99b3625
--- /dev/null
@@ -0,0 +1,120 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_CDM_LOAD_SR_H
+#define PVR_CDM_LOAD_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+static const uint8_t pvr_cdm_load_sr_code[] = {
+       0x25, 0x02, 0x87, 0x81,
+       0x04, 0x00, 0x00, 0x00,
+       0x84, 0x04, 0x25, 0x02,
+       0x87, 0x80, 0x04, 0x00,
+       0x00, 0x00, 0x85, 0x04,
+       0x25, 0x02, 0x87, 0x83,
+       0x04, 0x00, 0x00, 0x00,
+       0x86, 0x04, 0x25, 0x02,
+       0x87, 0x82, 0x04, 0x00,
+       0x00, 0x00, 0x87, 0x04,
+       0x56, 0x20, 0xF1, 0x85,
+       0x02, 0x80, 0x81, 0xD0,
+       0xC4, 0x08, 0x00, 0xFF,
+       0x02, 0x80, 0x6A, 0xFF,
+       0x67, 0xF0, 0x40, 0x20,
+       0x41, 0x8C, 0x80, 0x40,
+       0x00, 0x50, 0x8F, 0xC0,
+       0x80, 0x02, 0x04, 0x81,
+       0x60, 0x00, 0x0A, 0x01,
+       0x00, 0x00, 0x25, 0x36,
+       0x87, 0x87, 0x00, 0x00,
+       0x40, 0x05, 0xD1, 0x06,
+       0x55, 0x20, 0xF1, 0x81,
+       0x02, 0x00, 0xC0, 0xC6,
+       0x08, 0x00, 0x02, 0x80,
+       0x6A, 0xFF, 0x46, 0x42,
+       0xD0, 0x03, 0xEA, 0xD1,
+       0x41, 0x00, 0x01, 0x00,
+       0x00, 0x51, 0x27, 0x06,
+       0xEB, 0x84, 0x50, 0x20,
+       0x86, 0x87, 0x04, 0x00,
+       0xC0, 0x06, 0x87, 0x22,
+       0x25, 0x32, 0x87, 0x87,
+       0x00, 0x1F, 0x40, 0xC5,
+       0x0C, 0xFF, 0x25, 0x02,
+       0x87, 0xC0, 0x0C, 0x00,
+       0x00, 0x00, 0x83, 0x0C,
+       0x47, 0x42, 0xD0, 0x03,
+       0xEA, 0x85, 0x41, 0x90,
+       0x01, 0x08, 0x00, 0x00,
+       0x85, 0x0C, 0x47, 0x42,
+       0xD0, 0x03, 0xEA, 0xC5,
+       0x41, 0x90, 0x01, 0x08,
+       0x00, 0x00, 0xC5, 0x0C,
+       0x67, 0xF0, 0x40, 0x28,
+       0x42, 0x8C, 0x80, 0x40,
+       0x80, 0xC5, 0x80, 0x90,
+       0x80, 0xFF, 0x04, 0x81,
+       0x60, 0x00, 0xCC, 0xFF,
+       0xFF, 0xFF, 0x66, 0xF0,
+       0x40, 0x28, 0x42, 0x8C,
+       0x80, 0x40, 0x00, 0x51,
+       0xD0, 0x80, 0x04, 0x81,
+       0x60, 0x00, 0x86, 0xFF,
+       0xFF, 0xFF, 0x46, 0x40,
+       0xF1, 0xB0, 0xE2, 0x81,
+       0x4D, 0x01, 0x00, 0x00,
+       0x00, 0xFF, 0x04, 0x81,
+       0x60, 0x00, 0x32, 0x00,
+       0x00, 0x00, 0x02, 0x80,
+       0x6C, 0xC4, 0x45, 0x12,
+       0xD3, 0x3F, 0x01, 0x00,
+       0x00, 0xAB, 0x01, 0xFF,
+       0x44, 0x12, 0xD3, 0x3F,
+       0x00, 0x00, 0x00, 0x40,
+       0x44, 0x10, 0xD3, 0x3F,
+       0x40, 0x00, 0x00, 0xFF,
+       0x02, 0x80, 0x6C, 0x84,
+       0x04, 0x80, 0x60, 0x00,
+       0x40, 0x00, 0x00, 0x00,
+       0x46, 0x50, 0xFB, 0xB0,
+       0x87, 0xE2, 0x81, 0x4B,
+       0x03, 0x00, 0x00, 0x00,
+       0x04, 0x81, 0x61, 0x00,
+       0xF4, 0xFF, 0xFF, 0xFF,
+       0x02, 0x80, 0x6C, 0xC4,
+       0x45, 0x12, 0xD3, 0x3F,
+       0x01, 0x00, 0x00, 0xAA,
+       0x01, 0xFF, 0x44, 0x12,
+       0xD3, 0x3F, 0x00, 0x00,
+       0x00, 0x40, 0x44, 0x10,
+       0xD3, 0x3F, 0x40, 0x00,
+       0x00, 0xFF, 0x03, 0x80,
+       0x6C, 0x84, 0xF1, 0xFF,
+       0x04, 0x80, 0xEE, 0x00,
+       0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_CDM_LOAD_SR_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_end_of_tile.h b/src/imagination/vulkan/usc/programs/pvr_end_of_tile.h
new file mode 100644 (file)
index 0000000..2ec962c
--- /dev/null
@@ -0,0 +1,46 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_END_OF_TILE_H
+#define PVR_END_OF_TILE_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Pre-assembled USC device byte code; contents are opaque hardware
+ * instructions (presumably the end-of-tile program, per the file name).
+ */
+static const uint8_t pvr_end_of_tile_program[] = {
+   0xa9, 0xf2, 0x40, 0x00,
+   0x47, 0x91, 0x00, 0x50,
+   0x04, 0x00, 0x80, 0x40,
+   0x00, 0x00, 0x80, 0x80,
+   0x24, 0xff, 0xa9, 0xf2,
+   0x40, 0x00, 0x47, 0x91,
+   0x20, 0x20, 0x08, 0x00,
+   0x80, 0x40, 0x00, 0x00,
+   0x80, 0x80, 0x25, 0xff,
+   0x45, 0xa0, 0x80, 0xc2,
+   0xa4, 0x40, 0x00, 0x25,
+   0x00, 0x00
+};
+/* clang-format on */
+
+#endif /* PVR_END_OF_TILE_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_usc_compute_shader.h b/src/imagination/vulkan/usc/programs/pvr_usc_compute_shader.h
new file mode 100644 (file)
index 0000000..0d59c75
--- /dev/null
@@ -0,0 +1,102 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_USC_COMPUTE_SHADER_H
+#define PVR_USC_COMPUTE_SHADER_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Pre-assembled USC device byte code for the compute shader program.
+ *
+ * Declared `static const` like the sibling program headers: without `static`
+ * this is an initialized object with external linkage defined in a header,
+ * so including it from more than one translation unit causes multiple
+ * definition link errors (and the data has no reason to be mutable).
+ * NOTE(review): this file is auto-generated; the fix belongs in the
+ * generator as well.
+ */
+static const uint8_t pvr_usc_compute_shader[] = {
+   0x44, 0x12, 0xd3, 0x3f,
+   0x00, 0x00, 0x00, 0x24,
+   0x46, 0x40, 0xf9, 0xb0,
+   0x87, 0x80, 0x40, 0xa0,
+   0x2a, 0x30, 0x00, 0x02,
+   0x04, 0x81, 0x61, 0x00,
+   0x54, 0x00, 0x00, 0x00,
+   0x02, 0x80, 0x6c, 0xc4,
+   0x46, 0x40, 0xf9, 0xb0,
+   0x87, 0x81, 0x40, 0xa0,
+   0x2d, 0x10, 0x00, 0x02,
+   0x46, 0x12, 0xd3, 0x3f,
+   0x80, 0xca, 0x83, 0x10,
+   0x00, 0x00, 0x25, 0xff,
+   0x46, 0x13, 0xd3, 0x3f,
+   0x80, 0xcb, 0x83, 0x10,
+   0x00, 0x00, 0x25, 0xff,
+   0x46, 0x40, 0xf9, 0xb0,
+   0x87, 0x80, 0x40, 0xa0,
+   0x25, 0x10, 0x00, 0x02,
+   0x04, 0x81, 0x61, 0x00,
+   0x14, 0x00, 0x00, 0x00,
+   0x02, 0x80, 0x6c, 0x44,
+   0x04, 0x80, 0x60, 0x00,
+   0xc0, 0xff, 0xff, 0xff,
+   0x02, 0x80, 0x6c, 0x04,
+   0x89, 0x52, 0xdf, 0x3c,
+   0xfc, 0xa0, 0x9c, 0x1e,
+   0x87, 0x87, 0x80, 0xcf,
+   0x90, 0x11, 0x01, 0xa0,
+   0x25, 0xff, 0x46, 0x40,
+   0xff, 0xd0, 0x87, 0xa5,
+   0x40, 0xa0, 0x00, 0x10,
+   0x00, 0x02, 0x44, 0x82,
+   0x67, 0x38, 0x24, 0x00,
+   0x24, 0xff, 0x04, 0x80,
+   0x60, 0x04, 0x68, 0x00,
+   0x00, 0x00, 0x45, 0x12,
+   0xd3, 0x3f, 0xc0, 0x04,
+   0x00, 0x00, 0x00, 0x25,
+   0x27, 0x02, 0xeb, 0xa5,
+   0x44, 0xa0, 0x00, 0x80,
+   0x81, 0x08, 0x00, 0xc0,
+   0x80, 0x04, 0x27, 0x04,
+   0xeb, 0xa5, 0x44, 0xa0,
+   0x00, 0x80, 0x81, 0x08,
+   0x00, 0xc0, 0x81, 0x04,
+   0x55, 0x20, 0xf1, 0x84,
+   0x02, 0x00, 0x82, 0xc0,
+   0x18, 0x00, 0x02, 0x80,
+   0x6a, 0xff, 0x27, 0x02,
+   0xeb, 0xa5, 0x44, 0xa0,
+   0x02, 0x80, 0x83, 0x08,
+   0x00, 0xc0, 0x80, 0x04,
+   0x27, 0x04, 0xeb, 0xa5,
+   0x44, 0xa0, 0x02, 0x80,
+   0x83, 0x08, 0x00, 0xc0,
+   0x81, 0x04, 0x66, 0x20,
+   0xf2, 0x86, 0xb8, 0x28,
+   0x00, 0x82, 0xc0, 0x18,
+   0x00, 0xff, 0x02, 0x80,
+   0x6a, 0xff, 0x45, 0x82,
+   0x67, 0x24, 0x24, 0x00,
+   0x24, 0xff, 0xf1, 0xff,
+   0x04, 0x80, 0xee, 0x00,
+   0xf2, 0xff, 0xff, 0xff
+};
+/* clang-format on */
+
+#endif /* PVR_USC_COMPUTE_SHADER_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_usc_fragment_shader.h b/src/imagination/vulkan/usc/programs/pvr_usc_fragment_shader.h
new file mode 100644 (file)
index 0000000..838a155
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_USC_FRAGMENT_SHADER_H
+#define PVR_USC_FRAGMENT_SHADER_H
+
+#include <stdint.h>
+
+/* clang-format off */
+/* Pre-assembled USC device byte code; contents are opaque hardware
+ * instructions (presumably a fragment shader program, per the file name).
+ */
+static const uint8_t pvr_usc_fragment_shader[] = {
+   0x58, 0x9a, 0x80, 0xd3,
+   0x3f, 0x80, 0x08, 0x00,
+   0x00, 0x00, 0x20, 0xff,
+   0xf2, 0xff, 0xff, 0xff
+};
+/* clang-format on */
+
+#endif /* PVR_USC_FRAGMENT_SHADER_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_vdm_load_sr.h b/src/imagination/vulkan/usc/programs/pvr_vdm_load_sr.h
new file mode 100644 (file)
index 0000000..ab97096
--- /dev/null
@@ -0,0 +1,95 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_VDM_LOAD_SR_H
+#define PVR_VDM_LOAD_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+static const uint8_t pvr_vdm_load_sr_code[] =
+{
+   0x25, 0x02, 0x87, 0x81,
+   0x04, 0x00, 0x00, 0x00,
+   0x84, 0x04, 0x25, 0x02,
+   0x87, 0x80, 0x04, 0x00,
+   0x00, 0x00, 0x85, 0x04,
+   0x25, 0x02, 0x87, 0x83,
+   0x04, 0x00, 0x00, 0x00,
+   0x86, 0x04, 0x25, 0x02,
+   0x87, 0x82, 0x04, 0x00,
+   0x00, 0x00, 0x87, 0x04,
+   0x56, 0x20, 0xF1, 0x85,
+   0x02, 0x80, 0x81, 0xD0,
+   0xC4, 0x08, 0x00, 0xFF,
+   0x02, 0x80, 0x6A, 0xFF,
+   0x67, 0xF0, 0x40, 0x20,
+   0x41, 0x8C, 0x80, 0x40,
+   0x00, 0x50, 0x8F, 0xC0,
+   0x80, 0x02, 0x04, 0x81,
+   0x60, 0x00, 0x9A, 0x00,
+   0x00, 0x00, 0x25, 0x36,
+   0x87, 0x87, 0x00, 0x00,
+   0x40, 0x05, 0xD1, 0x06,
+   0x55, 0x20, 0xF1, 0x81,
+   0x02, 0x00, 0xC0, 0xC6,
+   0x08, 0x00, 0x02, 0x80,
+   0x6A, 0xFF, 0x46, 0x42,
+   0xD0, 0x03, 0xEA, 0xD1,
+   0x41, 0x00, 0x01, 0x00,
+   0x00, 0x51, 0x27, 0x06,
+   0xEB, 0x84, 0x50, 0x20,
+   0x86, 0x87, 0x04, 0x00,
+   0xC0, 0x06, 0x87, 0x22,
+   0x25, 0x32, 0x87, 0x87,
+   0x00, 0x1F, 0x40, 0xC5,
+   0x0C, 0xFF, 0x25, 0x02,
+   0x87, 0xC0, 0x0C, 0x00,
+   0x00, 0x00, 0x83, 0x0C,
+   0x47, 0x42, 0xD0, 0x03,
+   0xEA, 0x85, 0x41, 0x90,
+   0x01, 0x08, 0x00, 0x00,
+   0x85, 0x0C, 0x47, 0x42,
+   0xD0, 0x03, 0xEA, 0xC5,
+   0x41, 0x90, 0x01, 0x08,
+   0x00, 0x00, 0xC5, 0x0C,
+   0x67, 0xF0, 0x40, 0x28,
+   0x42, 0x8C, 0x80, 0x40,
+   0x80, 0xC5, 0x80, 0x90,
+   0x80, 0xFF, 0x04, 0x81,
+   0x60, 0x00, 0xCC, 0xFF,
+   0xFF, 0xFF, 0x66, 0xF0,
+   0x40, 0x28, 0x42, 0x8C,
+   0x80, 0x40, 0x00, 0x51,
+   0xD0, 0x80, 0x07, 0x81,
+   0x60, 0x00, 0x86, 0xFF,
+   0xFF, 0xFF, 0xF3, 0xFF,
+   0xFF, 0xFF, 0xFF, 0xFF,
+   0x04, 0x80, 0xEE, 0x00,
+   0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_VDM_LOAD_SR_H */
diff --git a/src/imagination/vulkan/usc/programs/pvr_vdm_store_sr.h b/src/imagination/vulkan/usc/programs/pvr_vdm_store_sr.h
new file mode 100644 (file)
index 0000000..40aa57e
--- /dev/null
@@ -0,0 +1,117 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* Auto-generated file - don't edit */
+
+#ifndef PVR_VDM_STORE_SR_H
+#define PVR_VDM_STORE_SR_H
+
+#include <stdint.h>
+
+/* clang-format off */
+static const uint8_t pvr_vdm_store_sr_code[] =
+{
+   0x25, 0x02, 0x87, 0x81,
+   0x04, 0x00, 0x00, 0x00,
+   0x40, 0xFF, 0x25, 0x02,
+   0x87, 0x80, 0x04, 0x00,
+   0x00, 0x00, 0x41, 0xFF,
+   0x25, 0x02, 0x87, 0x83,
+   0x04, 0x00, 0x00, 0x00,
+   0x86, 0x04, 0x25, 0x02,
+   0x87, 0x82, 0x04, 0x00,
+   0x00, 0x00, 0x87, 0x04,
+   0x68, 0xF2, 0x40, 0x20,
+   0x41, 0x8C, 0x80, 0x40,
+   0x80, 0x27, 0x20, 0x8F,
+   0xC0, 0x80, 0x02, 0x42,
+   0x66, 0x20, 0xF2, 0x84,
+   0xB8, 0x28, 0x80, 0xA2,
+   0xC2, 0xA0, 0x00, 0xFF,
+   0x44, 0x20, 0xE0, 0x00,
+   0xC0, 0xA1, 0x00, 0x00,
+   0x02, 0x80, 0x6A, 0xFF,
+   0x04, 0x81, 0x60, 0x00,
+   0xE8, 0x00, 0x00, 0x00,
+   0x25, 0x36, 0x87, 0x87,
+   0x00, 0x00, 0x40, 0x05,
+   0xC3, 0x06, 0x25, 0x02,
+   0xE2, 0x90, 0x50, 0x00,
+   0x00, 0x00, 0x00, 0x45,
+   0x25, 0x02, 0xE2, 0xC5,
+   0x44, 0x00, 0x00, 0x00,
+   0x00, 0x45, 0x89, 0xF2,
+   0x40, 0x21, 0x49, 0x9C,
+   0xC0, 0x00, 0x80, 0x40,
+   0x00, 0x42, 0x8F, 0xC6,
+   0x80, 0x02, 0x44, 0xFF,
+   0x04, 0x81, 0x60, 0x00,
+   0x62, 0x00, 0x00, 0x00,
+   0x68, 0x20, 0xE2, 0x88,
+   0xB8, 0x28, 0x80, 0x64,
+   0x00, 0x00, 0x83, 0xC6,
+   0x98, 0x08, 0x00, 0xFF,
+   0x45, 0x20, 0xE0, 0x00,
+   0x86, 0xE1, 0x10, 0x00,
+   0x00, 0xFF, 0x02, 0x80,
+   0x6A, 0xFF, 0x47, 0x42,
+   0xD0, 0x03, 0xE2, 0xC5,
+   0x41, 0x80, 0x85, 0x80,
+   0x00, 0x00, 0x85, 0x0C,
+   0x46, 0x42, 0xD0, 0x03,
+   0xE2, 0xC3, 0x41, 0x00,
+   0x01, 0x00, 0x00, 0x43,
+   0x27, 0x06, 0xE3, 0x84,
+   0x65, 0x20, 0x86, 0x87,
+   0x04, 0x00, 0xC0, 0x06,
+   0x87, 0x22, 0x66, 0xF0,
+   0x40, 0x28, 0x42, 0x8C,
+   0x80, 0x40, 0x00, 0x43,
+   0xC4, 0x80, 0x04, 0x81,
+   0x60, 0x00, 0xAE, 0xFF,
+   0xFF, 0xFF, 0x46, 0x42,
+   0xD0, 0x03, 0xE2, 0x84,
+   0x4F, 0x00, 0x03, 0x00,
+   0x00, 0x46, 0x67, 0xF2,
+   0x40, 0x00, 0x41, 0x8C,
+   0x80, 0x40, 0x00, 0x42,
+   0xC6, 0x84, 0x42, 0xFF,
+   0x66, 0xF0, 0x40, 0x20,
+   0x41, 0x8C, 0x80, 0x40,
+   0x00, 0x42, 0xC2, 0x80,
+   0x04, 0x81, 0x60, 0x00,
+   0x28, 0x00, 0x00, 0x00,
+   0x68, 0x20, 0xE2, 0x88,
+   0xB8, 0x28, 0x80, 0x62,
+   0x00, 0x42, 0x83, 0xC6,
+   0x98, 0x08, 0x00, 0xFF,
+   0x45, 0x20, 0xE0, 0x00,
+   0x86, 0xE1, 0x10, 0x00,
+   0x00, 0xFF, 0x03, 0x80,
+   0x6A, 0xFF, 0xF1, 0xFF,
+   0x04, 0x80, 0xEE, 0x00,
+   0xF2, 0xFF, 0xFF, 0xFF
+};
+/* clang-format on */
+
+#endif /* PVR_VDM_STORE_SR_H */
diff --git a/src/imagination/vulkan/vk_format.h b/src/imagination/vulkan/vk_format.h
new file mode 100644 (file)
index 0000000..627dc86
--- /dev/null
@@ -0,0 +1,122 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * based in part on radv driver which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Based on u_format.h which is:
+ * Copyright 2009-2010 VMware, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* clang-format off */
+#ifndef VK_FORMAT_H
+#define VK_FORMAT_H
+
+#include <util/format/u_format.h>
+#include <vulkan/util/vk_format.h>
+
+#include <vulkan/vulkan.h>
+
+#include "util/u_endian.h"
+
+/* Returns true for an (s)RGB format whose alpha channel occupies the most
+ * significant bits, i.e. the last swizzle slot for this endianness.
+ */
+static inline bool
+vk_format_is_alpha_on_msb(VkFormat vk_format)
+{
+   const struct util_format_description *desc =
+      vk_format_description(vk_format);
+
+#if defined(UTIL_ARCH_BIG_ENDIAN)
+   const bool alpha_is_last = desc->swizzle[3] == PIPE_SWIZZLE_X;
+#else
+   const bool alpha_is_last = desc->swizzle[3] == PIPE_SWIZZLE_W;
+#endif
+
+   return (desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
+           desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) &&
+          alpha_is_last;
+}
+
+/* Returns true if the format contains an alpha channel.
+ * Uses standard `bool` rather than the legacy gallium `boolean` typedef,
+ * matching vk_format_is_alpha_on_msb() above.
+ */
+static inline bool
+vk_format_has_alpha(VkFormat vk_format)
+{
+   return util_format_has_alpha(vk_format_to_pipe_format(vk_format));
+}
+
+/* Returns true if the format is a pure (non-normalized) integer format.
+ * Uses standard `bool` rather than the legacy gallium `boolean` typedef.
+ */
+static inline bool
+vk_format_is_pure_integer(VkFormat vk_format)
+{
+   return util_format_is_pure_integer(vk_format_to_pipe_format(vk_format));
+}
+
+/* Returns the block size of the format, in bits.
+ * `unsigned` replaces the non-standard POSIX `uint` typedef for portability.
+ */
+static inline unsigned
+vk_format_get_blocksizebits(VkFormat vk_format)
+{
+   return util_format_get_blocksizebits(vk_format_to_pipe_format(vk_format));
+}
+
+/* Returns the size, in bits, of the given channel of the format.
+ * `unsigned` replaces the non-standard POSIX `uint` typedef for portability.
+ */
+static inline unsigned
+vk_format_get_channel_width(VkFormat vk_format, uint32_t channel)
+{
+   const struct util_format_description *desc =
+      vk_format_description(vk_format);
+
+   return desc->channel[channel].size;
+}
+
+/* Returns true if any channel of the format is 32 bits wide.
+ * Uses standard `bool` rather than the legacy gallium `boolean` typedef.
+ */
+static inline bool
+vk_format_has_32bit_component(VkFormat vk_format)
+{
+   const struct util_format_description *desc =
+      vk_format_description(vk_format);
+
+   for (uint32_t i = 0; i < desc->nr_channels; i++) {
+      if (desc->channel[i].size == 32U)
+         return true;
+   }
+
+   return false;
+}
+
+/* Returns the size, in bits, of a single component of the format in the
+ * given colorspace.
+ * `unsigned` replaces the non-standard POSIX `uint` typedef for portability.
+ */
+static inline unsigned
+vk_format_get_component_size_in_bits(VkFormat vk_format,
+                                     enum util_format_colorspace colorspace,
+                                     uint32_t component)
+{
+   return util_format_get_component_bits(vk_format_to_pipe_format(vk_format),
+                                         colorspace,
+                                         component);
+}
+
+/* Returns true if every channel of the format is normalized.
+ * Uses standard `bool` rather than the legacy gallium `boolean` typedef.
+ */
+static inline bool
+vk_format_is_normalized(VkFormat vk_format)
+{
+   const struct util_format_description *desc =
+      vk_format_description(vk_format);
+
+   for (uint32_t i = 0; i < desc->nr_channels; i++) {
+      if (!desc->channel[i].normalized)
+         return false;
+   }
+
+   return true;
+}
+
+#endif /* VK_FORMAT_H */
diff --git a/src/imagination/vulkan/winsys/powervr/pvr_drm.c b/src/imagination/vulkan/winsys/powervr/pvr_drm.c
new file mode 100644 (file)
index 0000000..3138fb6
--- /dev/null
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_drm_public.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+
+/**
+ * Winsys constructor for the upstream "powervr" DRM kernel driver.
+ *
+ * Currently a stub: logs a FINISHME and returns NULL until the powervr
+ * kernel UAPI is stable. Callers must handle the NULL return.
+ */
+struct pvr_winsys *pvr_drm_winsys_create(int master_fd,
+                                         int render_fd,
+                                         const VkAllocationCallbacks *alloc)
+{
+   pvr_finishme("Add implementation once powervr UAPI is stable.");
+
+   return NULL;
+}
diff --git a/src/imagination/vulkan/winsys/powervr/pvr_drm_public.h b/src/imagination/vulkan/winsys/powervr/pvr_drm_public.h
new file mode 100644 (file)
index 0000000..1326e03
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_DRM_PUBLIC_H
+#define PVR_DRM_PUBLIC_H
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+
+struct pvr_winsys *pvr_drm_winsys_create(int master_fd,
+                                         int render_fd,
+                                         const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_DRM_PUBLIC_H */
diff --git a/src/imagination/vulkan/winsys/pvr_winsys.c b/src/imagination/vulkan/winsys/pvr_winsys.c
new file mode 100644 (file)
index 0000000..94f9384
--- /dev/null
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+#include <xf86drm.h>
+
+#include "powervr/pvr_drm_public.h"
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "vk_log.h"
+
+#if defined(PVR_SUPPORT_SERVICES_DRIVER)
+#   include "pvrsrvkm/pvr_srv_public.h"
+#endif
+
+/* Tear down a winsys created with pvr_winsys_create() by dispatching to the
+ * backend-specific destroy hook.
+ */
+void pvr_winsys_destroy(struct pvr_winsys *ws)
+{
+   ws->ops->destroy(ws);
+}
+
+struct pvr_winsys *pvr_winsys_create(int master_fd,
+                                     int render_fd,
+                                     const VkAllocationCallbacks *alloc)
+{
+#if defined(PVR_SUPPORT_SERVICES_DRIVER)
+   drmVersionPtr version;
+   bool services_driver;
+
+   version = drmGetVersion(render_fd);
+   if (!version) {
+      vk_errorf(NULL,
+                VK_ERROR_INCOMPATIBLE_DRIVER,
+                "Failed to query kernel driver version for device.");
+      return NULL;
+   }
+
+   if (strcmp(version->name, "pvr") == 0) {
+      services_driver = true;
+   } else if (strcmp(version->name, "powervr") == 0) {
+      services_driver = false;
+   } else {
+      drmFreeVersion(version);
+      vk_errorf(
+         NULL,
+         VK_ERROR_INCOMPATIBLE_DRIVER,
+         "Device does not use any of the supported pvrsrvkm or powervr kernel driver.");
+      return NULL;
+   }
+
+   drmFreeVersion(version);
+
+   if (services_driver)
+      return pvr_srv_winsys_create(master_fd, render_fd, alloc);
+#endif
+
+   return pvr_drm_winsys_create(master_fd, render_fd, alloc);
+}
diff --git a/src/imagination/vulkan/winsys/pvr_winsys.h b/src/imagination/vulkan/winsys/pvr_winsys.h
new file mode 100644 (file)
index 0000000..a73ede5
--- /dev/null
@@ -0,0 +1,462 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Based on radv_radeon_winsys.h which is:
+ * Copyright © 2016 Red Hat.
+ * Copyright © 2016 Bas Nieuwenhuizen
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_WINSYS_H
+#define PVR_WINSYS_H
+
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "hwdef/rogue_hw_defs.h"
+#include "pvr_rogue_fw.h"
+#include "pvr_limits.h"
+#include "util/macros.h"
+#include "util/vma.h"
+
+struct pvr_device_info;
+
+/* Device (GPU) virtual address. Wrapped in a single-member struct so GPU
+ * addresses can't silently be mixed up with CPU pointers or other plain
+ * 64-bit integers.
+ */
+typedef struct pvr_dev_addr {
+   uint64_t addr;
+} pvr_dev_addr_t;
+
+/* Sentinel "no address" value; an addr of 0 is treated as invalid throughout
+ * this layer (see e.g. pvr_winsys_helper_heap_alloc()).
+ */
+/* clang-format off */
+#define PVR_DEV_ADDR_INVALID (pvr_dev_addr_t){ .addr = 0 }
+/* clang-format on */
+
+/* Set of device-address heaps exposed by the winsys backend; filled in by
+ * pvr_winsys_ops.get_heaps_info().
+ */
+struct pvr_winsys_heaps {
+   struct pvr_winsys_heap *general_heap;
+   struct pvr_winsys_heap *pds_heap;
+   struct pvr_winsys_heap *rgn_hdr_heap;
+   struct pvr_winsys_heap *usc_heap;
+};
+
+/* Byte offsets, relative to a heap's base address, at which well-known static
+ * data items live. See pvr_winsys_heap::static_data_offsets for validity
+ * rules (not every offset is meaningful for every heap).
+ */
+struct pvr_winsys_static_data_offsets {
+   uint64_t eot;
+   uint64_t fence;
+   uint64_t vdm_sync;
+   uint64_t yuv_csc;
+};
+
+/* A single device-address heap: a fixed device-virtual range, optionally with
+ * a reserved sub-region for static data, from which vmas are allocated.
+ */
+struct pvr_winsys_heap {
+   struct pvr_winsys *ws;
+
+   pvr_dev_addr_t base_addr;
+   pvr_dev_addr_t reserved_addr;
+
+   uint64_t size;
+   uint64_t reserved_size;
+
+   uint32_t page_size;
+   uint32_t log2_page_size;
+
+   /* Allocator covering the non-reserved part of the heap; guarded by lock. */
+   struct util_vma_heap vma_heap;
+   /* Number of live allocations; the heap may only be torn down once this has
+    * dropped back to zero (see pvr_winsys_helper_winsys_heap_finish()).
+    */
+   int ref_count;
+   pthread_mutex_t lock;
+
+   /* These are the offsets from the base at which static data might be
+    * uploaded. Some of these might be invalid since the kernel might not
+    * return all of these offsets per each heap as they might not be
+    * applicable.
+    * You should know which to use beforehand. There should be no need to check
+    * whether an offset is valid or invalid.
+    */
+   struct pvr_winsys_static_data_offsets static_data_offsets;
+};
+
+/* Kind of buffer to create: a regular GPU allocation, or one allocated
+ * through the display (master) device — presumably for scanout; confirm
+ * against the backend implementations.
+ */
+enum pvr_winsys_bo_type {
+   PVR_WINSYS_BO_TYPE_GPU = 0,
+   PVR_WINSYS_BO_TYPE_DISPLAY = 1,
+};
+
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that the
+ * buffer should be CPU accessible. This is required in order to map the buffer
+ * using #pvr_winsys_ops.buffer_map.
+ */
+#define PVR_WINSYS_BO_FLAG_CPU_ACCESS BITFIELD_BIT(0U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that, when
+ * the buffer is mapped to the GPU using #pvr_winsys.vma_map, it should be
+ * mapped uncached.
+ */
+#define PVR_WINSYS_BO_FLAG_GPU_UNCACHED BITFIELD_BIT(1U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that, when
+ * the buffer is mapped to the GPU using #pvr_winsys.vma_map, it should only be
+ * accessible to the Parameter Manager unit and firmware processor.
+ */
+#define PVR_WINSYS_BO_FLAG_PM_FW_PROTECT BITFIELD_BIT(2U)
+/**
+ * \brief Flag passed to #pvr_winsys_ops.buffer_create to indicate that the
+ * buffer should be zeroed at allocation time.
+ */
+#define PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC BITFIELD_BIT(3U)
+
+/* A buffer object (backing storage). Backends embed this in their own
+ * per-buffer state; this is the view the rest of the driver sees.
+ */
+struct pvr_winsys_bo {
+   struct pvr_winsys *ws;
+   /* CPU mapping; valid while mapped via pvr_winsys_ops.buffer_map(). */
+   void *map;
+   uint64_t size;
+
+   bool is_imported;
+};
+
+/* A device-virtual-address range allocated from a heap, optionally bound to
+ * a buffer object via pvr_winsys_ops.vma_map().
+ */
+struct pvr_winsys_vma {
+   struct pvr_winsys_heap *heap;
+
+   /* Buffer and offset this vma is bound to. */
+   struct pvr_winsys_bo *bo;
+   VkDeviceSize bo_offset;
+
+   pvr_dev_addr_t dev_addr;
+   uint64_t size;
+   uint64_t mapped_size;
+};
+
+/* Opaque base object for backend synchronization objects. */
+struct pvr_winsys_syncobj {
+   struct pvr_winsys *ws;
+};
+
+/* Opaque base object for backend free lists. */
+struct pvr_winsys_free_list {
+   struct pvr_winsys *ws;
+};
+
+struct pvr_winsys_rt_dataset_create_info {
+   /* Local freelist */
+   struct pvr_winsys_free_list *local_free_list;
+
+   /* ISP register values */
+   uint32_t isp_merge_lower_x;
+   uint32_t isp_merge_lower_y;
+   uint32_t isp_merge_scale_x;
+   uint32_t isp_merge_scale_y;
+   uint32_t isp_merge_upper_x;
+   uint32_t isp_merge_upper_y;
+   uint32_t isp_mtile_size;
+
+   /* PPP register values */
+   uint64_t ppp_multi_sample_ctl;
+   uint64_t ppp_multi_sample_ctl_y_flipped;
+   uint32_t ppp_screen;
+
+   /* TE register values */
+   uint32_t te_aa;
+   uint32_t te_mtile1;
+   uint32_t te_mtile2;
+   uint32_t te_screen;
+
+   /* Allocations and associated information */
+   pvr_dev_addr_t vheap_table_dev_addr;
+   pvr_dev_addr_t rtc_dev_addr;
+
+   pvr_dev_addr_t tpc_dev_addr;
+   uint32_t tpc_stride;
+   uint32_t tpc_size;
+
+   struct {
+      pvr_dev_addr_t pm_mlist_dev_addr;
+      pvr_dev_addr_t macrotile_array_dev_addr;
+      pvr_dev_addr_t rgn_header_dev_addr;
+   } rt_datas[ROGUE_NUM_RTDATAS];
+   uint64_t rgn_header_size;
+
+   /* Miscellaneous */
+   uint32_t mtile_stride;
+   uint16_t max_rts;
+};
+
+struct pvr_winsys_rt_dataset {
+   struct pvr_winsys *ws;
+};
+
+enum pvr_winsys_ctx_priority {
+   PVR_WINSYS_CTX_PRIORITY_LOW,
+   PVR_WINSYS_CTX_PRIORITY_MEDIUM,
+   PVR_WINSYS_CTX_PRIORITY_HIGH,
+};
+
+struct pvr_winsys_render_ctx_create_info {
+   enum pvr_winsys_ctx_priority priority;
+   pvr_dev_addr_t vdm_callstack_addr;
+
+   struct pvr_winsys_render_ctx_static_state {
+      uint64_t vdm_ctx_state_base_addr;
+      uint64_t geom_ctx_state_base_addr;
+
+      struct {
+         uint64_t vdm_ctx_store_task0;
+         uint32_t vdm_ctx_store_task1;
+         uint64_t vdm_ctx_store_task2;
+
+         uint64_t vdm_ctx_resume_task0;
+         uint32_t vdm_ctx_resume_task1;
+         uint64_t vdm_ctx_resume_task2;
+      } geom_state[2];
+   } static_state;
+};
+
+struct pvr_winsys_render_ctx {
+   struct pvr_winsys *ws;
+};
+
+struct pvr_winsys_compute_ctx_create_info {
+   enum pvr_winsys_ctx_priority priority;
+
+   struct pvr_winsys_compute_ctx_static_state {
+      uint64_t cdm_ctx_state_base_addr;
+
+      uint64_t cdm_ctx_store_pds0;
+      uint64_t cdm_ctx_store_pds0_b;
+      uint64_t cdm_ctx_store_pds1;
+
+      uint64_t cdm_ctx_terminate_pds;
+      uint64_t cdm_ctx_terminate_pds1;
+
+      uint64_t cdm_ctx_resume_pds0;
+      uint64_t cdm_ctx_resume_pds0_b;
+   } static_state;
+};
+
+struct pvr_winsys_compute_ctx {
+   struct pvr_winsys *ws;
+};
+
+#define PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP BITFIELD_BIT(0U)
+#define PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE BITFIELD_BIT(1U)
+
+struct pvr_winsys_compute_submit_info {
+   uint32_t frame_num;
+   uint32_t job_num;
+
+   /* semaphores and stage_flags are arrays of length semaphore_count. */
+   const VkSemaphore *semaphores;
+   uint32_t *stage_flags;
+   uint32_t semaphore_count;
+
+   struct {
+      uint64_t tpu_border_colour_table;
+      uint64_t cdm_item;
+      uint64_t compute_cluster;
+      uint64_t cdm_ctrl_stream_base;
+      uint32_t tpu;
+      uint32_t cdm_resume_pds1;
+   } regs;
+
+   /* Must be 0 or a combination of PVR_WINSYS_COMPUTE_FLAG_* flags. */
+   uint32_t flags;
+};
+
+#define PVR_WINSYS_JOB_BO_FLAG_WRITE BITFIELD_BIT(0U)
+
+struct pvr_winsys_job_bo {
+   struct pvr_winsys_bo *bo;
+   /* Must be 0 or a combination of PVR_WINSYS_JOB_BO_FLAG_* flags. */
+   uint32_t flags;
+};
+
+#define PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY BITFIELD_BIT(0U)
+#define PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY BITFIELD_BIT(1U)
+#define PVR_WINSYS_GEOM_FLAG_SINGLE_CORE BITFIELD_BIT(2U)
+
+#define PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT BITFIELD_BIT(0U)
+#define PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT BITFIELD_BIT(1U)
+#define PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP BITFIELD_BIT(2U)
+#define PVR_WINSYS_FRAG_FLAG_SINGLE_CORE BITFIELD_BIT(3U)
+
+struct pvr_winsys_render_submit_info {
+   struct pvr_winsys_rt_dataset *rt_dataset;
+   uint8_t rt_data_idx;
+
+   uint32_t frame_num;
+   uint32_t job_num;
+
+   uint32_t bo_count;
+   const struct pvr_winsys_job_bo *bos;
+
+   /* FIXME: should this be flags instead? */
+   bool run_frag;
+
+   /* semaphores and stage_flags are arrays of length semaphore_count. */
+   const VkSemaphore *semaphores;
+   uint32_t *stage_flags;
+   uint32_t semaphore_count;
+
+   struct pvr_winsys_geometry_state {
+      struct {
+         uint32_t pds_ctrl;
+         uint32_t ppp_ctrl;
+         uint32_t te_psg;
+         uint32_t tpu;
+         uint64_t tpu_border_colour_table;
+         uint64_t vdm_ctrl_stream_base;
+         uint32_t vdm_ctx_resume_task0_size;
+      } regs;
+
+      /* Must be 0 or a combination of PVR_WINSYS_GEOM_FLAG_* flags. */
+      uint32_t flags;
+   } geometry;
+
+   struct pvr_winsys_fragment_state {
+      struct {
+         uint32_t event_pixel_pds_data;
+         uint32_t event_pixel_pds_info;
+         uint32_t isp_aa;
+         uint32_t isp_bgobjdepth;
+         uint32_t isp_bgobjvals;
+         uint32_t isp_ctl;
+         uint64_t isp_dbias_base;
+         uint64_t isp_oclqry_base;
+         uint64_t isp_scissor_base;
+         uint64_t isp_stencil_load_store_base;
+         uint64_t isp_zload_store_base;
+         uint64_t isp_zlsctl;
+         uint64_t isp_zls_pixels;
+         uint64_t pbe_word[PVR_MAX_COLOR_ATTACHMENTS]
+                          [ROGUE_NUM_PBESTATE_REG_WORDS];
+         uint32_t pixel_phantom;
+         uint64_t pds_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+         uint64_t pds_pr_bgnd[ROGUE_NUM_CR_PDS_BGRND_WORDS];
+         uint32_t tpu;
+         uint64_t tpu_border_colour_table;
+         uint32_t usc_pixel_output_ctrl;
+      } regs;
+
+      /* Must be 0 or a combination of PVR_WINSYS_FRAG_FLAG_* flags. */
+      uint32_t flags;
+      uint32_t zls_stride;
+      uint32_t sls_stride;
+   } fragment;
+};
+
+/* Backend function table. One implementation exists per supported kernel
+ * interface (pvrsrvkm services and the upstream powervr DRM driver, selected
+ * in pvr_winsys_create()); the rest of the driver only talks to the winsys
+ * through these hooks.
+ */
+struct pvr_winsys_ops {
+   void (*destroy)(struct pvr_winsys *ws);
+   int (*device_info_init)(struct pvr_winsys *ws,
+                           struct pvr_device_info *dev_info);
+   void (*get_heaps_info)(struct pvr_winsys *ws,
+                          struct pvr_winsys_heaps *heaps);
+
+   /* Buffer-object management. */
+   VkResult (*buffer_create)(struct pvr_winsys *ws,
+                             uint64_t size,
+                             uint64_t alignment,
+                             enum pvr_winsys_bo_type type,
+                             uint32_t flags,
+                             struct pvr_winsys_bo **const bo_out);
+   VkResult (*buffer_create_from_fd)(struct pvr_winsys *ws,
+                                     int fd,
+                                     struct pvr_winsys_bo **const bo_out);
+   void (*buffer_destroy)(struct pvr_winsys_bo *bo);
+
+   VkResult (*buffer_get_fd)(struct pvr_winsys_bo *bo, int *const fd_out);
+
+   void *(*buffer_map)(struct pvr_winsys_bo *bo);
+   void (*buffer_unmap)(struct pvr_winsys_bo *bo);
+
+   /* Device-virtual-address space management and GPU mapping. */
+   struct pvr_winsys_vma *(*heap_alloc)(struct pvr_winsys_heap *heap,
+                                        uint64_t size,
+                                        uint64_t alignment);
+   void (*heap_free)(struct pvr_winsys_vma *vma);
+
+   pvr_dev_addr_t (*vma_map)(struct pvr_winsys_vma *vma,
+                             struct pvr_winsys_bo *bo,
+                             uint64_t offset,
+                             uint64_t size);
+   void (*vma_unmap)(struct pvr_winsys_vma *vma);
+
+   /* Synchronization objects. */
+   VkResult (*syncobj_create)(struct pvr_winsys *ws,
+                              bool signaled,
+                              struct pvr_winsys_syncobj **const syncobj_out);
+   void (*syncobj_destroy)(struct pvr_winsys_syncobj *syncobj);
+   VkResult (*syncobjs_reset)(struct pvr_winsys *ws,
+                              struct pvr_winsys_syncobj **const syncobjs,
+                              uint32_t count);
+   VkResult (*syncobjs_signal)(struct pvr_winsys *ws,
+                               struct pvr_winsys_syncobj **const syncobjs,
+                               uint32_t count);
+   VkResult (*syncobjs_wait)(struct pvr_winsys *ws,
+                             struct pvr_winsys_syncobj **const syncobjs,
+                             uint32_t count,
+                             bool wait_all,
+                             uint64_t timeout);
+   VkResult (*syncobjs_merge)(struct pvr_winsys_syncobj *src,
+                              struct pvr_winsys_syncobj *target,
+                              struct pvr_winsys_syncobj **out);
+
+   /* Free lists and render-target datasets. */
+   VkResult (*free_list_create)(
+      struct pvr_winsys *ws,
+      struct pvr_winsys_vma *free_list_vma,
+      uint32_t initial_num_pages,
+      uint32_t max_num_pages,
+      uint32_t grow_num_pages,
+      uint32_t grow_threshold,
+      struct pvr_winsys_free_list *parent_free_list,
+      struct pvr_winsys_free_list **const free_list_out);
+   void (*free_list_destroy)(struct pvr_winsys_free_list *free_list);
+
+   VkResult (*render_target_dataset_create)(
+      struct pvr_winsys *ws,
+      const struct pvr_winsys_rt_dataset_create_info *create_info,
+      struct pvr_winsys_rt_dataset **const rt_dataset_out);
+   void (*render_target_dataset_destroy)(
+      struct pvr_winsys_rt_dataset *rt_dataset);
+
+   /* Context creation and job submission. */
+   VkResult (*render_ctx_create)(
+      struct pvr_winsys *ws,
+      struct pvr_winsys_render_ctx_create_info *create_info,
+      struct pvr_winsys_render_ctx **const ctx_out);
+   void (*render_ctx_destroy)(struct pvr_winsys_render_ctx *ctx);
+   VkResult (*render_submit)(
+      const struct pvr_winsys_render_ctx *ctx,
+      const struct pvr_winsys_render_submit_info *submit_info,
+      struct pvr_winsys_syncobj **const syncobj_geom_out,
+      struct pvr_winsys_syncobj **const syncobj_frag_out);
+
+   VkResult (*compute_ctx_create)(
+      struct pvr_winsys *ws,
+      const struct pvr_winsys_compute_ctx_create_info *create_info,
+      struct pvr_winsys_compute_ctx **const ctx_out);
+   void (*compute_ctx_destroy)(struct pvr_winsys_compute_ctx *ctx);
+   VkResult (*compute_submit)(
+      const struct pvr_winsys_compute_ctx *ctx,
+      const struct pvr_winsys_compute_submit_info *submit_info,
+      struct pvr_winsys_syncobj **const syncobj_out);
+};
+
+/* Root winsys object, created via pvr_winsys_create(); all behaviour is
+ * provided by the backend through `ops`.
+ */
+struct pvr_winsys {
+   uint64_t page_size;
+   uint32_t log2_page_size;
+
+   const struct pvr_winsys_ops *ops;
+};
+
+void pvr_winsys_destroy(struct pvr_winsys *ws);
+struct pvr_winsys *pvr_winsys_create(int master_fd,
+                                     int render_fd,
+                                     const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_WINSYS_H */
diff --git a/src/imagination/vulkan/winsys/pvr_winsys_helper.c b/src/imagination/vulkan/winsys/pvr_winsys_helper.c
new file mode 100644 (file)
index 0000000..ae94bab
--- /dev/null
@@ -0,0 +1,420 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <vulkan/vulkan.h>
+#include <xf86drm.h>
+
+#include "pvr_private.h"
+#include "pvr_winsys.h"
+#include "pvr_winsys_helper.h"
+#include "util/u_atomic.h"
+#include "vk_log.h"
+
+/* Allocate a display buffer of `size` bytes on the master (display) fd via
+ * the generic DRM dumb-buffer interface.
+ *
+ * The allocation is expressed as a `size` x 1, 8 bits-per-pixel "image",
+ * i.e. a plain byte buffer. On success the GEM handle is stored in
+ * *handle_out and 0 is returned; otherwise the drmIoctl() error is returned.
+ */
+int pvr_winsys_helper_display_buffer_create(int master_fd,
+                                            uint64_t size,
+                                            uint32_t *const handle_out)
+{
+   struct drm_mode_create_dumb args = {
+      .width = size,
+      .height = 1,
+      .bpp = 8,
+   };
+   int ret;
+
+   /* args.width is only 32 bits wide; reject sizes that would otherwise be
+    * silently truncated into a smaller allocation.
+    */
+   if (size > UINT32_MAX)
+      return -EINVAL;
+
+   ret = drmIoctl(master_fd, DRM_IOCTL_MODE_CREATE_DUMB, &args);
+   if (ret)
+      return ret;
+
+   *handle_out = args.handle;
+
+   return 0;
+}
+
+/* Destroy a dumb buffer previously created with
+ * pvr_winsys_helper_display_buffer_create(). Returns the drmIoctl() result
+ * (0 on success).
+ */
+int pvr_winsys_helper_display_buffer_destroy(int master_fd, uint32_t handle)
+{
+   struct drm_mode_destroy_dumb args = {
+      .handle = handle,
+   };
+
+   return drmIoctl(master_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &args);
+}
+
+/* Initialise a pvr_winsys_heap over the range [base_address,
+ * base_address + size), carving out the reserved static-data region so the
+ * vma allocator only hands out addresses from the remainder.
+ *
+ * reserved_size can be 0 when no reserved area is needed. reserved_address
+ * must be 0 if reserved_size is 0.
+ *
+ * Returns VK_ERROR_INITIALIZATION_FAILED if the heap mutex can't be created.
+ * On success the caller is responsible for eventually calling
+ * pvr_winsys_helper_winsys_heap_finish().
+ */
+VkResult pvr_winsys_helper_winsys_heap_init(
+   struct pvr_winsys *const ws,
+   pvr_dev_addr_t base_address,
+   uint64_t size,
+   pvr_dev_addr_t reserved_address,
+   uint64_t reserved_size,
+   uint32_t log2_page_size,
+   const struct pvr_winsys_static_data_offsets *const static_data_offsets,
+   struct pvr_winsys_heap *const heap)
+{
+   const bool reserved_area_bottom_of_heap = reserved_address.addr ==
+                                             base_address.addr;
+   /* Branchless skip: when the reserved region sits at the bottom of the
+    * heap, the allocatable range starts just past it.
+    */
+   const uint64_t vma_heap_begin_addr =
+      base_address.addr +
+      (uint64_t)reserved_area_bottom_of_heap * reserved_size;
+   const uint64_t vma_heap_size = size - reserved_size;
+
+   assert(base_address.addr);
+   assert(reserved_size <= size);
+
+   /* As per the reserved_base powervr-km uapi documentation the reserved
+    * region can only be at the beginning of the heap or at the end.
+    * reserved_address is 0 if there is no reserved region.
+    * pvrsrv-km doesn't explicitly provide this info and it's assumed that it's
+    * always at the beginning.
+    */
+   assert(reserved_area_bottom_of_heap ||
+          reserved_address.addr + reserved_size == base_address.addr + size ||
+          (!reserved_address.addr && !reserved_size));
+
+   heap->ws = ws;
+   heap->base_addr = base_address;
+   heap->reserved_addr = reserved_address;
+
+   heap->size = size;
+   heap->reserved_size = reserved_size;
+
+   heap->page_size = 1 << log2_page_size;
+   heap->log2_page_size = log2_page_size;
+
+   util_vma_heap_init(&heap->vma_heap, vma_heap_begin_addr, vma_heap_size);
+
+   heap->vma_heap.alloc_high = false;
+
+   /* It's expected that the heap destroy function will be the last thing
+    * that's called, so we start the ref_count at 0.
+    */
+   p_atomic_set(&heap->ref_count, 0);
+
+   if (pthread_mutex_init(&heap->lock, NULL))
+      return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+
+   heap->static_data_offsets = *static_data_offsets;
+
+   return VK_SUCCESS;
+}
+
+/* Release the resources owned by a heap set up with
+ * pvr_winsys_helper_winsys_heap_init().
+ *
+ * Returns false without touching the heap when it still has live
+ * allocations (non-zero ref_count); returns true once torn down.
+ */
+bool pvr_winsys_helper_winsys_heap_finish(struct pvr_winsys_heap *const heap)
+{
+   const bool heap_in_use = p_atomic_read(&heap->ref_count) != 0;
+
+   if (heap_in_use)
+      return false;
+
+   util_vma_heap_finish(&heap->vma_heap);
+   pthread_mutex_destroy(&heap->lock);
+
+   return true;
+}
+
+/* Reserve `size` bytes of device-virtual address space from `heap`.
+ *
+ * On success *vma_out holds the allocation (no buffer bound yet), the heap's
+ * ref_count is raised, and true is returned; release with
+ * pvr_winsys_helper_heap_free(). On exhaustion,
+ * VK_ERROR_OUT_OF_DEVICE_MEMORY is reported via vk_error() and false is
+ * returned.
+ *
+ * NOTE(review): the requested alignment is only used to pad `size`; the
+ * underlying util_vma_heap_alloc() call aligns to heap->page_size. Confirm
+ * no caller needs alignment greater than the page size before relying on
+ * the `alignment` parameter for address alignment.
+ */
+bool pvr_winsys_helper_heap_alloc(struct pvr_winsys_heap *const heap,
+                                  uint64_t size,
+                                  uint64_t alignment,
+                                  struct pvr_winsys_vma *const vma_out)
+{
+   struct pvr_winsys_vma vma = {
+      .heap = heap,
+   };
+
+   assert(util_is_power_of_two_nonzero(alignment));
+
+   /* pvr_srv_winsys_buffer_create() page aligns the size. We must do the same
+    * here to ensure enough heap space is allocated to be able to map the
+    * buffer to the GPU.
+    * We have to do this for the powervr kernel mode driver as well, as it
+    * returns a page aligned size when allocating buffers.
+    */
+   alignment = MAX2(alignment, heap->page_size);
+
+   size = ALIGN_POT(size, alignment);
+   vma.size = size;
+
+   pthread_mutex_lock(&heap->lock);
+   vma.dev_addr.addr =
+      util_vma_heap_alloc(&heap->vma_heap, size, heap->page_size);
+   pthread_mutex_unlock(&heap->lock);
+
+   if (!vma.dev_addr.addr) {
+      vk_error(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+      return false;
+   }
+
+   p_atomic_inc(&heap->ref_count);
+
+   *vma_out = vma;
+
+   return true;
+}
+
+/* Return a vma obtained from pvr_winsys_helper_heap_alloc() to its heap and
+ * drop the heap's ref_count.
+ */
+void pvr_winsys_helper_heap_free(struct pvr_winsys_vma *const vma)
+{
+   struct pvr_winsys_heap *const heap = vma->heap;
+
+   /* A vma with an existing device mapping should not be freed. */
+   assert(!vma->bo);
+
+   pthread_mutex_lock(&heap->lock);
+   util_vma_heap_free(&heap->vma_heap, vma->dev_addr.addr, vma->size);
+   pthread_mutex_unlock(&heap->lock);
+
+   p_atomic_dec(&heap->ref_count);
+}
+
+/* Create a CPU-accessible buffer of `size` bytes and map it at the fixed
+ * reserved device address `dev_addr` within `heap`.
+ *
+ * Note: the function assumes the heap allocation in the reserved memory area
+ * can be freed with the regular heap allocation free function. The free
+ * function gets called on mapping failure.
+ *
+ * On success the returned vma (via *vma_out) is the sole owner: the local
+ * buffer reference is dropped, so unmapping the vma also destroys the
+ * buffer. See pvr_buffer_destroy_and_unmap().
+ */
+static VkResult
+pvr_buffer_create_and_map(struct pvr_winsys *const ws,
+                          heap_alloc_reserved_func heap_alloc_reserved,
+                          struct pvr_winsys_heap *heap,
+                          pvr_dev_addr_t dev_addr,
+                          uint64_t size,
+                          uint64_t alignment,
+                          struct pvr_winsys_vma **const vma_out)
+{
+   struct pvr_winsys_vma *vma;
+   struct pvr_winsys_bo *bo;
+   pvr_dev_addr_t addr;
+   VkResult result;
+
+   /* Address should not be NULL, this function is used to allocate and map
+    * reserved addresses and is only supposed to be used internally.
+    */
+   assert(dev_addr.addr);
+
+   result = ws->ops->buffer_create(ws,
+                                   size,
+                                   alignment,
+                                   PVR_WINSYS_BO_TYPE_GPU,
+                                   PVR_WINSYS_BO_FLAG_CPU_ACCESS,
+                                   &bo);
+   if (result != VK_SUCCESS)
+      return result;
+
+   vma = heap_alloc_reserved(heap, dev_addr, size, alignment);
+   if (!vma) {
+      result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
+      goto err_pvr_winsys_buffer_destroy;
+   }
+
+   addr = ws->ops->vma_map(vma, bo, 0, size);
+   if (!addr.addr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto err_pvr_winsys_heap_free;
+   }
+
+   /* Note this won't destroy bo as its being used by VMA, once vma is
+    * unmapped, bo will be destroyed automatically.
+    */
+   ws->ops->buffer_destroy(bo);
+
+   *vma_out = vma;
+
+   return VK_SUCCESS;
+
+err_pvr_winsys_heap_free:
+   ws->ops->heap_free(vma);
+
+err_pvr_winsys_buffer_destroy:
+   ws->ops->buffer_destroy(bo);
+
+   return result;
+}
+
+/* Unmap a reserved-region vma from the GPU and release its heap allocation.
+ *
+ * Counterpart to pvr_buffer_create_and_map(), which already dropped its own
+ * buffer reference.
+ *
+ * Note: specifier order fixed to the idiomatic `static inline void` (was
+ * `static void inline`).
+ */
+static inline void pvr_buffer_destroy_and_unmap(struct pvr_winsys_vma *vma)
+{
+   const struct pvr_winsys *const ws = vma->heap->ws;
+
+   /* Buffer object associated with the vma will be automatically destroyed
+    * once vma is unmapped.
+    */
+   ws->ops->vma_unmap(vma);
+   ws->ops->heap_free(vma);
+}
+
+/* Allocate and GPU-map the reserved static-data regions of the general, PDS
+ * and USC heaps.
+ *
+ * Each region is backed by a CPU-accessible buffer bound at the heap's fixed
+ * reserved device address via `heap_alloc_reserved`. On success, ownership
+ * of the three vmas passes to the caller, which must release them with
+ * pvr_winsys_helper_free_static_memory(); on failure everything allocated so
+ * far is unwound and the first failing VkResult is returned.
+ */
+VkResult pvr_winsys_helper_allocate_static_memory(
+   struct pvr_winsys *const ws,
+   heap_alloc_reserved_func heap_alloc_reserved,
+   struct pvr_winsys_heap *const general_heap,
+   struct pvr_winsys_heap *const pds_heap,
+   struct pvr_winsys_heap *const usc_heap,
+   struct pvr_winsys_vma **const general_vma_out,
+   struct pvr_winsys_vma **const pds_vma_out,
+   struct pvr_winsys_vma **const usc_vma_out)
+{
+   struct pvr_winsys_vma *general_vma;
+   struct pvr_winsys_vma *pds_vma;
+   struct pvr_winsys_vma *usc_vma;
+   VkResult result;
+
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      general_heap,
+                                      general_heap->reserved_addr,
+                                      general_heap->reserved_size,
+                                      general_heap->page_size,
+                                      &general_vma);
+   if (result != VK_SUCCESS)
+      return result;
+
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      pds_heap,
+                                      pds_heap->reserved_addr,
+                                      pds_heap->reserved_size,
+                                      pds_heap->page_size,
+                                      &pds_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_buffer_destroy_and_unmap_general;
+
+   /* Fix: use the USC heap's own reserved size here; this previously passed
+    * pds_heap->reserved_size by copy-paste error.
+    */
+   result = pvr_buffer_create_and_map(ws,
+                                      heap_alloc_reserved,
+                                      usc_heap,
+                                      usc_heap->reserved_addr,
+                                      usc_heap->reserved_size,
+                                      usc_heap->page_size,
+                                      &usc_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_buffer_destroy_and_unmap_pds;
+
+   *general_vma_out = general_vma;
+   *pds_vma_out = pds_vma;
+   *usc_vma_out = usc_vma;
+
+   return VK_SUCCESS;
+
+err_pvr_buffer_destroy_and_unmap_pds:
+   pvr_buffer_destroy_and_unmap(pds_vma);
+
+err_pvr_buffer_destroy_and_unmap_general:
+   pvr_buffer_destroy_and_unmap(general_vma);
+
+   return result;
+}
+
+/* Release the three static-data allocations made by
+ * pvr_winsys_helper_allocate_static_memory(), in reverse allocation order.
+ */
+void pvr_winsys_helper_free_static_memory(
+   struct pvr_winsys_vma *const general_vma,
+   struct pvr_winsys_vma *const pds_vma,
+   struct pvr_winsys_vma *const usc_vma)
+{
+   pvr_buffer_destroy_and_unmap(usc_vma);
+   pvr_buffer_destroy_and_unmap(pds_vma);
+   pvr_buffer_destroy_and_unmap(general_vma);
+}
+
+/* Write the static VDM sync USC program into the mapped USC region and
+ * generate the PDS program that kicks it into the mapped PDS region.
+ *
+ * `pds_ptr`/`usc_ptr` are CPU mappings of the PDS and USC reserved areas;
+ * the offsets are those heaps' vdm_sync static data offsets.
+ *
+ * NOTE(review): `usc_sync_offset_in_bytes` is passed to
+ * pvr_pds_setup_doutu() as the USC code address — presumably the heap base
+ * is applied elsewhere; confirm against the PDS helpers.
+ */
+static void pvr_setup_static_vdm_sync(uint8_t *const pds_ptr,
+                                      uint64_t pds_sync_offset_in_bytes,
+                                      uint8_t *const usc_ptr,
+                                      uint64_t usc_sync_offset_in_bytes)
+{
+   /* TODO: this needs to be auto-generated */
+   /* Hand-assembled USC instruction blob for the sync task. */
+   const uint8_t state_update[] = { 0x44, 0xA0, 0x80, 0x05,
+                                    0x00, 0x00, 0x00, 0xFF };
+
+   struct pvr_pds_kickusc_program ppp_state_update_program = { 0 };
+
+   memcpy(usc_ptr + usc_sync_offset_in_bytes,
+          state_update,
+          sizeof(state_update));
+
+   pvr_pds_setup_doutu(&ppp_state_update_program.usc_task_control,
+                       usc_sync_offset_in_bytes,
+                       0,
+                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
+                       false);
+
+   pvr_pds_kick_usc(&ppp_state_update_program,
+                    (uint32_t *)&pds_ptr[pds_sync_offset_in_bytes],
+                    0,
+                    false,
+                    PDS_GENERATE_CODEDATA_SEGMENTS);
+}
+
+/* Generate the static end-of-tile pixel event PDS program into the mapped
+ * PDS region at the heap's EOT static data offset.
+ */
+static void
+pvr_setup_static_pixel_event_program(uint8_t *const pds_ptr,
+                                     uint64_t pds_eot_offset_in_bytes)
+{
+   struct pvr_pds_event_program pixel_event_program = { 0 };
+
+   pvr_pds_generate_pixel_event(&pixel_event_program,
+                                (uint32_t *)&pds_ptr[pds_eot_offset_in_bytes],
+                                PDS_GENERATE_CODE_SEGMENT,
+                                NULL);
+}
+
+/* Populate the static-data regions allocated by
+ * pvr_winsys_helper_allocate_static_memory(): the VDM sync programs (PDS +
+ * USC) and the static pixel event (EOT) program.
+ *
+ * All three vmas are CPU-mapped for the duration of the fill and unmapped
+ * again before returning. Returns VK_ERROR_MEMORY_MAP_FAILED if any mapping
+ * fails. The general region is currently only mapped/unmapped; see the TODO
+ * below.
+ */
+VkResult
+pvr_winsys_helper_fill_static_memory(struct pvr_winsys *const ws,
+                                     struct pvr_winsys_vma *const general_vma,
+                                     struct pvr_winsys_vma *const pds_vma,
+                                     struct pvr_winsys_vma *const usc_vma)
+{
+   uint8_t *general_ptr, *pds_ptr, *usc_ptr;
+   VkResult result;
+
+   general_ptr = ws->ops->buffer_map(general_vma->bo);
+   if (!general_ptr)
+      return VK_ERROR_MEMORY_MAP_FAILED;
+
+   pds_ptr = ws->ops->buffer_map(pds_vma->bo);
+   if (!pds_ptr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto error_pvr_srv_winsys_buffer_unmap_general;
+   }
+
+   usc_ptr = ws->ops->buffer_map(usc_vma->bo);
+   if (!usc_ptr) {
+      result = VK_ERROR_MEMORY_MAP_FAILED;
+      goto error_pvr_srv_winsys_buffer_unmap_pds;
+   }
+
+   pvr_setup_static_vdm_sync(pds_ptr,
+                             pds_vma->heap->static_data_offsets.vdm_sync,
+                             usc_ptr,
+                             usc_vma->heap->static_data_offsets.vdm_sync);
+
+   pvr_setup_static_pixel_event_program(pds_ptr,
+                                        pds_vma->heap->static_data_offsets.eot);
+
+   /* TODO: Complete control block copying work. */
+
+   ws->ops->buffer_unmap(usc_vma->bo);
+   ws->ops->buffer_unmap(pds_vma->bo);
+   ws->ops->buffer_unmap(general_vma->bo);
+
+   return VK_SUCCESS;
+
+error_pvr_srv_winsys_buffer_unmap_pds:
+   ws->ops->buffer_unmap(pds_vma->bo);
+
+error_pvr_srv_winsys_buffer_unmap_general:
+   ws->ops->buffer_unmap(general_vma->bo);
+
+   return result;
+}
diff --git a/src/imagination/vulkan/winsys/pvr_winsys_helper.h b/src/imagination/vulkan/winsys/pvr_winsys_helper.h
new file mode 100644 (file)
index 0000000..dbf619a
--- /dev/null
@@ -0,0 +1,80 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_WINSYS_HELPER_H
+#define PVR_WINSYS_HELPER_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "pvr_winsys.h"
+
+/**
+ * Winsys-specific callback that allocates a VMA from a heap's reserved
+ * region at a fixed device address (used by
+ * pvr_winsys_helper_allocate_static_memory()).
+ */
+typedef struct pvr_winsys_vma *(*const heap_alloc_reserved_func)(
+   struct pvr_winsys_heap *const heap,
+   const pvr_dev_addr_t reserved_dev_addr,
+   uint64_t size,
+   uint64_t alignment);
+
+/* Create/destroy a display buffer via the master (display) DRM fd.
+ * NOTE(review): return convention looks like 0/-errno — confirm in the
+ * implementation.
+ */
+int pvr_winsys_helper_display_buffer_create(int master_fd,
+                                            uint64_t size,
+                                            uint32_t *const handle_out);
+int pvr_winsys_helper_display_buffer_destroy(int master_fd, uint32_t handle);
+
+/**
+ * Initialize a winsys heap covering [base_address, base_address + size),
+ * with a reserved sub-range for static data described by
+ * static_data_offsets.
+ */
+VkResult pvr_winsys_helper_winsys_heap_init(
+   struct pvr_winsys *const ws,
+   pvr_dev_addr_t base_address,
+   uint64_t size,
+   pvr_dev_addr_t reserved_address,
+   uint64_t reserved_size,
+   uint32_t log2_page_size,
+   const struct pvr_winsys_static_data_offsets *const static_data_offsets,
+   struct pvr_winsys_heap *const heap);
+bool pvr_winsys_helper_winsys_heap_finish(struct pvr_winsys_heap *const heap);
+
+/* Allocate/free device-address space for a VMA from a heap. */
+bool pvr_winsys_helper_heap_alloc(struct pvr_winsys_heap *const heap,
+                                  uint64_t size,
+                                  uint64_t alignment,
+                                  struct pvr_winsys_vma *const vma);
+void pvr_winsys_helper_heap_free(struct pvr_winsys_vma *const vma);
+
+/**
+ * Allocate the static-data VMAs of the general, PDS and USC heaps using the
+ * winsys-provided reserved-allocation callback, returning one VMA per heap.
+ */
+VkResult pvr_winsys_helper_allocate_static_memory(
+   struct pvr_winsys *const ws,
+   heap_alloc_reserved_func heap_alloc_reserved,
+   struct pvr_winsys_heap *const general_heap,
+   struct pvr_winsys_heap *const pds_heap,
+   struct pvr_winsys_heap *const usc_heap,
+   struct pvr_winsys_vma **const general_vma_out,
+   struct pvr_winsys_vma **const pds_vma_out,
+   struct pvr_winsys_vma **const usc_vma_out);
+void pvr_winsys_helper_free_static_memory(
+   struct pvr_winsys_vma *const general_vma,
+   struct pvr_winsys_vma *const pds_vma,
+   struct pvr_winsys_vma *const usc_vma);
+
+/**
+ * Write the static VDM-sync and end-of-tile pixel-event programs into the
+ * mapped static-memory allocations. Returns VK_ERROR_MEMORY_MAP_FAILED if
+ * any buffer cannot be mapped.
+ */
+VkResult
+pvr_winsys_helper_fill_static_memory(struct pvr_winsys *const ws,
+                                     struct pvr_winsys_vma *const general_vma,
+                                     struct pvr_winsys_vma *const pds_vma,
+                                     struct pvr_winsys_vma *const usc_vma);
+
+#endif /* PVR_WINSYS_HELPER_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif.h
new file mode 100644 (file)
index 0000000..abcaf18
--- /dev/null
@@ -0,0 +1,440 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_FWIF_H
+#define PVR_ROGUE_FWIF_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "pvr_rogue_fwif_shared.h"
+
+/** Indicates the number of RTDATAs per RTDATASET. */
+#define ROGUE_FWIF_NUM_RTDATAS 2U
+
+/** Render needs flipped sample positions. */
+#define ROGUE_FWIF_RENDERFLAGS_FLIP_SAMPLE_POSITIONS 0x00000001UL
+/**
+ * The scene has been aborted, free the parameters and dummy process to
+ * completion.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_ABORT 0x00000002UL
+/** The TA before this was not marked as LAST. */
+#define ROGUE_FWIF_RENDERFLAGS_3D_ONLY 0x00000004UL
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE 0x00000008UL
+/**
+ * This render has visibility result associated with it. Setting this flag will
+ * cause the firmware to collect the visibility results.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_GETVISRESULTS 0x00000020UL
+/** Indicates whether a depth buffer is present. */
+#define ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER 0x00000080UL
+/** Indicates whether a stencil buffer is present. */
+#define ROGUE_FWIF_RENDERFLAGS_STENCILBUFFER 0x00000100UL
+/** This render needs DRM Security. */
+#define ROGUE_FWIF_RENDERFLAGS_SECURE 0x00002000UL
+/**
+ * This flag goes hand in hand with ABORT and explicitly ensures no mem free
+ * is issued in case of the first TA job.
+ */
+#define ROGUE_FWIF_RENDERFLAGS_ABORT_NOFREE 0x00004000UL
+/** Force disabling of pixel merging. */
+#define ROGUE_FWIF_RENDERFLAGS_DISABLE_PIXELMERGE 0x00008000UL
+
+/** Force 4 lines of coeffs on render. */
+#define ROGUE_FWIF_RENDERFLAGS_CSRM_MAX_COEFFS 0x00020000UL
+
+/** Partial render must write to scratch buffer. */
+#define ROGUE_FWIF_RENDERFLAGS_SPMSCRATCHBUFFER 0x00080000UL
+
+/** Render uses paired tile feature, empty tiles must always be enabled. */
+#define ROGUE_FWIF_RENDERFLAGS_PAIRED_TILES 0x00100000UL
+
+#define ROGUE_FWIF_RENDERFLAGS_RESERVED 0x01000000UL
+
+/** Disallow compute overlapped with this render. */
+#define ROGUE_FWIF_RENDERFLAGS_PREVENT_CDM_OVERLAP 0x04000000UL
+/**
+ * The host must indicate if this is the first and/or last command to be issued
+ * for the specified task.
+ */
+#define ROGUE_FWIF_TAFLAGS_FIRSTKICK 0x00000001UL
+#define ROGUE_FWIF_TAFLAGS_LASTKICK 0x00000002UL
+#define ROGUE_FWIF_TAFLAGS_FLIP_SAMPLE_POSITIONS 0x00000004UL
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_TAFLAGS_SINGLE_CORE 0x00000008UL
+
+/** Enable Tile Region Protection for this TA. */
+#define ROGUE_FWIF_TAFLAGS_TRP 0x00000010UL
+
+/** Indicates the particular TA needs to be aborted. */
+#define ROGUE_FWIF_TAFLAGS_TA_ABORT 0x00000100UL
+#define ROGUE_FWIF_TAFLAGS_SECURE 0x00080000UL
+
+/**
+ * Indicates that the CSRM should be reconfigured to support maximum coeff
+ * space before this command is scheduled.
+ */
+#define ROGUE_FWIF_TAFLAGS_CSRM_MAX_COEFFS 0x00200000UL
+
+#define ROGUE_FWIF_TAFLAGS_PHR_TRIGGER 0x02000000UL
+
+/* Flags for transfer queue commands. */
+#define ROGUE_FWIF_CMDTRANSFER_FLAG_SECURE 0x00000001U
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_CMDTRANSFER_SINGLE_CORE 0x00000002U
+
+/* Flags for 2D commands. */
+#define ROGUE_FWIF_CMD2D_FLAG_SECURE 0x00000001U
+
+#define ROGUE_FWIF_CMD3DTQ_SLICE_WIDTH_MASK 0x00000038UL
+#define ROGUE_FWIF_CMD3DTQ_SLICE_WIDTH_SHIFT (3)
+#define ROGUE_FWIF_CMD3DTQ_SLICE_GRANULARITY (0x10U)
+
+/* Flags for compute commands. */
+#define ROGUE_FWIF_COMPUTE_FLAG_SECURE 0x00000001U
+#define ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP 0x00000002U
+#define ROGUE_FWIF_COMPUTE_FLAG_FORCE_TPU_CLK 0x00000004U
+
+#define ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_NON_TAOOM_OVERLAP 0x00000010U
+
+/** Use single core in a multi core setup. */
+#define ROGUE_FWIF_COMPUTE_FLAG_SINGLE_CORE 0x00000020U
+
+/***********************************************
+   Parameter/HWRTData control structures.
+ ***********************************************/
+
+/**
+ * Configuration registers which need to be loaded by the firmware before a TA
+ * job can be started.
+ */
+struct rogue_fwif_ta_regs {
+   uint64_t vdm_ctrl_stream_base;
+   uint64_t tpu_border_colour_table;
+
+   uint32_t ppp_ctrl;
+   uint32_t te_psg;
+   uint32_t tpu;
+
+   uint32_t vdm_context_resume_task0_size;
+
+   /* FIXME: HIGH: FIX_HW_BRN_56279 changes the structure's layout, given we
+    * are supporting Features/ERNs/BRNs at runtime, we need to look into this
+    * and find a solution to keep layout intact.
+    */
+   /* Available if FIX_HW_BRN_56279 is present. */
+   uint32_t pds_ctrl;
+
+   uint32_t view_idx;
+};
+
+/**
+ * Represents a TA command that can be used to tile a whole scene's objects as
+ * per TA behavior.
+ */
+struct rogue_fwif_cmd_ta {
+   /**
+    * rogue_fwif_cmd_ta_3d_shared field must always be at the beginning of the
+    * struct.
+    *
+    * The command struct (rogue_fwif_cmd_ta) is shared between Client and
+    * Firmware. Kernel is unable to perform read/write operations on the
+    * command struct, the SHARED region is the only exception from this rule.
+    * This region must be the first member so that Kernel can easily access it.
+    * For more info, see pvr_cmd_ta_3d definition.
+    */
+   struct rogue_fwif_cmd_ta_3d_shared cmd_shared;
+
+   struct rogue_fwif_ta_regs ALIGN(8) geom_regs;
+   uint32_t ALIGN(8) flags;
+   /**
+    * Holds the TA/3D fence value to allow the 3D partial render command
+    * to go through.
+    */
+   struct rogue_fwif_ufo partial_render_ta_3d_fence;
+};
+
+static_assert(
+   offsetof(struct rogue_fwif_cmd_ta, cmd_shared) == 0U,
+   "rogue_fwif_cmd_ta_3d_shared must be the first member of rogue_fwif_cmd_ta");
+
+static_assert(
+   sizeof(struct rogue_fwif_cmd_ta) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+   "kernel expects command size be increased to match current TA command size");
+
+/**
+ * Configuration registers which need to be loaded by the firmware before ISP
+ * can be started.
+ */
+struct rogue_fwif_3d_regs {
+   /**
+    * All 32 bit values should be added in the top section. This then requires
+    * only a single ALIGN(8) to align all the 64 bit values in the second
+    * section.
+    */
+   uint32_t usc_pixel_output_ctrl;
+   /* FIXME: HIGH: RGX_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL changes the
+    * structure's layout.
+    */
+#define ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL 8U
+   uint32_t usc_clear_register[ROGUE_MAXIMUM_OUTPUT_REGISTERS_PER_PIXEL];
+
+   uint32_t isp_bgobjdepth;
+   uint32_t isp_bgobjvals;
+   uint32_t isp_aa;
+   uint32_t isp_ctl;
+
+   uint32_t tpu;
+
+   uint32_t event_pixel_pds_info;
+
+   /* FIXME: HIGH: RGX_FEATURE_CLUSTER_GROUPING changes the structure's
+    * layout.
+    */
+   uint32_t pixel_phantom;
+
+   uint32_t view_idx;
+
+   uint32_t event_pixel_pds_data;
+   /* FIXME: HIGH: MULTIBUFFER_OCLQRY changes the structure's layout.
+    * Commenting out for now as it's not supported by 4.V.2.51.
+    */
+   /* uint32_t isp_oclqry_stride; */
+
+   /* All values below the ALIGN(8) must be 64 bit. */
+   uint64_t ALIGN(8) isp_scissor_base;
+   uint64_t isp_dbias_base;
+   uint64_t isp_oclqry_base;
+   uint64_t isp_zlsctl;
+   uint64_t isp_zload_store_base;
+   uint64_t isp_stencil_load_store_base;
+   /* FIXME: HIGH: RGX_FEATURE_ZLS_SUBTILE changes the structure's layout. */
+   uint64_t isp_zls_pixels;
+
+   /* FIXME: HIGH: RGX_HW_REQUIRES_FB_CDC_ZLS_SETUP changes the structure's
+    * layout.
+    */
+   uint64_t deprecated;
+
+   /* FIXME: HIGH: RGX_PBE_WORDS_REQUIRED_FOR_RENDERS changes the structure's
+    * layout.
+    */
+#define ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS 2U
+   uint64_t pbe_word[8U][ROGUE_PBE_WORDS_REQUIRED_FOR_RENDERS];
+   uint64_t tpu_border_colour_table;
+   uint64_t pds_bgnd[3U];
+   uint64_t pds_pr_bgnd[3U];
+};
+
+struct rogue_fwif_cmd_3d {
+   /**
+    * This struct is shared between Client and Firmware.
+    * Kernel is unable to perform read/write operations on the command struct,
+    * the SHARED region is our only exception from that rule.
+    * This region must be the first member so Kernel can easily access it.
+    * For more info, see rogue_fwif_cmd_ta_3d_shared definition.
+    */
+   struct rogue_fwif_cmd_ta_3d_shared ALIGN(8) cmd_shared;
+
+   struct rogue_fwif_3d_regs ALIGN(8) regs;
+   /** command control flags. */
+   uint32_t flags;
+   /** Stride IN BYTES for Z-Buffer in case of RTAs. */
+   uint32_t zls_stride;
+   /** Stride IN BYTES for S-Buffer in case of RTAs. */
+   uint32_t sls_stride;
+};
+
+static_assert(
+   offsetof(struct rogue_fwif_cmd_3d, cmd_shared) == 0U,
+   "rogue_fwif_cmd_ta_3d_shared must be the first member of rogue_fwif_cmd_3d");
+
+static_assert(
+   sizeof(struct rogue_fwif_cmd_3d) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+   "kernel expects command size be increased to match current 3D command size");
+
+struct rogue_fwif_transfer_regs {
+   /**
+    * All 32 bit values should be added in the top section. This then requires
+    * only a single ALIGN(8) to align all the 8 byte values in the second
+    * section.
+    */
+   uint32_t isp_bgobjvals;
+
+   uint32_t usc_pixel_output_ctrl;
+   uint32_t usc_clear_register0;
+   uint32_t usc_clear_register1;
+   uint32_t usc_clear_register2;
+   uint32_t usc_clear_register3;
+
+   uint32_t isp_mtile_size;
+   uint32_t isp_render_origin;
+   uint32_t isp_ctl;
+
+   uint32_t isp_aa;
+
+   uint32_t event_pixel_pds_info;
+
+   uint32_t event_pixel_pds_code;
+   uint32_t event_pixel_pds_data;
+
+   uint32_t isp_render;
+   uint32_t isp_rgn;
+   /* FIXME: HIGH: RGX_FEATURE_GPU_MULTICORE_SUPPORT changes the structure's
+    * layout. Commenting out for now as it's not supported by 4.V.2.51.
+    */
+   /* uint32_t frag_screen; */
+   /** All values below the ALIGN(8) must be 64 bit. */
+   uint64_t ALIGN(8) pds_bgnd0_base;
+   uint64_t pds_bgnd1_base;
+   uint64_t pds_bgnd3_sizeinfo;
+
+   uint64_t isp_mtile_base;
+   /* FIXME: HIGH: RGX_PBE_WORDS_REQUIRED_FOR_TQS changes the structure's
+    * layout.
+    */
+#define ROGUE_PBE_WORDS_REQUIRED_FOR_TRANSFER 3
+   /* TQ_MAX_RENDER_TARGETS * PBE_STATE_SIZE */
+   uint64_t pbe_wordx_mrty[3 * ROGUE_PBE_WORDS_REQUIRED_FOR_TRANSFER];
+};
+
+struct rogue_fwif_cmd_transfer {
+   struct rogue_fwif_cmd_common ALIGN(8) cmn;
+   struct rogue_fwif_transfer_regs ALIGN(8) regs;
+
+   uint32_t flags;
+};
+
+static_assert(
+   offsetof(struct rogue_fwif_cmd_transfer, cmn) == 0U,
+   "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_transfer");
+
+static_assert(
+   sizeof(struct rogue_fwif_cmd_transfer) <=
+      ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+   "kernel expects command size be increased to match current TRANSFER command size");
+
+struct rogue_fwif_2d_regs {
+   uint64_t tla_cmd_stream;
+   uint64_t deprecated_0;
+   uint64_t deprecated_1;
+   uint64_t deprecated_2;
+   uint64_t deprecated_3;
+   /* FIXME: HIGH: FIX_HW_BRN_57193 changes the structure's layout. */
+   uint64_t brn57193_tla_cmd_stream;
+};
+
+struct rogue_fwif_cmd_2d {
+   struct rogue_fwif_cmd_common ALIGN(8) cmn;
+   struct rogue_fwif_2d_regs ALIGN(8) regs;
+
+   uint32_t flags;
+};
+
+static_assert(
+   offsetof(struct rogue_fwif_cmd_2d, cmn) == 0U,
+   "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_2d");
+
+static_assert(
+   sizeof(struct rogue_fwif_cmd_2d) <= ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+   "kernel expects command size be increased to match current 2D command size");
+
+/***********************************************
+   Host interface structures.
+ ***********************************************/
+
+/**
+ * Configuration registers which need to be loaded by the firmware before CDM
+ * can be started.
+ */
+struct rogue_fwif_cdm_regs {
+   uint64_t tpu_border_colour_table;
+
+   /* FIXME: HIGH: RGX_FEATURE_COMPUTE_MORTON_CAPABLE changes the structure's
+    * layout.
+    */
+   uint64_t cdm_item;
+   /* FIXME: HIGH: RGX_FEATURE_CLUSTER_GROUPING changes the structure's layout.
+    */
+   uint64_t compute_cluster;
+
+   /* FIXME: HIGH: RGX_FEATURE_TPU_DM_GLOBAL_REGISTERS changes the structure's
+    * layout. Commenting out for now as it's not supported by 4.V.2.51.
+    */
+   /* uint64_t tpu_tag_cdm_ctrl; */
+   uint64_t cdm_ctrl_stream_base;
+
+   uint32_t tpu;
+
+   uint32_t cdm_resume_pds1;
+};
+
+struct rogue_fwif_cmd_compute {
+   struct rogue_fwif_cmd_common ALIGN(8) cmn;
+   struct rogue_fwif_cdm_regs ALIGN(8) regs;
+   uint32_t ALIGN(8) flags;
+};
+
+static_assert(
+   offsetof(struct rogue_fwif_cmd_compute, cmn) == 0U,
+   "rogue_fwif_cmd_common must be the first member of rogue_fwif_cmd_compute");
+
+static_assert(
+   sizeof(struct rogue_fwif_cmd_compute) <=
+      ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE,
+   "kernel expects command size be increased to match current COMPUTE command size");
+
+/* TODO: Rename the RGX_* macros in the comments once they are imported. */
+/* Applied to RGX_CR_VDM_SYNC_PDS_DATA_BASE. */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_VDM_SYNC_OFFSET_BYTES 0U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_VDM_SYNC_MAX_SIZE_BYTES 128U
+
+/** Applied to RGX_CR_EVENT_PIXEL_PDS_CODE. */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_EOT_OFFSET_BYTES 128U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_EOT_MAX_SIZE_BYTES 128U
+
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_PDS_HEAP_TOTAL_BYTES 4096U
+
+/** Pointed to by PDS code at RGX_CR_VDM_SYNC_PDS_DATA_BASE. */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_VDM_SYNC_OFFSET_BYTES 0U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_VDM_SYNC_MAX_SIZE_BYTES 128U
+
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_USC_HEAP_TOTAL_BYTES 4096U
+
+/**
+ * Applied to RGX_CR_MCU_FENCE, and RGX_CR_PM_MTILE_ARRAY
+ * (defined(RGX_FEATURE_SIMPLE_INTERNAL_PARAMETER_FORMAT)).
+ */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_FENCE_OFFSET_BYTES 0U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_FENCE_MAX_SIZE_BYTES 128U
+
+/** Applied to RGX_CR_TPU_YUV_CSC_COEFFICIENTS. */
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES 128U
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_YUV_CSC_MAX_SIZE_BYTES 1024U
+
+#define ROGUE_FWIF_HEAP_FIXED_OFFSET_GENERAL_HEAP_TOTAL_BYTES 4096U
+
+#endif /* PVR_ROGUE_FWIF_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_rf.h b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_rf.h
new file mode 100644 (file)
index 0000000..35da376
--- /dev/null
@@ -0,0 +1,45 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_FWIF_RF_H
+#define PVR_ROGUE_FWIF_RF_H
+
+#include <stdint.h>
+
+#include "pvr_rogue_fwif_shared.h"
+
+/** Register state carried by a reset framework (RF) command. */
+struct rogue_fwif_rf_regs {
+   /* CDM control stream base — presumably a device virtual address;
+    * confirm against the firmware ABI.
+    */
+   uint64_t cdm_ctrl_stream_base;
+};
+
+/* Enables the reset framework in the firmware. */
+#define ROGUE_FWIF_RF_FLAG_ENABLE 0x00000001U
+
+/** Reset framework command submitted to the firmware. */
+struct rogue_fwif_rf_cmd {
+   /** ROGUE_FWIF_RF_FLAG_* flags. */
+   uint32_t flags;
+
+   /* THIS MUST BE THE LAST MEMBER OF THE CONTAINING STRUCTURE */
+   struct rogue_fwif_rf_regs ALIGN(8) regs;
+};
+
+#endif /* PVR_ROGUE_FWIF_RF_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_shared.h b/src/imagination/vulkan/winsys/pvrsrvkm/fw-api/pvr_rogue_fwif_shared.h
new file mode 100644 (file)
index 0000000..1a2c771
--- /dev/null
@@ -0,0 +1,252 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_ROGUE_FWIF_SHARED_H
+#define PVR_ROGUE_FWIF_SHARED_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#define ALIGN(x) __attribute__((aligned(x)))
+
+/**
+ * Maximum number of UFOs in a CCB command.
+ * The number is based on having 32 sync prims (as originally), plus 32 sync
+ * checkpoints.
+ * Once the use of sync prims is no longer supported, we will retain
+ * the same total (64) as the number of sync checkpoints which may be
+ * supporting a fence is not visible to the client driver and has to
+ * allow for the number of different timelines involved in fence merges.
+ */
+#define ROGUE_FWIF_CCB_CMD_MAX_UFOS (32U + 32U)
+
+/**
+ * This is a generic limit imposed on any DM (TA,3D,CDM,TDM,2D,TRANSFER)
+ * command passed through the bridge.
+ * Just across the bridge in the server, any incoming kick command size is
+ * checked against this maximum limit.
+ * In case the incoming command size is larger than the specified limit,
+ * the bridge call is retired with error.
+ */
+#define ROGUE_FWIF_DM_INDEPENDENT_KICK_CMD_SIZE (1024U)
+
+#define ROGUE_FWIF_PRBUFFER_START (0)
+#define ROGUE_FWIF_PRBUFFER_ZSBUFFER (0)
+#define ROGUE_FWIF_PRBUFFER_MSAABUFFER (1)
+#define ROGUE_FWIF_PRBUFFER_MAXSUPPORTED (2)
+
+struct rogue_fwif_dev_addr {
+   uint32_t addr;
+};
+
+struct rogue_fwif_dma_addr {
+   uint64_t ALIGN(8) dev_vaddr;
+   struct rogue_fwif_dev_addr fw_addr;
+} ALIGN(8);
+
+struct rogue_fwif_ufo {
+   struct rogue_fwif_dev_addr ufo_addr;
+   uint32_t value;
+};
+
+struct rogue_fwif_cleanup_ctl {
+   /** Number of commands received by the FW. */
+   uint32_t submitted_cmds;
+
+   /** Number of commands executed by the FW. */
+   uint32_t executed_cmds;
+} ALIGN(8);
+
+/**
+ * Used to share frame numbers across UM-KM-FW,
+ * frame number is set in UM,
+ * frame number is required in both KM for HTB and FW for FW trace.
+ *
+ * May be used to house Kick flags in the future.
+ */
+struct rogue_fwif_cmd_common {
+   /** Associated frame number. */
+   uint32_t frame_num;
+};
+
+/**
+ * TA and 3D commands require set of firmware addresses that are stored in the
+ * Kernel. Client has handle(s) to Kernel containers storing these addresses,
+ * instead of raw addresses. We have to patch/write these addresses in KM to
+ * prevent UM from controlling FW addresses directly.
+ * Structures for TA and 3D commands are shared between Client and Firmware
+ * (both single-BVNC). Kernel is implemented in a multi-BVNC manner, so it can't
+ * use TA|3D CMD type definitions directly. Therefore we have a SHARED block
+ * that is shared between UM-KM-FW across all BVNC configurations.
+ */
+struct rogue_fwif_cmd_ta_3d_shared {
+   /** Common command attributes. */
+   struct rogue_fwif_cmd_common cmn;
+
+   /**
+    * RTData associated with this command, this is used for context
+    * selection and for storing out HW-context, when TA is switched out for
+    * continuing later.
+    */
+   struct rogue_fwif_dev_addr hw_rt_data;
+
+   /** Supported PR Buffers like Z/S/MSAA Scratch. */
+   struct rogue_fwif_dev_addr pr_buffers[ROGUE_FWIF_PRBUFFER_MAXSUPPORTED];
+};
+
+/**
+ * Client Circular Command Buffer (CCCB) control structure.
+ * This is shared between the KM driver and the Firmware and holds byte offsets
+ * into the CCCB as well as the wrapping mask to aid wrap around. A given
+ * snapshot of this queue with Cmd 1 running on the GPU might be:
+ *
+ *          Roff                           Doff                 Woff
+ * [..........|-1----------|=2===|=3===|=4===|~5~~~~|~6~~~~|~7~~~~|..........]
+ *            <      runnable commands       ><   !ready to run   >
+ *
+ * Cmd 1    : Currently executing on the GPU data master.
+ * Cmd 2,3,4: Fence dependencies met, commands runnable.
+ * Cmd 5... : Fence dependency not met yet.
+ */
+struct rogue_fwif_cccb_ctl {
+   /** Host write offset into CCB. This must be aligned to 16 bytes. */
+   uint32_t write_offset;
+
+   /**
+    * Firmware read offset into CCB. Points to the command that is runnable
+    * on GPU, if R!=W.
+    */
+   uint32_t read_offset;
+
+   /**
+    * Firmware fence dependency offset. Points to commands not ready, i.e.
+    * fence dependencies are not met.
+    */
+   uint32_t dep_offset;
+
+   /** Offset wrapping mask, total capacity in bytes of the CCB-1. */
+   uint32_t wrap_mask;
+} ALIGN(8);
+
+#define ROGUE_FW_LOCAL_FREELIST 0U
+#define ROGUE_FW_GLOBAL_FREELIST 1U
+#define ROGUE_FW_FREELIST_TYPE_LAST ROGUE_FW_GLOBAL_FREELIST
+#define ROGUE_FW_MAX_FREELISTS (ROGUE_FW_FREELIST_TYPE_LAST + 1U)
+
+struct rogue_fwif_ta_regs_cswitch {
+   uint64_t vdm_context_state_base_addr;
+   uint64_t vdm_context_state_resume_addr;
+   uint64_t ta_context_state_base_addr;
+
+   struct {
+      uint64_t vdm_context_store_task0;
+      uint64_t vdm_context_store_task1;
+      uint64_t vdm_context_store_task2;
+
+      /* VDM resume state update controls. */
+      uint64_t vdm_context_resume_task0;
+      uint64_t vdm_context_resume_task1;
+      uint64_t vdm_context_resume_task2;
+
+      uint64_t vdm_context_store_task3;
+      uint64_t vdm_context_store_task4;
+
+      uint64_t vdm_context_resume_task3;
+      uint64_t vdm_context_resume_task4;
+   } ta_state[2];
+};
+
+/* Size in bytes of the geometry (TA) context-switch register block.
+ * Note: the struct tag defined above is rogue_fwif_ta_regs_cswitch; the
+ * previous spelling (rogue_fwif_taregisters_cswitch) names a type that does
+ * not exist, so any use of this macro failed to compile.
+ */
+#define ROGUE_FWIF_TAREGISTERS_CSWITCH_SIZE \
+   sizeof(struct rogue_fwif_ta_regs_cswitch)
+
+struct rogue_fwif_cdm_regs_cswitch {
+   uint64_t cdm_context_state_base_addr;
+   uint64_t cdm_context_pds0;
+   uint64_t cdm_context_pds1;
+   uint64_t cdm_terminate_pds;
+   uint64_t cdm_terminate_pds1;
+
+   /* CDM resume controls. */
+   uint64_t cdm_resume_pds0;
+   uint64_t cdm_context_pds0_b;
+   uint64_t cdm_resume_pds0_b;
+};
+
+struct rogue_fwif_static_rendercontext_state {
+   /** Geom registers for ctx switch. */
+   struct rogue_fwif_ta_regs_cswitch ALIGN(8) ctx_switch_regs;
+};
+
+#define ROGUE_FWIF_STATIC_RENDERCONTEXT_SIZE \
+   sizeof(struct rogue_fwif_static_rendercontext_state)
+
+struct rogue_fwif_static_computecontext_state {
+   /** CDM registers for ctx switch. */
+   struct rogue_fwif_cdm_regs_cswitch ALIGN(8) ctx_switch_regs;
+};
+
+#define ROGUE_FWIF_STATIC_COMPUTECONTEXT_SIZE \
+   sizeof(struct rogue_fwif_static_computecontext_state)
+
+enum rogue_fwif_prbuffer_state {
+   ROGUE_FWIF_PRBUFFER_UNBACKED = 0,
+   ROGUE_FWIF_PRBUFFER_BACKED,
+   ROGUE_FWIF_PRBUFFER_BACKING_PENDING,
+   ROGUE_FWIF_PRBUFFER_UNBACKING_PENDING,
+};
+
+struct rogue_fwif_prbuffer {
+   /** Buffer ID. */
+   uint32_t buffer_id;
+   /** Needs On-demand Z/S/MSAA Buffer allocation. */
+   bool ALIGN(4) on_demand;
+   /** Z/S/MSAA -Buffer state. */
+   enum rogue_fwif_prbuffer_state state;
+   /** Cleanup state. */
+   struct rogue_fwif_cleanup_ctl cleanup_state;
+   /** Compatibility and other flags. */
+   uint32_t pr_buffer_flags;
+} ALIGN(8);
+
+/* Last reset reason for a context. */
+enum rogue_context_reset_reason {
+   /** No reset reason recorded. */
+   ROGUE_CONTEXT_RESET_REASON_NONE = 0,
+   /** Caused a reset due to locking up. */
+   ROGUE_CONTEXT_RESET_REASON_GUILTY_LOCKUP = 1,
+   /** Affected by another context locking up. */
+   ROGUE_CONTEXT_RESET_REASON_INNOCENT_LOCKUP = 2,
+   /** Overran the global deadline. */
+   ROGUE_CONTEXT_RESET_REASON_GUILTY_OVERRUNING = 3,
+   /** Affected by another context overrunning. */
+   ROGUE_CONTEXT_RESET_REASON_INNOCENT_OVERRUNING = 4,
+   /** Forced reset to ensure scheduling requirements. */
+   ROGUE_CONTEXT_RESET_REASON_HARD_CONTEXT_SWITCH = 5,
+};
+
+struct rogue_context_reset_reason_data {
+   enum rogue_context_reset_reason reset_reason;
+   uint32_t reset_ext_job_ref;
+};
+
+#endif /* PVR_ROGUE_FWIF_SHARED_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.c
new file mode 100644 (file)
index 0000000..3d12ae3
--- /dev/null
@@ -0,0 +1,521 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <xf86drm.h>
+
+#include "pvr_csb.h"
+#include "pvr_device_info.h"
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bo.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_srv_job_compute.h"
+#include "pvr_srv_job_render.h"
+#include "pvr_srv_public.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "pvr_winsys_helper.h"
+#include "util/log.h"
+#include "util/os_misc.h"
+#include "vk_log.h"
+
+/* Amount of space used to hold sync prim values (in bytes). */
+#define PVR_SRV_SYNC_PRIM_VALUE_SIZE 4U
+
+/**
+ * Initializes one winsys heap: queries its parameters from the kernel
+ * driver, sets up the common winsys heap state, then creates the
+ * server-side counterpart of the heap.
+ *
+ * On failure the common heap state is torn down again, so the caller only
+ * has to propagate the error.
+ */
+static VkResult pvr_srv_heap_init(
+   struct pvr_srv_winsys *srv_ws,
+   struct pvr_srv_winsys_heap *srv_heap,
+   uint32_t heap_idx,
+   const struct pvr_winsys_static_data_offsets *const static_data_offsets)
+{
+   pvr_dev_addr_t base_address;
+   uint32_t log2_page_size;
+   uint64_t reserved_size;
+   VkResult result;
+   uint64_t size;
+
+   result = pvr_srv_get_heap_details(srv_ws->render_fd,
+                                     heap_idx,
+                                     0,
+                                     NULL,
+                                     &base_address,
+                                     &size,
+                                     &reserved_size,
+                                     &log2_page_size);
+   if (result != VK_SUCCESS)
+      return result;
+
+   /* NOTE(review): base_address is passed both as the heap base and as the
+    * reserved-area base, i.e. the static data region appears to live at the
+    * start of the heap — confirm against
+    * pvr_winsys_helper_winsys_heap_init().
+    */
+   result = pvr_winsys_helper_winsys_heap_init(&srv_ws->base,
+                                               base_address,
+                                               size,
+                                               base_address,
+                                               reserved_size,
+                                               log2_page_size,
+                                               static_data_offsets,
+                                               &srv_heap->base);
+   if (result != VK_SUCCESS)
+      return result;
+
+   assert(srv_heap->base.page_size == srv_ws->base.page_size);
+   assert(srv_heap->base.log2_page_size == srv_ws->base.log2_page_size);
+   assert(srv_heap->base.reserved_size % PVR_SRV_RESERVED_SIZE_GRANULARITY ==
+          0);
+
+   /* Create server-side counterpart of Device Memory heap */
+   result = pvr_srv_int_heap_create(srv_ws->render_fd,
+                                    srv_heap->base.base_addr,
+                                    srv_heap->base.size,
+                                    srv_heap->base.log2_page_size,
+                                    srv_ws->server_memctx,
+                                    &srv_heap->server_heap);
+   if (result != VK_SUCCESS) {
+      pvr_winsys_helper_winsys_heap_finish(&srv_heap->base);
+      return result;
+   }
+
+   return VK_SUCCESS;
+}
+
+/**
+ * Tears down a heap set up by pvr_srv_heap_init().
+ *
+ * Returns false (and destroys nothing) when the common heap state is still
+ * in use; otherwise the server-side heap is destroyed as well.
+ */
+static bool pvr_srv_heap_finish(struct pvr_srv_winsys *srv_ws,
+                                struct pvr_srv_winsys_heap *srv_heap)
+{
+   const bool finished = pvr_winsys_helper_winsys_heap_finish(&srv_heap->base);
+
+   if (finished)
+      pvr_srv_int_heap_destroy(srv_ws->render_fd, srv_heap->server_heap);
+
+   return finished;
+}
+
+/**
+ * Creates the server-side memory context, discovers the device heaps by
+ * name, initializes the required (general/PDS/USC) and optional (region
+ * header) heaps, and allocates + fills the firmware static data carveouts.
+ *
+ * Unwinds everything already set up on any failure.
+ */
+static VkResult pvr_srv_memctx_init(struct pvr_srv_winsys *srv_ws)
+{
+   const struct pvr_winsys_static_data_offsets
+      general_heap_static_data_offsets = {
+         .yuv_csc = FWIF_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES,
+      };
+   const struct pvr_winsys_static_data_offsets pds_heap_static_data_offsets = {
+      .eot = FWIF_PDS_HEAP_EOT_OFFSET_BYTES,
+      .vdm_sync = FWIF_PDS_HEAP_VDM_SYNC_OFFSET_BYTES,
+   };
+   const struct pvr_winsys_static_data_offsets usc_heap_static_data_offsets = {
+      .vdm_sync = FWIF_USC_HEAP_VDM_SYNC_OFFSET_BYTES,
+   };
+   const struct pvr_winsys_static_data_offsets
+      rgn_hdr_heap_static_data_offsets = { 0 };
+
+   char heap_name[PVR_SRV_DEVMEM_HEAPNAME_MAXLENGTH];
+   /* -1 means "not found yet" for each heap index. */
+   int general_heap_idx = -1;
+   int rgn_hdr_heap_idx = -1;
+   int pds_heap_idx = -1;
+   int usc_heap_idx = -1;
+   uint32_t heap_count;
+   VkResult result;
+
+   result = pvr_srv_int_ctx_create(srv_ws->render_fd,
+                                   &srv_ws->server_memctx,
+                                   &srv_ws->server_memctx_data);
+   if (result != VK_SUCCESS)
+      return result;
+
+   os_get_page_size(&srv_ws->base.page_size);
+   srv_ws->base.log2_page_size = util_logbase2(srv_ws->base.page_size);
+
+   result = pvr_srv_get_heap_count(srv_ws->render_fd, &heap_count);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_int_ctx_destroy;
+
+   assert(heap_count > 0);
+
+   /* Identify heaps by name. Note: sizeof(IDENT) includes the trailing NUL,
+    * so each strncmp() is effectively an exact-string match.
+    */
+   for (uint32_t i = 0; i < heap_count; i++) {
+      result = pvr_srv_get_heap_details(srv_ws->render_fd,
+                                        i,
+                                        sizeof(heap_name),
+                                        heap_name,
+                                        NULL,
+                                        NULL,
+                                        NULL,
+                                        NULL);
+      if (result != VK_SUCCESS)
+         goto err_pvr_srv_int_ctx_destroy;
+
+      if (general_heap_idx == -1 &&
+          strncmp(heap_name,
+                  PVR_SRV_GENERAL_HEAP_IDENT,
+                  sizeof(PVR_SRV_GENERAL_HEAP_IDENT)) == 0) {
+         general_heap_idx = i;
+      } else if (pds_heap_idx == -1 &&
+                 strncmp(heap_name,
+                         PVR_SRV_PDSCODEDATA_HEAP_IDENT,
+                         sizeof(PVR_SRV_PDSCODEDATA_HEAP_IDENT)) == 0) {
+         pds_heap_idx = i;
+      } else if (rgn_hdr_heap_idx == -1 &&
+                 strncmp(heap_name,
+                         PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT,
+                         sizeof(PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT)) == 0) {
+         rgn_hdr_heap_idx = i;
+      } else if (usc_heap_idx == -1 &&
+                 strncmp(heap_name,
+                         PVR_SRV_USCCODE_HEAP_IDENT,
+                         sizeof(PVR_SRV_USCCODE_HEAP_IDENT)) == 0) {
+         usc_heap_idx = i;
+      }
+   }
+
+   /* Check for and initialize required heaps. */
+   if (general_heap_idx == -1 || pds_heap_idx == -1 || usc_heap_idx == -1) {
+      result = vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+      goto err_pvr_srv_int_ctx_destroy;
+   }
+
+   result = pvr_srv_heap_init(srv_ws,
+                              &srv_ws->general_heap,
+                              general_heap_idx,
+                              &general_heap_static_data_offsets);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_int_ctx_destroy;
+
+   result = pvr_srv_heap_init(srv_ws,
+                              &srv_ws->pds_heap,
+                              pds_heap_idx,
+                              &pds_heap_static_data_offsets);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_heap_finish_general;
+
+   result = pvr_srv_heap_init(srv_ws,
+                              &srv_ws->usc_heap,
+                              usc_heap_idx,
+                              &usc_heap_static_data_offsets);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_heap_finish_pds;
+
+   /* Check for and set up optional heaps. */
+   if (rgn_hdr_heap_idx != -1) {
+      result = pvr_srv_heap_init(srv_ws,
+                                 &srv_ws->rgn_hdr_heap,
+                                 rgn_hdr_heap_idx,
+                                 &rgn_hdr_heap_static_data_offsets);
+      if (result != VK_SUCCESS)
+         goto err_pvr_srv_heap_finish_usc;
+      srv_ws->rgn_hdr_heap_present = true;
+   } else {
+      srv_ws->rgn_hdr_heap_present = false;
+   }
+
+   result =
+      pvr_winsys_helper_allocate_static_memory(&srv_ws->base,
+                                               pvr_srv_heap_alloc_reserved,
+                                               &srv_ws->general_heap.base,
+                                               &srv_ws->pds_heap.base,
+                                               &srv_ws->usc_heap.base,
+                                               &srv_ws->general_vma,
+                                               &srv_ws->pds_vma,
+                                               &srv_ws->usc_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_heap_finish_rgn_hdr;
+
+   result = pvr_winsys_helper_fill_static_memory(&srv_ws->base,
+                                                 srv_ws->general_vma,
+                                                 srv_ws->pds_vma,
+                                                 srv_ws->usc_vma);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_free_static_memory;
+
+   return VK_SUCCESS;
+
+err_pvr_srv_free_static_memory:
+   pvr_winsys_helper_free_static_memory(srv_ws->general_vma,
+                                        srv_ws->pds_vma,
+                                        srv_ws->usc_vma);
+
+err_pvr_srv_heap_finish_rgn_hdr:
+   if (srv_ws->rgn_hdr_heap_present)
+      pvr_srv_heap_finish(srv_ws, &srv_ws->rgn_hdr_heap);
+
+err_pvr_srv_heap_finish_usc:
+   pvr_srv_heap_finish(srv_ws, &srv_ws->usc_heap);
+
+err_pvr_srv_heap_finish_pds:
+   pvr_srv_heap_finish(srv_ws, &srv_ws->pds_heap);
+
+err_pvr_srv_heap_finish_general:
+   pvr_srv_heap_finish(srv_ws, &srv_ws->general_heap);
+
+err_pvr_srv_int_ctx_destroy:
+   pvr_srv_int_ctx_destroy(srv_ws->render_fd, srv_ws->server_memctx);
+
+   return result;
+}
+
+/**
+ * Reverses pvr_srv_memctx_init(): frees the static data carveouts, finishes
+ * all heaps (logging any heap still in use) and destroys the server-side
+ * memory context.
+ */
+static void pvr_srv_memctx_finish(struct pvr_srv_winsys *srv_ws)
+{
+   pvr_winsys_helper_free_static_memory(srv_ws->general_vma,
+                                        srv_ws->pds_vma,
+                                        srv_ws->usc_vma);
+
+   if (srv_ws->rgn_hdr_heap_present &&
+       !pvr_srv_heap_finish(srv_ws, &srv_ws->rgn_hdr_heap)) {
+      vk_errorf(NULL,
+                VK_ERROR_UNKNOWN,
+                "Region header heap in use, can not deinit");
+   }
+
+   if (!pvr_srv_heap_finish(srv_ws, &srv_ws->usc_heap))
+      vk_errorf(NULL, VK_ERROR_UNKNOWN, "USC heap in use, can not deinit");
+
+   if (!pvr_srv_heap_finish(srv_ws, &srv_ws->pds_heap))
+      vk_errorf(NULL, VK_ERROR_UNKNOWN, "PDS heap in use, can not deinit");
+
+   if (!pvr_srv_heap_finish(srv_ws, &srv_ws->general_heap))
+      vk_errorf(NULL, VK_ERROR_UNKNOWN, "General heap in use, can not deinit");
+
+   pvr_srv_int_ctx_destroy(srv_ws->render_fd, srv_ws->server_memctx);
+}
+
+/**
+ * Allocates the winsys-wide sync primitive block from which individual sync
+ * prims are carved (see pvr_srv_sync_prim_alloc()).
+ */
+static VkResult pvr_srv_sync_prim_block_init(struct pvr_srv_winsys *srv_ws)
+{
+   /* The PMR handle isn't used by us, but the bridge call requires a valid
+    * out-pointer for it.
+    */
+   void *unused_pmr;
+
+   return pvr_srv_alloc_sync_primitive_block(srv_ws->render_fd,
+                                             &srv_ws->sync_block_handle,
+                                             &unused_pmr,
+                                             &srv_ws->sync_block_size,
+                                             &srv_ws->sync_block_fw_addr);
+}
+
+/** Frees the sync primitive block and clears the stale handle. */
+static void pvr_srv_sync_prim_block_finish(struct pvr_srv_winsys *srv_ws)
+{
+   pvr_srv_free_sync_primitive_block(srv_ws->render_fd,
+                                     srv_ws->sync_block_handle);
+   /* Null the handle so accidental reuse is detectable. */
+   srv_ws->sync_block_handle = NULL;
+}
+
+/** Tears down the winsys in reverse order of pvr_srv_winsys_create(). */
+static void pvr_srv_winsys_destroy(struct pvr_winsys *ws)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   /* Save the fd: srv_ws is freed before the connection is destroyed. */
+   int fd = srv_ws->render_fd;
+
+   pvr_srv_sync_prim_block_finish(srv_ws);
+   pvr_srv_memctx_finish(srv_ws);
+   vk_free(srv_ws->alloc, srv_ws);
+   pvr_srv_connection_destroy(fd);
+}
+
+/**
+ * Fills *dev_info from the packed BVNC queried at connection time.
+ *
+ * Returns 0 on success, or the non-zero pvr_device_info_init() error (after
+ * logging the unsupported BVNC).
+ */
+static int pvr_srv_winsys_device_info_init(struct pvr_winsys *ws,
+                                           struct pvr_device_info *dev_info)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   const int ret = pvr_device_info_init(dev_info, srv_ws->bvnc);
+
+   if (ret == 0)
+      return 0;
+
+   mesa_logw("Unsupported BVNC: %u.%u.%u.%u\n",
+             PVR_BVNC_UNPACK_B(srv_ws->bvnc),
+             PVR_BVNC_UNPACK_V(srv_ws->bvnc),
+             PVR_BVNC_UNPACK_N(srv_ws->bvnc),
+             PVR_BVNC_UNPACK_C(srv_ws->bvnc));
+
+   return ret;
+}
+
+/** Exposes the winsys heaps to the driver core. */
+static void pvr_srv_winsys_get_heaps_info(struct pvr_winsys *ws,
+                                          struct pvr_winsys_heaps *heaps)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+
+   heaps->general_heap = &srv_ws->general_heap.base;
+   heaps->pds_heap = &srv_ws->pds_heap.base;
+   heaps->usc_heap = &srv_ws->usc_heap.base;
+
+   /* Fall back to the general heap when no region header heap exists. */
+   heaps->rgn_hdr_heap = srv_ws->rgn_hdr_heap_present
+                            ? &srv_ws->rgn_hdr_heap.base
+                            : &srv_ws->general_heap.base;
+}
+
+/** pvrsrvkm implementation of the winsys interface vtable. */
+static const struct pvr_winsys_ops srv_winsys_ops = {
+   .destroy = pvr_srv_winsys_destroy,
+   .device_info_init = pvr_srv_winsys_device_info_init,
+   .get_heaps_info = pvr_srv_winsys_get_heaps_info,
+   .buffer_create = pvr_srv_winsys_buffer_create,
+   .buffer_create_from_fd = pvr_srv_winsys_buffer_create_from_fd,
+   .buffer_destroy = pvr_srv_winsys_buffer_destroy,
+   .buffer_get_fd = pvr_srv_winsys_buffer_get_fd,
+   .buffer_map = pvr_srv_winsys_buffer_map,
+   .buffer_unmap = pvr_srv_winsys_buffer_unmap,
+   .heap_alloc = pvr_srv_winsys_heap_alloc,
+   .heap_free = pvr_srv_winsys_heap_free,
+   .vma_map = pvr_srv_winsys_vma_map,
+   .vma_unmap = pvr_srv_winsys_vma_unmap,
+   .syncobj_create = pvr_srv_winsys_syncobj_create,
+   .syncobj_destroy = pvr_srv_winsys_syncobj_destroy,
+   .syncobjs_reset = pvr_srv_winsys_syncobjs_reset,
+   .syncobjs_signal = pvr_srv_winsys_syncobjs_signal,
+   .syncobjs_wait = pvr_srv_winsys_syncobjs_wait,
+   .syncobjs_merge = pvr_srv_winsys_syncobjs_merge,
+   .free_list_create = pvr_srv_winsys_free_list_create,
+   .free_list_destroy = pvr_srv_winsys_free_list_destroy,
+   .render_target_dataset_create = pvr_srv_render_target_dataset_create,
+   .render_target_dataset_destroy = pvr_srv_render_target_dataset_destroy,
+   .render_ctx_create = pvr_srv_winsys_render_ctx_create,
+   .render_ctx_destroy = pvr_srv_winsys_render_ctx_destroy,
+   .render_submit = pvr_srv_winsys_render_submit,
+   .compute_ctx_create = pvr_srv_winsys_compute_ctx_create,
+   .compute_ctx_destroy = pvr_srv_winsys_compute_ctx_destroy,
+   .compute_submit = pvr_srv_winsys_compute_submit,
+};
+
+/**
+ * Checks that the opened render node is driven by a supported version of the
+ * downstream pvrsrvkm kernel driver.
+ */
+static bool pvr_is_driver_compatible(int render_fd)
+{
+   drmVersionPtr version = drmGetVersion(render_fd);
+   bool compatible;
+
+   if (!version)
+      return false;
+
+   assert(strcmp(version->name, "pvr") == 0);
+
+   /* Only the 1.14 driver is supported for now. */
+   compatible = version->version_major == PVR_SRV_VERSION_MAJ &&
+                version->version_minor == PVR_SRV_VERSION_MIN;
+   if (!compatible) {
+      vk_errorf(NULL,
+                VK_ERROR_INCOMPATIBLE_DRIVER,
+                "Unsupported downstream driver version (%u.%u)",
+                version->version_major,
+                version->version_minor);
+   }
+
+   drmFreeVersion(version);
+
+   return compatible;
+}
+
+/**
+ * Creates the pvrsrvkm winsys: verifies driver compatibility, opens the
+ * services connection (which also yields the packed BVNC), then sets up the
+ * memory context and the sync primitive block.
+ *
+ * Returns NULL on any failure, after unwinding whatever succeeded.
+ */
+struct pvr_winsys *pvr_srv_winsys_create(int master_fd,
+                                         int render_fd,
+                                         const VkAllocationCallbacks *alloc)
+{
+   struct pvr_srv_winsys *srv_ws;
+   VkResult result;
+   uint64_t bvnc;
+
+   if (!pvr_is_driver_compatible(render_fd))
+      return NULL;
+
+   result = pvr_srv_connection_create(render_fd, &bvnc);
+   if (result != VK_SUCCESS)
+      return NULL;
+
+   srv_ws =
+      vk_zalloc(alloc, sizeof(*srv_ws), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_ws) {
+      vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_pvr_srv_connection_destroy;
+   }
+
+   srv_ws->base.ops = &srv_winsys_ops;
+   srv_ws->bvnc = bvnc;
+   srv_ws->master_fd = master_fd;
+   srv_ws->render_fd = render_fd;
+   srv_ws->alloc = alloc;
+
+   result = pvr_srv_memctx_init(srv_ws);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_ws;
+
+   result = pvr_srv_sync_prim_block_init(srv_ws);
+   if (result != VK_SUCCESS)
+      goto err_pvr_srv_memctx_finish;
+
+   return &srv_ws->base;
+
+err_pvr_srv_memctx_finish:
+   pvr_srv_memctx_finish(srv_ws);
+
+err_vk_free_srv_ws:
+   vk_free(alloc, srv_ws);
+
+err_pvr_srv_connection_destroy:
+   pvr_srv_connection_destroy(render_fd);
+
+   return NULL;
+}
+
+/**
+ * Allocates a sync primitive by atomically carving
+ * PVR_SRV_SYNC_PRIM_VALUE_SIZE bytes out of the winsys-wide sync block.
+ *
+ * Returns NULL (after reporting a Vulkan error) when the block is exhausted
+ * or host allocation fails.
+ *
+ * NOTE(review): sync_block_offset is declared uint16_t while
+ * sync_block_size is uint32_t — verify the block can never exceed 64KiB, or
+ * the offset could wrap.
+ */
+struct pvr_srv_sync_prim *pvr_srv_sync_prim_alloc(struct pvr_srv_winsys *srv_ws)
+{
+   struct pvr_srv_sync_prim *sync_prim;
+
+   /* Cheap early-out; the authoritative check is after the atomic add below.
+    * Use >= rather than == so that any overshoot caused by concurrent
+    * allocators is caught, not just the exact boundary value.
+    */
+   if (p_atomic_read(&srv_ws->sync_block_offset) >= srv_ws->sync_block_size) {
+      vk_error(NULL, VK_ERROR_UNKNOWN);
+      return NULL;
+   }
+
+   sync_prim = vk_alloc(srv_ws->alloc,
+                        sizeof(*sync_prim),
+                        8,
+                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!sync_prim) {
+      vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return NULL;
+   }
+
+   /* p_atomic_add_return() returns the new value rather than the old one, so
+    * we have to subtract PVR_SRV_SYNC_PRIM_VALUE_SIZE to get the old value.
+    */
+   sync_prim->offset = p_atomic_add_return(&srv_ws->sync_block_offset,
+                                           PVR_SRV_SYNC_PRIM_VALUE_SIZE);
+   sync_prim->offset -= PVR_SRV_SYNC_PRIM_VALUE_SIZE;
+   /* >= (not ==): if several threads raced past the early check, a later
+    * one may observe an old offset strictly greater than the block size;
+    * == would let it return an out-of-range sync prim.
+    */
+   if (sync_prim->offset >= srv_ws->sync_block_size) {
+      /* FIXME: need to free offset back to srv_ws->sync_block_offset. */
+      vk_free(srv_ws->alloc, sync_prim);
+
+      vk_error(NULL, VK_ERROR_UNKNOWN);
+
+      return NULL;
+   }
+
+   sync_prim->srv_ws = srv_ws;
+
+   return sync_prim;
+}
+
+/* FIXME: Add support for freeing offsets back to the sync block. */
+/** Releases the host allocation of a sync prim; NULL is a no-op. */
+void pvr_srv_sync_prim_free(struct pvr_srv_sync_prim *sync_prim)
+{
+   if (!sync_prim)
+      return;
+
+   vk_free(sync_prim->srv_ws->alloc, sync_prim);
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv.h
new file mode 100644 (file)
index 0000000..6dd9dcc
--- /dev/null
@@ -0,0 +1,129 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_H
+#define PVR_SRV_H
+
+#include <stdint.h>
+#include <pthread.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+#include "util/macros.h"
+#include "util/vma.h"
+
+/*******************************************
+   Misc defines
+ *******************************************/
+
+/* 64KB is MAX anticipated OS page size */
+#define PVR_SRV_RESERVED_SIZE_GRANULARITY 0x10000
+
+#define PVR_SRV_DEVMEM_HEAPNAME_MAXLENGTH 160
+
+#define PVR_SRV_GENERAL_HEAP_IDENT "General"
+#define PVR_SRV_RGNHDR_BRN_63142_HEAP_IDENT "RgnHdr BRN63142"
+#define PVR_SRV_PDSCODEDATA_HEAP_IDENT "PDS Code and Data"
+#define PVR_SRV_USCCODE_HEAP_IDENT "USC Code"
+
+/* Sizes of, and offsets into, the per-heap firmware static data areas.
+ * Offsets appear to be relative to the start of each heap's reserved
+ * region (see pvr_srv_heap_init()) — confirm against the static data
+ * helpers in pvr_winsys_helper.
+ */
+#define FWIF_PDS_HEAP_TOTAL_BYTES 4096
+#define FWIF_PDS_HEAP_VDM_SYNC_OFFSET_BYTES 0
+#define FWIF_PDS_HEAP_EOT_OFFSET_BYTES 128
+#define FWIF_GENERAL_HEAP_TOTAL_BYTES 4096
+#define FWIF_USC_HEAP_TOTAL_BYTES 4096
+#define FWIF_USC_HEAP_VDM_SYNC_OFFSET_BYTES 0
+#define FWIF_GENERAL_HEAP_YUV_CSC_OFFSET_BYTES 128U
+
+/*******************************************
+    structure definitions
+ *******************************************/
+/** Common winsys heap state plus its server-side counterpart handle. */
+struct pvr_srv_winsys_heap {
+   struct pvr_winsys_heap base;
+
+   /* Handle returned by pvr_srv_int_heap_create(). */
+   void *server_heap;
+};
+
+/** pvrsrvkm winsys instance; embeds and extends the generic pvr_winsys. */
+struct pvr_srv_winsys {
+   struct pvr_winsys base;
+
+   /* DRM fds: master for display buffer allocation/export, render for all
+    * services bridge calls.
+    */
+   int master_fd;
+   int render_fd;
+
+   const VkAllocationCallbacks *alloc;
+
+   /* Packed bvnc */
+   uint64_t bvnc;
+
+   /* Server-side memory context handles from pvr_srv_int_ctx_create(). */
+   void *server_memctx;
+   void *server_memctx_data;
+
+   /* Required heaps */
+   struct pvr_srv_winsys_heap general_heap;
+   struct pvr_srv_winsys_heap pds_heap;
+   struct pvr_srv_winsys_heap usc_heap;
+
+   /* Optional heaps */
+   bool rgn_hdr_heap_present;
+   struct pvr_srv_winsys_heap rgn_hdr_heap;
+
+   /* vma's for reserved memory regions */
+   struct pvr_winsys_vma *pds_vma;
+   struct pvr_winsys_vma *usc_vma;
+   struct pvr_winsys_vma *general_vma;
+
+   /* Sync block used for allocating sync primitives. */
+   void *sync_block_handle;
+   uint32_t sync_block_size;
+   uint32_t sync_block_fw_addr;
+   /* NOTE(review): uint16_t bump allocator compared against the uint32_t
+    * sync_block_size in pvr_srv_sync_prim_alloc() — confirm the block never
+    * exceeds 64KiB.
+    */
+   uint16_t sync_block_offset;
+};
+
+/** One sync primitive carved out of the winsys sync block. */
+struct pvr_srv_sync_prim {
+   /* Owning winsys (used to free the allocation and compute FW addresses). */
+   struct pvr_srv_winsys *srv_ws;
+   /* Byte offset of this prim within the sync block. */
+   uint32_t offset;
+   uint32_t value;
+};
+
+/*******************************************
+    helper macros
+ *******************************************/
+
+#define to_pvr_srv_winsys(ws) container_of((ws), struct pvr_srv_winsys, base)
+#define to_pvr_srv_winsys_heap(heap) \
+   container_of((heap), struct pvr_srv_winsys_heap, base)
+
+/*******************************************
+    functions
+ *******************************************/
+
+struct pvr_srv_sync_prim *
+pvr_srv_sync_prim_alloc(struct pvr_srv_winsys *srv_ws);
+void pvr_srv_sync_prim_free(struct pvr_srv_sync_prim *sync_prim);
+
+/** Returns the firmware address of a sync prim (block base + offset). */
+static inline uint32_t
+pvr_srv_sync_prim_get_fw_addr(const struct pvr_srv_sync_prim *const sync_prim)
+{
+   return sync_prim->srv_ws->sync_block_fw_addr + sync_prim->offset;
+}
+
+#endif /* PVR_SRV_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.c
new file mode 100644 (file)
index 0000000..11c9170
--- /dev/null
@@ -0,0 +1,596 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <xf86drm.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bo.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_winsys_helper.h"
+#include "util/u_atomic.h"
+#include "util/bitscan.h"
+#include "util/macros.h"
+#include "util/u_math.h"
+#include "vk_log.h"
+
+/* Note: This function does not have an associated pvr_srv_free_display_pmr
+ * function, use pvr_srv_free_pmr instead.
+ */
+/**
+ * Allocates a display buffer on the master fd and imports it into the
+ * render device as a PMR.
+ *
+ * On success *pmr_out holds the imported PMR and *handle_out the handle of
+ * the underlying display buffer; on failure the display buffer is destroyed
+ * again.
+ */
+static VkResult pvr_srv_alloc_display_pmr(struct pvr_srv_winsys *srv_ws,
+                                          uint64_t size,
+                                          uint64_t srv_flags,
+                                          void **const pmr_out,
+                                          uint32_t *const handle_out)
+{
+   uint64_t alignment_out;
+   uint64_t size_out;
+   VkResult result;
+   uint32_t handle;
+   int ret;
+   int fd;
+
+   ret =
+      pvr_winsys_helper_display_buffer_create(srv_ws->master_fd, size, &handle);
+   if (ret)
+      return vk_error(NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+   ret = drmPrimeHandleToFD(srv_ws->master_fd, handle, O_CLOEXEC, &fd);
+   if (ret) {
+      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_display_buffer_destroy;
+   }
+
+   result = pvr_srv_physmem_import_dmabuf(srv_ws->render_fd,
+                                          fd,
+                                          srv_flags,
+                                          pmr_out,
+                                          &size_out,
+                                          &alignment_out);
+
+   /* close fd, not needed anymore */
+   close(fd);
+
+   /* Check the result before touching the out-params: on failure size_out
+    * and alignment_out are uninitialized, so asserting on them first (as the
+    * previous code did) read uninitialized memory.
+    */
+   if (result != VK_SUCCESS)
+      goto err_display_buffer_destroy;
+
+   assert(size_out >= size);
+   assert(alignment_out == srv_ws->base.page_size);
+
+   *handle_out = handle;
+
+   return VK_SUCCESS;
+
+err_display_buffer_destroy:
+   pvr_winsys_helper_display_buffer_destroy(srv_ws->master_fd, handle);
+
+   return result;
+}
+
+/** Takes an additional reference on the BO (paired with buffer_release()). */
+static void buffer_acquire(struct pvr_srv_winsys_bo *srv_bo)
+{
+   p_atomic_inc(&srv_bo->ref_count);
+}
+
+/**
+ * Drops a reference on the BO; on the last drop, frees the PMR, the
+ * underlying display buffer (if any) and the BO itself.
+ */
+static void buffer_release(struct pvr_srv_winsys_bo *srv_bo)
+{
+   struct pvr_srv_winsys *srv_ws;
+
+   /* If all references were dropped the pmr can be freed and unlocked */
+   if (p_atomic_dec_return(&srv_bo->ref_count) == 0) {
+      /* Fetch the winsys pointer before srv_bo is freed below. */
+      srv_ws = to_pvr_srv_winsys(srv_bo->base.ws);
+      pvr_srv_free_pmr(srv_ws->render_fd, srv_bo->pmr);
+
+      if (srv_bo->is_display_buffer) {
+         pvr_winsys_helper_display_buffer_destroy(srv_ws->master_fd,
+                                                  srv_bo->handle);
+      }
+
+      vk_free(srv_ws->alloc, srv_bo);
+   }
+}
+
+/** Translates generic winsys BO flags into services memalloc flags. */
+static uint64_t pvr_srv_get_alloc_flags(uint32_t ws_flags)
+{
+   /* TODO: For now we assume that buffers should always be accessible to the
+    * kernel and that the PVR_WINSYS_BO_FLAG_CPU_ACCESS flag only applies to
+    * userspace mappings. Check to see if there's any situations where we
+    * wouldn't want this to be the case.
+    */
+   uint64_t flags = PVR_SRV_MEMALLOCFLAG_GPU_READABLE |
+                    PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE |
+                    PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE |
+                    PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC;
+
+   if (ws_flags & PVR_WINSYS_BO_FLAG_CPU_ACCESS) {
+      flags |= PVR_SRV_MEMALLOCFLAG_CPU_READABLE |
+               PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE;
+   }
+
+   flags |= (ws_flags & PVR_WINSYS_BO_FLAG_GPU_UNCACHED)
+               ? PVR_SRV_MEMALLOCFLAG_GPU_UNCACHED
+               : PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT;
+
+   if (ws_flags & PVR_WINSYS_BO_FLAG_PM_FW_PROTECT)
+      flags |= PVR_SRV_MEMALLOCFLAG_DEVICE_FLAG(PM_FW_PROTECT);
+
+   if (ws_flags & PVR_WINSYS_BO_FLAG_ZERO_ON_ALLOC)
+      flags |= PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC;
+
+   return flags;
+}
+
+/**
+ * Creates a winsys BO, either as a display buffer (allocated on the master
+ * fd and imported) or as a regular PMR allocation on the render fd.
+ *
+ * size is rounded up to a page-aligned multiple of alignment.
+ */
+VkResult pvr_srv_winsys_buffer_create(struct pvr_winsys *ws,
+                                      uint64_t size,
+                                      uint64_t alignment,
+                                      enum pvr_winsys_bo_type type,
+                                      uint32_t ws_flags,
+                                      struct pvr_winsys_bo **const bo_out)
+{
+   const uint64_t srv_flags = pvr_srv_get_alloc_flags(ws_flags);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_bo *srv_bo;
+   VkResult result;
+
+   assert(util_is_power_of_two_nonzero(alignment));
+
+   /* Kernel will page align the size, we do the same here so we have access to
+    * all the allocated memory.
+    */
+   alignment = MAX2(alignment, ws->page_size);
+   size = ALIGN_POT(size, alignment);
+
+   srv_bo = vk_zalloc(srv_ws->alloc,
+                      sizeof(*srv_bo),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_bo)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   srv_bo->is_display_buffer = (type == PVR_WINSYS_BO_TYPE_DISPLAY);
+   if (srv_bo->is_display_buffer) {
+      result = pvr_srv_alloc_display_pmr(srv_ws,
+                                         size,
+                                         srv_flags &
+                                            PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK,
+                                         &srv_bo->pmr,
+                                         &srv_bo->handle);
+
+      /* Display buffers are dma-buf imports under the hood. */
+      srv_bo->base.is_imported = true;
+   } else {
+      result =
+         pvr_srv_alloc_pmr(srv_ws->render_fd,
+                           size,
+                           size,
+                           1,
+                           1,
+                           srv_ws->base.log2_page_size,
+                           (srv_flags & PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK),
+                           getpid(),
+                           &srv_bo->pmr);
+   }
+
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_bo;
+
+   srv_bo->base.size = size;
+   srv_bo->base.ws = ws;
+   srv_bo->flags = srv_flags;
+
+   p_atomic_set(&srv_bo->ref_count, 1);
+
+   *bo_out = &srv_bo->base;
+
+   return VK_SUCCESS;
+
+err_vk_free_srv_bo:
+   vk_free(srv_ws->alloc, srv_bo);
+
+   return result;
+}
+
+/**
+ * Imports an external dma-buf fd as a winsys BO.
+ *
+ * The resulting BO takes its size from the import; the caller keeps
+ * ownership of the fd.
+ */
+VkResult
+pvr_srv_winsys_buffer_create_from_fd(struct pvr_winsys *ws,
+                                     int fd,
+                                     struct pvr_winsys_bo **const bo_out)
+{
+   /* FIXME: PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC should be changed to
+    * PVR_SRV_MEMALLOCFLAG_CPU_CACHE_INCOHERENT, as dma-buf is always mapped
+    * as cacheable by the exporter. Flags are not passed to the exporter and it
+    * doesn't really change the behavior, but these can be used for internal
+    * checking so it should reflect the correct cachability of the buffer.
+    * Ref: pvr_GetMemoryFdPropertiesKHR
+    *      https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
+    */
+   static const uint64_t srv_flags =
+      PVR_SRV_MEMALLOCFLAG_CPU_READABLE | PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE |
+      PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC | PVR_SRV_MEMALLOCFLAG_GPU_READABLE |
+      PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE |
+      PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT;
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_bo *srv_bo;
+   uint64_t aligment_out;
+   uint64_t size_out;
+   VkResult result;
+
+   srv_bo = vk_zalloc(srv_ws->alloc,
+                      sizeof(*srv_bo),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_bo)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = pvr_srv_physmem_import_dmabuf(srv_ws->render_fd,
+                                          fd,
+                                          srv_flags,
+                                          &srv_bo->pmr,
+                                          &size_out,
+                                          &aligment_out);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_bo;
+
+   assert(aligment_out == srv_ws->base.page_size);
+
+   srv_bo->base.ws = ws;
+   srv_bo->base.size = size_out;
+   srv_bo->base.is_imported = true;
+   srv_bo->flags = srv_flags;
+
+   p_atomic_set(&srv_bo->ref_count, 1);
+
+   *bo_out = &srv_bo->base;
+
+   return VK_SUCCESS;
+
+err_vk_free_srv_bo:
+   vk_free(srv_ws->alloc, srv_bo);
+
+   return result;
+}
+
+/* Drop the creation reference on a winsys buffer.
+ *
+ * The object is reference counted, so the backing resources are released
+ * only once all outstanding references (e.g. live mappings) are gone.
+ */
+void pvr_srv_winsys_buffer_destroy(struct pvr_winsys_bo *bo)
+{
+   buffer_release(to_pvr_srv_winsys_bo(bo));
+}
+
+/* Export a buffer as a file descriptor in *fd_out.
+ *
+ * Display buffers are exported via their GEM handle on the master (display)
+ * device; all other buffers are exported as a dma-buf from their PMR on the
+ * render device.
+ */
+VkResult pvr_srv_winsys_buffer_get_fd(struct pvr_winsys_bo *bo,
+                                      int *const fd_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+   struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+
+   if (srv_bo->is_display_buffer) {
+      /* Use the buffer handle saved at creation time. */
+      if (drmPrimeHandleToFD(srv_ws->master_fd,
+                             srv_bo->handle,
+                             O_CLOEXEC,
+                             fd_out)) {
+         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      }
+
+      return VK_SUCCESS;
+   }
+
+   return pvr_srv_physmem_export_dmabuf(srv_ws->render_fd,
+                                        srv_bo->pmr,
+                                        fd_out);
+}
+
+/* Map the whole buffer into CPU address space.
+ *
+ * mmap() protection is derived from the bo's CPU access flags. On success
+ * the pointer is cached in bo->map, a buffer reference is taken, and the
+ * pointer is returned; on failure NULL is returned after reporting
+ * VK_ERROR_MEMORY_MAP_FAILED. Pair with pvr_srv_winsys_buffer_unmap().
+ */
+void *pvr_srv_winsys_buffer_map(struct pvr_winsys_bo *bo)
+{
+   struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+   const int prot =
+      (srv_bo->flags & PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE ? PROT_WRITE : 0) |
+      (srv_bo->flags & PVR_SRV_MEMALLOCFLAG_CPU_READABLE ? PROT_READ : 0);
+
+   /* assert if memory is already mapped */
+   assert(!bo->map);
+
+   /* Map the full PMR to CPU space. The PMR handle shifted into page units
+    * is used as the mmap offset — presumably so the pvrsrvkm mmap handler
+    * can identify which PMR to map; confirm against the KM side.
+    */
+   bo->map = mmap(NULL,
+                  bo->size,
+                  prot,
+                  MAP_SHARED,
+                  srv_ws->render_fd,
+                  (off_t)srv_bo->pmr << srv_ws->base.log2_page_size);
+   if (bo->map == MAP_FAILED) {
+      bo->map = NULL;
+      vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+      return NULL;
+   }
+
+   /* Register the mapping with Valgrind; treat it as zero-initialized only
+    * when the bo was allocated with ZERO_ON_ALLOC.
+    */
+   VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map,
+                                bo->size,
+                                0,
+                                srv_bo->flags &
+                                   PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC));
+
+   /* The CPU mapping holds a reference until unmap. */
+   buffer_acquire(srv_bo);
+
+   return bo->map;
+}
+
+/* Tear down the CPU mapping set up by pvr_srv_winsys_buffer_map().
+ *
+ * The whole PMR is unmapped and the reference taken at map time is dropped.
+ * Calling this on a buffer that is not mapped is a programming error.
+ */
+void pvr_srv_winsys_buffer_unmap(struct pvr_winsys_bo *bo)
+{
+   void *const cpu_addr = bo->map;
+
+   assert(cpu_addr);
+
+   /* There is no recovery from a failed munmap(); just report it. */
+   if (munmap(cpu_addr, bo->size))
+      vk_error(NULL, VK_ERROR_UNKNOWN);
+
+   VG(VALGRIND_FREELIKE_BLOCK(cpu_addr, 0));
+
+   bo->map = NULL;
+
+   buffer_release(to_pvr_srv_winsys_bo(bo));
+}
+
+/* Allocate a vma at a fixed address inside a heap's reserved region.
+ *
+ * This function must be used to allocate inside reserved region and must be
+ * used internally only. This also means whoever is using it, must know what
+ * they are doing. No locking is performed; the caller is responsible for
+ * handing out distinct, non-overlapping reserved addresses.
+ *
+ * Returns the new vma, or NULL on failure (allocation failure, an
+ * out-of-range/unaligned address, or a failed server reservation).
+ */
+struct pvr_winsys_vma *
+pvr_srv_heap_alloc_reserved(struct pvr_winsys_heap *heap,
+                            const pvr_dev_addr_t reserved_dev_addr,
+                            uint64_t size,
+                            uint64_t alignment)
+{
+   struct pvr_srv_winsys_heap *srv_heap = to_pvr_srv_winsys_heap(heap);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(heap->ws);
+   struct pvr_srv_winsys_vma *srv_vma;
+   VkResult result;
+   uint64_t addr;
+
+   assert(util_is_power_of_two_nonzero(alignment));
+
+   /* pvr_srv_winsys_buffer_create() page aligns the size. We must do the same
+    * here to ensure enough heap space is allocated to be able to map the
+    * buffer to the GPU.
+    */
+   alignment = MAX2(alignment, heap->ws->page_size);
+   size = ALIGN_POT(size, alignment);
+
+   srv_vma = vk_alloc(srv_ws->alloc,
+                      sizeof(*srv_vma),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_vma) {
+      vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return NULL;
+   }
+
+   /* Just check address is correct and aligned, locking is not required as
+    * user is responsible to provide a distinct address. The range must lie
+    * entirely within [base_addr, base_addr + reserved_size) and start on a
+    * page boundary.
+    */
+   if (reserved_dev_addr.addr < heap->base_addr.addr ||
+       reserved_dev_addr.addr + size >
+          heap->base_addr.addr + heap->reserved_size ||
+       reserved_dev_addr.addr & ((srv_ws->base.page_size) - 1))
+      goto err_vk_free_srv_vma;
+
+   addr = reserved_dev_addr.addr;
+
+   /* Reserve the virtual range in the MMU and create a mapping structure */
+   result = pvr_srv_int_reserve_addr(srv_ws->render_fd,
+                                     srv_heap->server_heap,
+                                     (pvr_dev_addr_t){ .addr = addr },
+                                     size,
+                                     &srv_vma->reservation);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_vma;
+
+   srv_vma->base.dev_addr.addr = addr;
+   srv_vma->base.bo = NULL;
+   srv_vma->base.heap = heap;
+   srv_vma->base.size = size;
+
+   /* Reserved allocations are not tracked by the heap allocator; the heap
+    * ref count is what pvr_srv_winsys_heap_free() drops for them instead.
+    */
+   p_atomic_inc(&srv_heap->base.ref_count);
+
+   return &srv_vma->base;
+
+err_vk_free_srv_vma:
+   vk_free(srv_ws->alloc, srv_vma);
+
+   return NULL;
+}
+
+/* Allocate device virtual address space from a heap and reserve it with
+ * the server MMU.
+ *
+ * Returns the new vma on success, or NULL (after reporting a Vulkan error)
+ * on failure. The returned vma has no buffer mapped into it yet.
+ */
+struct pvr_winsys_vma *pvr_srv_winsys_heap_alloc(struct pvr_winsys_heap *heap,
+                                                 uint64_t size,
+                                                 uint64_t alignment)
+{
+   struct pvr_srv_winsys_heap *const srv_heap = to_pvr_srv_winsys_heap(heap);
+   struct pvr_srv_winsys *const srv_ws = to_pvr_srv_winsys(heap->ws);
+   struct pvr_srv_winsys_vma *srv_vma;
+
+   srv_vma = vk_alloc(srv_ws->alloc,
+                      sizeof(*srv_vma),
+                      8,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_vma) {
+      vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      return NULL;
+   }
+
+   /* Carve a range of device virtual address space out of the heap. */
+   if (!pvr_winsys_helper_heap_alloc(heap, size, alignment, &srv_vma->base))
+      goto err_free_vma;
+
+   /* Reserve the virtual range in the MMU and create a mapping structure. */
+   if (pvr_srv_int_reserve_addr(srv_ws->render_fd,
+                                srv_heap->server_heap,
+                                srv_vma->base.dev_addr,
+                                srv_vma->base.size,
+                                &srv_vma->reservation) != VK_SUCCESS) {
+      goto err_free_heap_range;
+   }
+
+   return &srv_vma->base;
+
+err_free_heap_range:
+   pvr_winsys_helper_heap_free(&srv_vma->base);
+
+err_free_vma:
+   vk_free(srv_ws->alloc, srv_vma);
+
+   return NULL;
+}
+
+/* Free a vma obtained from pvr_srv_winsys_heap_alloc() or
+ * pvr_srv_heap_alloc_reserved().
+ *
+ * The server-side address reservation is always released. Ranges inside the
+ * heap's reserved region are not tracked by the heap allocator, so for those
+ * only the heap reference count is dropped; other ranges are returned to the
+ * allocator. The vma must not have a buffer mapped into it.
+ */
+void pvr_srv_winsys_heap_free(struct pvr_winsys_vma *vma)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(vma->heap->ws);
+   struct pvr_srv_winsys_vma *srv_vma = to_pvr_srv_winsys_vma(vma);
+
+   /* A vma with an existing device mapping should not be freed. */
+   assert(!srv_vma->base.bo);
+
+   /* Remove mapping handle and underlying reservation. */
+   pvr_srv_int_unreserve_addr(srv_ws->render_fd, srv_vma->reservation);
+
+   /* Check if we are dealing with reserved address range. */
+   if (vma->dev_addr.addr <
+       (vma->heap->base_addr.addr + vma->heap->reserved_size)) {
+      /* For the reserved addresses just decrement the reference count. */
+      p_atomic_dec(&vma->heap->ref_count);
+   } else {
+      /* Free allocated virtual space. */
+      pvr_winsys_helper_heap_free(vma);
+   }
+
+   vk_free(srv_ws->alloc, srv_vma);
+}
+
+/* Map a buffer's pages into a vma's device virtual address range.
+ *
+ * * We assume the vma has been allocated with extra space to accommodate the
+ *   offset.
+ * * The offset passed in is unchanged and can be used to calculate the extra
+ *   size that needs to be mapped and final device virtual address.
+ *
+ * Returns the device address of the mapped data (vma start plus the in-page
+ * offset), or an address of 0 on failure.
+ */
+pvr_dev_addr_t pvr_srv_winsys_vma_map(struct pvr_winsys_vma *vma,
+                                      struct pvr_winsys_bo *bo,
+                                      uint64_t offset,
+                                      uint64_t size)
+{
+   struct pvr_srv_winsys_vma *srv_vma = to_pvr_srv_winsys_vma(vma);
+   struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(bo);
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(bo->ws);
+   /* Only the virtual-mapping-relevant subset of the bo's alloc flags. */
+   const uint64_t srv_flags = srv_bo->flags &
+                              PVR_SRV_MEMALLOCFLAGS_VIRTUAL_MASK;
+   /* Offset of the data within its first page. */
+   const uint32_t virt_offset = offset & (vma->heap->page_size - 1);
+   /* Page-aligned span of virtual space needed to cover
+    * [offset, offset + size).
+    */
+   const uint64_t aligned_virt_size =
+      ALIGN_POT(virt_offset + size, vma->heap->page_size);
+   VkResult result;
+
+   /* Address should not be mapped already */
+   assert(!srv_vma->base.bo);
+
+   if (srv_bo->is_display_buffer) {
+      struct pvr_srv_winsys_heap *srv_heap = to_pvr_srv_winsys_heap(vma->heap);
+
+      /* In case of display buffers, we only support to map whole PMR */
+      if (offset != 0 || bo->size != ALIGN_POT(size, srv_ws->base.page_size) ||
+          vma->size != bo->size) {
+         vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+         return (pvr_dev_addr_t){ .addr = 0UL };
+      }
+
+      /* Map the requested pmr */
+      result = pvr_srv_int_map_pmr(srv_ws->render_fd,
+                                   srv_heap->server_heap,
+                                   srv_vma->reservation,
+                                   srv_bo->pmr,
+                                   srv_flags,
+                                   &srv_vma->mapping);
+
+   } else {
+      /* First backing page of the bo (offset rounded down to a page). */
+      const uint32_t phys_page_offset = (offset - virt_offset) >>
+                                        srv_ws->base.log2_page_size;
+      const uint32_t phys_page_count = aligned_virt_size >>
+                                       srv_ws->base.log2_page_size;
+
+      /* Check if bo and vma can accommodate the given size and offset */
+      if (ALIGN_POT(offset + size, vma->heap->page_size) > bo->size ||
+          aligned_virt_size > vma->size) {
+         vk_error(NULL, VK_ERROR_MEMORY_MAP_FAILED);
+         return (pvr_dev_addr_t){ .addr = 0UL };
+      }
+
+      /* Map the requested pages */
+      result = pvr_srv_int_map_pages(srv_ws->render_fd,
+                                     srv_vma->reservation,
+                                     srv_bo->pmr,
+                                     phys_page_count,
+                                     phys_page_offset,
+                                     srv_flags,
+                                     vma->dev_addr);
+   }
+
+   if (result != VK_SUCCESS)
+      return (pvr_dev_addr_t){ .addr = 0UL };
+
+   /* The mapping holds a reference on the buffer until unmap. */
+   buffer_acquire(srv_bo);
+
+   vma->bo = &srv_bo->base;
+   vma->bo_offset = offset;
+   vma->mapped_size = aligned_virt_size;
+
+   return (pvr_dev_addr_t){ .addr = vma->dev_addr.addr + virt_offset };
+}
+
+/* Remove the device mapping established by pvr_srv_winsys_vma_map() and
+ * drop the buffer reference the mapping held.
+ *
+ * The vma itself (the reserved virtual range) stays valid for re-use.
+ */
+void pvr_srv_winsys_vma_unmap(struct pvr_winsys_vma *vma)
+{
+   struct pvr_srv_winsys_vma *const srv_vma = to_pvr_srv_winsys_vma(vma);
+   struct pvr_srv_winsys *const srv_ws = to_pvr_srv_winsys(vma->heap->ws);
+   struct pvr_srv_winsys_bo *srv_bo;
+
+   /* Unmapping a vma with no mapped buffer is a programming error. */
+   assert(srv_vma->base.bo);
+   srv_bo = to_pvr_srv_winsys_bo(srv_vma->base.bo);
+
+   if (srv_bo->is_display_buffer) {
+      /* Display buffers are mapped as a whole PMR; tear down via the PMR
+       * mapping handle.
+       */
+      pvr_srv_int_unmap_pmr(srv_ws->render_fd, srv_vma->mapping);
+   } else {
+      /* Other buffers are mapped page-wise into the reservation. */
+      pvr_srv_int_unmap_pages(srv_ws->render_fd,
+                              srv_vma->reservation,
+                              vma->dev_addr,
+                              vma->mapped_size >> srv_ws->base.log2_page_size);
+   }
+
+   buffer_release(srv_bo);
+
+   srv_vma->base.bo = NULL;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bo.h
new file mode 100644 (file)
index 0000000..a924c9a
--- /dev/null
@@ -0,0 +1,186 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_BO_H
+#define PVR_SRV_BO_H
+
+#include <stdint.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/*******************************************
+   MemAlloc flags
+ *******************************************/
+
+/* TODO: remove unused and redundant flags */
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET 26U
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK \
+   (0x3ULL << PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN BITFIELD_BIT(19U)
+#define PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE BITFIELD_BIT(14U)
+#define PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC BITFIELD_BIT(31U)
+#define PVR_SRV_MEMALLOCFLAG_SVM_ALLOC BITFIELD_BIT(17U)
+#define PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC BITFIELD_BIT(30U)
+#define PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE BITFIELD_BIT(29U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_READABLE BITFIELD_BIT(0U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE BITFIELD_BIT(1U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK (7ULL << 8U)
+#define PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK                               \
+   (PVR_SRV_MEMALLOCFLAG_GPU_READABLE | PVR_SRV_MEMALLOCFLAG_GPU_WRITEABLE | \
+    PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK)
+#define PVR_SRV_MEMALLOCFLAG_CPU_READABLE BITFIELD_BIT(4U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE BITFIELD_BIT(5U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_MODE_MASK (7ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_CACHE_INCOHERENT (3ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK                               \
+   (PVR_SRV_MEMALLOCFLAG_CPU_READABLE | PVR_SRV_MEMALLOCFLAG_CPU_WRITEABLE | \
+    PVR_SRV_MEMALLOCFLAG_CPU_CACHE_MODE_MASK)
+#define PVR_SRV_MEMALLOCFLAG_NO_OSPAGES_ON_ALLOC BITFIELD_BIT(15U)
+#define PVR_SRV_MEMALLOCFLAG_SPARSE_NO_DUMMY_BACKING BITFIELD_BIT(18U)
+#define PVR_SRV_MEMALLOCFLAG_SPARSE_ZERO_BACKING BITFIELD_BIT(20U)
+#define PVR_SRV_MEMALLOCFLAG_FW_ALLOC_OSID_MASK (7ULL << 23U)
+#define PVR_SRV_MEMALLOCFLAG_VAL_SECURE_BUFFER BITFIELD64_BIT(34U)
+#define PVR_SRV_MEMALLOCFLAG_VAL_SHARED_BUFFER BITFIELD64_BIT(35U)
+#define PVR_SRV_PHYS_HEAP_HINT_SHIFT (60U)
+#define PVR_SRV_PHYS_HEAP_HINT_MASK (0xFULL << PVR_SRV_PHYS_HEAP_HINT_SHIFT)
+#define PVR_SRV_MEMALLOCFLAG_GPU_UNCACHED BITFIELD_BIT(8U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_CACHE_INCOHERENT (3ULL << 8U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_UNCACHED_WC (0ULL << 11U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_READ_PERMITTED BITFIELD_BIT(2U)
+#define PVR_SRV_MEMALLOCFLAG_GPU_WRITE_PERMITTED BITFIELD_BIT(3U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_READ_PERMITTED BITFIELD_BIT(6U)
+#define PVR_SRV_MEMALLOCFLAG_CPU_WRITE_PERMITTED BITFIELD_BIT(7U)
+
+#define PVR_SRV_MEMALLOCFLAGS_PMRFLAGSMASK                                \
+   (PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK |                              \
+    PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN |                                \
+    PVR_SRV_MEMALLOCFLAG_KERNEL_CPU_MAPPABLE |                            \
+    PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC | PVR_SRV_MEMALLOCFLAG_SVM_ALLOC | \
+    PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC |                                \
+    PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE |                                 \
+    PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK |                              \
+    PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK |                              \
+    PVR_SRV_MEMALLOCFLAG_NO_OSPAGES_ON_ALLOC |                            \
+    PVR_SRV_MEMALLOCFLAG_SPARSE_NO_DUMMY_BACKING |                        \
+    PVR_SRV_MEMALLOCFLAG_SPARSE_ZERO_BACKING |                            \
+    PVR_SRV_MEMALLOCFLAG_FW_ALLOC_OSID_MASK |                             \
+    PVR_SRV_MEMALLOCFLAG_VAL_SECURE_BUFFER |                              \
+    PVR_SRV_MEMALLOCFLAG_VAL_SHARED_BUFFER | PVR_SRV_PHYS_HEAP_HINT_MASK)
+
+#define PVR_SRV_MEMALLOCFLAGS_PHYSICAL_MASK    \
+   (PVR_SRV_MEMALLOCFLAGS_CPU_MMUFLAGSMASK |   \
+    PVR_SRV_MEMALLOCFLAG_GPU_CACHE_MODE_MASK | \
+    PVR_SRV_MEMALLOCFLAG_CPU_READ_PERMITTED |  \
+    PVR_SRV_MEMALLOCFLAG_CPU_WRITE_PERMITTED | \
+    PVR_SRV_MEMALLOCFLAG_CPU_CACHE_CLEAN |     \
+    PVR_SRV_MEMALLOCFLAG_ZERO_ON_ALLOC |       \
+    PVR_SRV_MEMALLOCFLAG_POISON_ON_ALLOC |     \
+    PVR_SRV_MEMALLOCFLAG_POISON_ON_FREE | PVR_SRV_PHYS_HEAP_HINT_MASK)
+
+#define PVR_SRV_MEMALLOCFLAGS_VIRTUAL_MASK    \
+   (PVR_SRV_MEMALLOCFLAGS_GPU_MMUFLAGSMASK |  \
+    PVR_SRV_MEMALLOCFLAG_GPU_READ_PERMITTED | \
+    PVR_SRV_MEMALLOCFLAG_GPU_WRITE_PERMITTED)
+
+/* Device specific MMU flags. */
+/*!< Memory that only the PM and Firmware can access */
+#define PM_FW_PROTECT BITFIELD_BIT(0U)
+
+/* Helper macro for setting device specific MMU flags. */
+#define PVR_SRV_MEMALLOCFLAG_DEVICE_FLAG(n)             \
+   (((n) << PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_OFFSET) & \
+    PVR_SRV_MEMALLOCFLAG_DEVICE_FLAGS_MASK)
+
+/*******************************************
+   struct definitions
+ *******************************************/
+
+/* Services-winsys buffer object wrapping a generic pvr_winsys_bo. */
+struct pvr_srv_winsys_bo {
+   struct pvr_winsys_bo base;
+   /* Set to 1 at creation/import; CPU and device mappings take extra
+    * references via buffer_acquire()/buffer_release().
+    */
+   uint32_t ref_count;
+   /* Services PMR handle backing this buffer. */
+   void *pmr;
+
+   /* True when the buffer was allocated through the display path. */
+   bool is_display_buffer;
+   /* Buffer handle on the master (display) device; used for PRIME export
+    * of display buffers. Only valid when is_display_buffer is set.
+    */
+   uint32_t handle;
+   /* PVR_SRV_MEMALLOCFLAG_* flags used at allocation/import time. */
+   uint64_t flags;
+};
+
+/* Services-winsys virtual-memory allocation wrapping a pvr_winsys_vma. */
+struct pvr_srv_winsys_vma {
+   struct pvr_winsys_vma base;
+   /* Server reservation handle for the virtual range (from
+    * pvr_srv_int_reserve_addr()).
+    */
+   void *reservation;
+
+   /* Required when mapping whole PMR, used for display buffers mapping. */
+   void *mapping;
+};
+
+/*******************************************
+   function prototypes
+ *******************************************/
+
+VkResult pvr_srv_winsys_buffer_create(struct pvr_winsys *ws,
+                                      uint64_t size,
+                                      uint64_t alignment,
+                                      enum pvr_winsys_bo_type type,
+                                      uint32_t ws_flags,
+                                      struct pvr_winsys_bo **const bo_out);
+VkResult
+pvr_srv_winsys_buffer_create_from_fd(struct pvr_winsys *ws,
+                                     int fd,
+                                     struct pvr_winsys_bo **const bo_out);
+void pvr_srv_winsys_buffer_destroy(struct pvr_winsys_bo *bo);
+
+VkResult pvr_srv_winsys_buffer_get_fd(struct pvr_winsys_bo *bo,
+                                      int *const fd_out);
+
+void *pvr_srv_winsys_buffer_map(struct pvr_winsys_bo *bo);
+void pvr_srv_winsys_buffer_unmap(struct pvr_winsys_bo *bo);
+
+struct pvr_winsys_vma *
+pvr_srv_heap_alloc_reserved(struct pvr_winsys_heap *heap,
+                            const pvr_dev_addr_t reserved_dev_addr,
+                            uint64_t size,
+                            uint64_t alignment);
+struct pvr_winsys_vma *pvr_srv_winsys_heap_alloc(struct pvr_winsys_heap *heap,
+                                                 uint64_t size,
+                                                 uint64_t alignment);
+void pvr_srv_winsys_heap_free(struct pvr_winsys_vma *vma);
+
+pvr_dev_addr_t pvr_srv_winsys_vma_map(struct pvr_winsys_vma *vma,
+                                      struct pvr_winsys_bo *bo,
+                                      uint64_t offset,
+                                      uint64_t size);
+void pvr_srv_winsys_vma_unmap(struct pvr_winsys_vma *vma);
+
+/*******************************************
+   helper macros
+ *******************************************/
+
+#define to_pvr_srv_winsys_bo(bo) \
+   container_of((bo), struct pvr_srv_winsys_bo, base)
+#define to_pvr_srv_winsys_vma(vma) \
+   container_of((vma), struct pvr_srv_winsys_vma, base)
+
+#endif /* PVR_SRV_BO_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.c
new file mode 100644 (file)
index 0000000..064cf8f
--- /dev/null
@@ -0,0 +1,1293 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <xf86drm.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bridge.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_log.h"
+
+#define vk_bridge_err(vk_err, bridge_func, bridge_ret)  \
+   vk_errorf(NULL,                                      \
+             vk_err,                                    \
+             "%s failed, PVR_SRV_ERROR: %d, Errno: %s", \
+             bridge_func,                               \
+             (bridge_ret).error,                        \
+             strerror(errno))
+
+/* Issue a single services bridge call via the srvkm DRM ioctl.
+ *
+ * @input/@output point at the bridge-specific command/return structs. Note
+ * that ioctl success is distinct from the bridge-level status: callers must
+ * still check the error field in the returned struct.
+ *
+ * Returns 0 when the ioctl succeeds, otherwise the drmIoctl() error value.
+ */
+static int pvr_srv_bridge_call(int fd,
+                               uint8_t bridge_id,
+                               uint32_t function_id,
+                               void *input,
+                               uint32_t input_buffer_size,
+                               void *output,
+                               uint32_t output_buffer_size)
+{
+   struct drm_srvkm_cmd cmd = {
+      .bridge_id = bridge_id,
+      .bridge_func_id = function_id,
+      .in_data_ptr = (uint64_t)(uintptr_t)input,
+      .out_data_ptr = (uint64_t)(uintptr_t)output,
+      .in_data_size = input_buffer_size,
+      .out_data_size = output_buffer_size,
+   };
+
+   int ret = drmIoctl(fd, DRM_IOCTL_SRVKM_CMD, &cmd);
+   if (unlikely(ret))
+      return ret;
+
+   /* The kernel filled the output buffer; mark it defined for Valgrind. */
+   VG(VALGRIND_MAKE_MEM_DEFINED(output, output_buffer_size));
+
+   return 0;
+}
+
+/* Open a services connection on @fd and return the device BVNC.
+ *
+ * The connect command carries the client's compat flags, build options and
+ * DDK version/build for the server side.
+ */
+VkResult pvr_srv_connection_create(int fd, uint64_t *const bvnc_out)
+{
+   struct pvr_srv_bridge_connect_cmd cmd = {
+      .flags = PVR_SRV_FLAGS_CLIENT_64BIT_COMPAT,
+      .build_options = RGX_BUILD_OPTIONS,
+      .DDK_version = PVR_SRV_VERSION,
+      .DDK_build = PVR_SRV_VERSION_BUILD,
+   };
+
+   struct pvr_srv_bridge_connect_ret ret = {
+      /* Pre-set a bridge error in case the ioctl itself fails. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result = pvr_srv_bridge_call(fd,
+                                    PVR_SRV_BRIDGE_SRVCORE,
+                                    PVR_SRV_BRIDGE_SRVCORE_CONNECT,
+                                    &cmd,
+                                    sizeof(cmd),
+                                    &ret,
+                                    sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_SRVCORE_CONNECT",
+                           ret);
+   }
+
+   *bvnc_out = ret.bvnc;
+
+   return VK_SUCCESS;
+}
+
+/* Close the services connection on @fd.
+ *
+ * Failure is only reported, not returned — there is nothing useful the
+ * caller can do about a failed disconnect.
+ */
+void pvr_srv_connection_destroy(int fd)
+{
+   struct pvr_srv_bridge_disconnect_ret ret = {
+      /* Pre-set a bridge error in case the ioctl itself fails. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result = pvr_srv_bridge_call(fd,
+                                    PVR_SRV_BRIDGE_SRVCORE,
+                                    PVR_SRV_BRIDGE_SRVCORE_DISCONNECT,
+                                    NULL,
+                                    0,
+                                    &ret,
+                                    sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK)
+      vk_bridge_err(VK_ERROR_UNKNOWN, "PVR_SRV_BRIDGE_SRVCORE_DISCONNECT", ret);
+}
+
+/* Allocate a sync primitive block on the server.
+ *
+ * On success the block handle, its backing PMR, its size and its address
+ * are returned through the out parameters.
+ */
+VkResult pvr_srv_alloc_sync_primitive_block(int fd,
+                                            void **const handle_out,
+                                            void **const pmr_out,
+                                            uint32_t *const size_out,
+                                            uint32_t *const addr_out)
+{
+   struct pvr_srv_bridge_alloc_sync_primitive_block_ret ret = {
+      /* Pre-set a bridge error in case the ioctl itself fails. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result = pvr_srv_bridge_call(fd,
+                                    PVR_SRV_BRIDGE_SYNC,
+                                    PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK,
+                                    NULL,
+                                    0,
+                                    &ret,
+                                    sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK",
+                           ret);
+   }
+
+   *handle_out = ret.handle;
+   *pmr_out = ret.pmr;
+   *size_out = ret.size;
+   *addr_out = ret.addr;
+
+   return VK_SUCCESS;
+}
+
+/* Release a sync primitive block previously allocated with
+ * pvr_srv_alloc_sync_primitive_block(). Failure is only reported.
+ */
+void pvr_srv_free_sync_primitive_block(int fd, void *handle)
+{
+   struct pvr_srv_bridge_free_sync_primitive_block_cmd cmd = {
+      .handle = handle,
+   };
+
+   struct pvr_srv_bridge_free_sync_primitive_block_ret ret = {
+      /* Pre-set a bridge error in case the ioctl itself fails. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result = pvr_srv_bridge_call(fd,
+                                    PVR_SRV_BRIDGE_SYNC,
+                                    PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK,
+                                    &cmd,
+                                    sizeof(cmd),
+                                    &ret,
+                                    sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK",
+                    ret);
+   }
+}
+
+/* Query the number of heaps in heap config 0. */
+VkResult pvr_srv_get_heap_count(int fd, uint32_t *const heap_count_out)
+{
+   struct pvr_srv_heap_count_cmd cmd = {
+      .heap_config_index = 0,
+   };
+
+   struct pvr_srv_heap_count_ret ret = {
+      /* Pre-set a bridge error in case the ioctl itself fails. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result = pvr_srv_bridge_call(fd,
+                                    PVR_SRV_BRIDGE_MM,
+                                    PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT,
+                                    &cmd,
+                                    sizeof(cmd),
+                                    &ret,
+                                    sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT",
+                           ret);
+   }
+
+   *heap_count_out = ret.heap_count;
+
+   return VK_SUCCESS;
+}
+
+/* Create a server-side heap covering [base_address, base_address + size)
+ * within @server_memctx.
+ *
+ * The handle returned in @server_heap_out is used by the reserve/map bridge
+ * calls and is released with pvr_srv_int_heap_destroy().
+ */
+VkResult pvr_srv_int_heap_create(int fd,
+                                 pvr_dev_addr_t base_address,
+                                 uint64_t size,
+                                 uint32_t log2_page_size,
+                                 void *server_memctx,
+                                 void **const server_heap_out)
+{
+   struct pvr_srv_devmem_int_heap_create_cmd cmd = {
+      .server_memctx = server_memctx,
+      .base_addr = base_address,
+      .size = size,
+      .log2_page_size = log2_page_size,
+   };
+
+   struct pvr_srv_devmem_int_heap_create_ret ret = {
+      /* Pre-set a bridge error in case the ioctl itself fails. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result = pvr_srv_bridge_call(fd,
+                                    PVR_SRV_BRIDGE_MM,
+                                    PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE,
+                                    &cmd,
+                                    sizeof(cmd),
+                                    &ret,
+                                    sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE",
+                           ret);
+   }
+
+   *server_heap_out = ret.server_heap;
+
+   return VK_SUCCESS;
+}
+
+/* Destroy a server-side heap created with pvr_srv_int_heap_create().
+ * Failure is only reported.
+ */
+void pvr_srv_int_heap_destroy(int fd, void *server_heap)
+{
+   struct pvr_srv_devmem_int_heap_destroy_cmd cmd = {
+      .server_heap = server_heap,
+   };
+
+   struct pvr_srv_devmem_int_heap_destroy_ret ret = {
+      /* Pre-set a bridge error in case the ioctl itself fails. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result = pvr_srv_bridge_call(fd,
+                                    PVR_SRV_BRIDGE_MM,
+                                    PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY,
+                                    &cmd,
+                                    sizeof(cmd),
+                                    &ret,
+                                    sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY",
+                    ret);
+   }
+}
+
+/* Query the name and/or layout details of heap @heap_index in heap config 0.
+ *
+ * This bridge function allows to independently query heap name and heap
+ * details, i.e. buffer/base_address/size/reserved_size/log2_page_size
+ * pointers are allowed to be NULL. When @buffer_out is non-NULL it receives
+ * the heap name (up to @buffer_size bytes).
+ */
+VkResult pvr_srv_get_heap_details(int fd,
+                                  uint32_t heap_index,
+                                  uint32_t buffer_size,
+                                  char *const buffer_out,
+                                  pvr_dev_addr_t *const base_address_out,
+                                  uint64_t *const size_out,
+                                  uint64_t *const reserved_size_out,
+                                  uint32_t *const log2_page_size_out)
+{
+   struct pvr_srv_heap_cfg_details_cmd cmd = {
+      .heap_config_index = 0,
+      .heap_index = heap_index,
+      .buffer_size = buffer_size,
+      .buffer = buffer_out,
+   };
+
+   /* Initialize ret.error to a default error and point the kernel at the
+    * caller's name buffer.
+    */
+   struct pvr_srv_heap_cfg_details_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+      .buffer = buffer_out,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_MM,
+                                PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS",
+                           ret);
+   }
+
+   /* The kernel wrote into the caller's buffer; mark it defined. */
+   VG(VALGRIND_MAKE_MEM_DEFINED(buffer_out, buffer_size));
+
+   if (base_address_out)
+      *base_address_out = ret.base_addr;
+
+   if (size_out)
+      *size_out = ret.size;
+
+   if (reserved_size_out)
+      *reserved_size_out = ret.reserved_size;
+
+   if (log2_page_size_out)
+      *log2_page_size_out = ret.log2_page_size;
+
+   return VK_SUCCESS;
+}
+
+/* Destroy a server-side device memory context created with
+ * pvr_srv_int_ctx_create(). Failure is reported but not propagated.
+ */
+void pvr_srv_int_ctx_destroy(int fd, void *server_memctx)
+{
+   struct pvr_srv_devmem_int_ctx_destroy_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_devmem_int_ctx_destroy_cmd cmd = {
+      .server_memctx = server_memctx,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY",
+                    ret);
+   }
+}
+
+/* Create a server-side device memory context. On success both the context
+ * handle and its private data handle are returned.
+ */
+VkResult pvr_srv_int_ctx_create(int fd,
+                                void **const server_memctx_out,
+                                void **const server_memctx_data_out)
+{
+   struct pvr_srv_devmem_int_ctx_create_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_devmem_int_ctx_create_cmd cmd = {
+      /* Never request a kernel memory context from userspace. */
+      .kernel_memory_ctx = false,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE",
+                           ret);
+   }
+
+   *server_memctx_out = ret.server_memctx;
+   *server_memctx_data_out = ret.server_memctx_data;
+
+   return VK_SUCCESS;
+}
+
+/* Reserve the device virtual address range [addr, addr + size) in the given
+ * server heap. The returned reservation handle is what the page-mapping
+ * bridge calls operate on.
+ */
+VkResult pvr_srv_int_reserve_addr(int fd,
+                                  void *server_heap,
+                                  pvr_dev_addr_t addr,
+                                  uint64_t size,
+                                  void **const reservation_out)
+{
+   struct pvr_srv_devmem_int_reserve_range_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_devmem_int_reserve_range_cmd cmd = {
+      .server_heap = server_heap,
+      .addr = addr,
+      .size = size,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE",
+                           ret);
+   }
+
+   *reservation_out = ret.reservation;
+
+   return VK_SUCCESS;
+}
+
+/* Release a device virtual address reservation made with
+ * pvr_srv_int_reserve_addr(). Failure is reported but not propagated.
+ */
+void pvr_srv_int_unreserve_addr(int fd, void *reservation)
+{
+   struct pvr_srv_bridge_in_devmem_int_unreserve_range_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_bridge_in_devmem_int_unreserve_range_cmd cmd = {
+      .reservation = reservation,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE",
+                    ret);
+   }
+}
+
+/* Allocate RAM-backed physical memory (a PMR). The allocation is described
+ * as virt_blocks blocks of block_size bytes, phy_blocks of which are
+ * physically backed; a single-entry mapping table maps physical block 0 to
+ * virtual block 0.
+ */
+VkResult pvr_srv_alloc_pmr(int fd,
+                           uint64_t size,
+                           uint64_t block_size,
+                           uint32_t phy_blocks,
+                           uint32_t virt_blocks,
+                           uint32_t log2_page_size,
+                           uint64_t flags,
+                           uint32_t pid,
+                           void **const pmr_out)
+{
+   const char *annotation = "VK PHYSICAL ALLOCATION";
+   uint32_t mapping_table = 0;
+
+   struct pvr_srv_physmem_new_ram_backed_locked_pmr_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_physmem_new_ram_backed_locked_pmr_cmd cmd = {
+      .size = size,
+      .block_size = block_size,
+      .phy_blocks = phy_blocks,
+      .virt_blocks = virt_blocks,
+      .mapping_table = &mapping_table,
+      .log2_page_size = log2_page_size,
+      .flags = flags,
+      /* Annotation length includes the NUL terminator, bounded by the
+       * bridge's maximum annotation size.
+       */
+      .annotation_size = strnlen(annotation, DEVMEM_ANNOTATION_MAX_LEN - 1) + 1,
+      .annotation = annotation,
+      .pid = pid,
+      .pdump_flags = 0x00000000U,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED,
+                           "PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR",
+                           ret);
+   }
+
+   *pmr_out = ret.pmr;
+
+   return VK_SUCCESS;
+}
+
+/* Drop the reference/lock on a PMR obtained from pvr_srv_alloc_pmr() or a
+ * dma-buf import. Failure is reported but not propagated.
+ */
+void pvr_srv_free_pmr(int fd, void *pmr)
+{
+   struct pvr_srv_pmr_unref_unlock_pmr_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_pmr_unref_unlock_pmr_cmd cmd = {
+      .pmr = pmr,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR",
+                    ret);
+   }
+}
+
+/* Map page_count pages of the PMR, starting at page_offset, to device
+ * address addr within the given reservation.
+ */
+VkResult pvr_srv_int_map_pages(int fd,
+                               void *reservation,
+                               void *pmr,
+                               uint32_t page_count,
+                               uint32_t page_offset,
+                               uint64_t flags,
+                               pvr_dev_addr_t addr)
+{
+   struct pvr_srv_devmem_int_map_pages_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_devmem_int_map_pages_cmd cmd = {
+      .reservation = reservation,
+      .pmr = pmr,
+      .page_count = page_count,
+      .page_offset = page_offset,
+      .flags = flags,
+      .addr = addr,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES",
+                           ret);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Unmap page_count pages at dev_addr from the given reservation. Failure is
+ * reported but not propagated.
+ */
+void pvr_srv_int_unmap_pages(int fd,
+                             void *reservation,
+                             pvr_dev_addr_t dev_addr,
+                             uint32_t page_count)
+{
+   struct pvr_srv_devmem_int_unmap_pages_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_devmem_int_unmap_pages_cmd cmd = {
+      .reservation = reservation,
+      .dev_addr = dev_addr,
+      .page_count = page_count,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES",
+                    ret);
+   }
+}
+
+/* Map a whole PMR into the reservation's address range on the given server
+ * heap. The returned mapping handle is released with
+ * pvr_srv_int_unmap_pmr().
+ */
+VkResult pvr_srv_int_map_pmr(int fd,
+                             void *server_heap,
+                             void *reservation,
+                             void *pmr,
+                             uint64_t flags,
+                             void **const mapping_out)
+{
+   struct pvr_srv_devmem_int_map_pmr_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_devmem_int_map_pmr_cmd cmd = {
+      .server_heap = server_heap,
+      .reservation = reservation,
+      .pmr = pmr,
+      .flags = flags,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_MEMORY_MAP_FAILED,
+                           "PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR",
+                           ret);
+   }
+
+   *mapping_out = ret.mapping;
+
+   return VK_SUCCESS;
+}
+
+/* Release a PMR mapping created with pvr_srv_int_map_pmr(). Failure is
+ * reported but not propagated.
+ */
+void pvr_srv_int_unmap_pmr(int fd, void *mapping)
+{
+   struct pvr_srv_devmem_int_unmap_pmr_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_devmem_int_unmap_pmr_cmd cmd = {
+      .mapping = mapping,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_MM,
+                          PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR",
+                    ret);
+   }
+}
+
+/* Import an external dma-buf (buffer_fd) as a PMR. The PMR handle plus the
+ * buffer's size and alignment, as reported by the kernel, are returned.
+ */
+VkResult pvr_srv_physmem_import_dmabuf(int fd,
+                                       int buffer_fd,
+                                       uint64_t flags,
+                                       void **const pmr_out,
+                                       uint64_t *const size_out,
+                                       uint64_t *const align_out)
+{
+   struct pvr_srv_phys_mem_import_dmabuf_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_phys_mem_import_dmabuf_cmd cmd = {
+      .buffer_fd = buffer_fd,
+      .flags = flags,
+      /* No annotation name is supplied for imports. */
+      .name_size = 0,
+      .name = NULL,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_DMABUF,
+                          PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INVALID_EXTERNAL_HANDLE,
+                           "PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF",
+                           ret);
+   }
+
+   *pmr_out = ret.pmr;
+   *size_out = ret.size;
+   *align_out = ret.align;
+
+   return VK_SUCCESS;
+}
+
+/* Export a PMR as a dma-buf; the new file descriptor is returned through
+ * fd_out and is owned by the caller.
+ */
+VkResult pvr_srv_physmem_export_dmabuf(int fd, void *pmr, int *const fd_out)
+{
+   struct pvr_srv_phys_mem_export_dmabuf_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_phys_mem_export_dmabuf_cmd cmd = {
+      .pmr = pmr,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_DMABUF,
+                          PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_OUT_OF_HOST_MEMORY,
+                           "PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF",
+                           ret);
+   }
+
+   *fd_out = ret.fd;
+
+   return VK_SUCCESS;
+}
+
+/* Create a compute (CDM) context. The returned handle is used by
+ * pvr_srv_rgx_kick_compute2() and eventually released with
+ * pvr_srv_rgx_destroy_compute_context().
+ */
+VkResult
+pvr_srv_rgx_create_compute_context(int fd,
+                                   uint32_t priority,
+                                   uint32_t reset_framework_cmd_size,
+                                   uint8_t *reset_framework_cmd,
+                                   void *priv_data,
+                                   uint32_t static_compute_context_state_size,
+                                   uint8_t *static_compute_context_state,
+                                   uint32_t packed_ccb_size,
+                                   uint32_t context_flags,
+                                   uint64_t robustness_address,
+                                   uint32_t max_deadline_ms,
+                                   void **const compute_context_out)
+{
+   struct pvr_srv_rgx_create_compute_context_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_rgx_create_compute_context_cmd cmd = {
+      .priority = priority,
+      .reset_framework_cmd = reset_framework_cmd,
+      .reset_framework_cmd_size = reset_framework_cmd_size,
+      .priv_data = priv_data,
+      .static_compute_context_state = static_compute_context_state,
+      .static_compute_context_state_size = static_compute_context_state_size,
+      .packed_ccb_size = packed_ccb_size,
+      .context_flags = context_flags,
+      .robustness_address = robustness_address,
+      .max_deadline_ms = max_deadline_ms,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_RGXCMP,
+                          PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT",
+                           ret);
+   }
+
+   *compute_context_out = ret.compute_context;
+
+   return VK_SUCCESS;
+}
+
+/* Destroy a compute context created with
+ * pvr_srv_rgx_create_compute_context(). Failure is reported but not
+ * propagated.
+ */
+void pvr_srv_rgx_destroy_compute_context(int fd, void *compute_context)
+{
+   struct pvr_srv_rgx_destroy_compute_context_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_rgx_destroy_compute_context_cmd cmd = {
+      .compute_context = compute_context,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_RGXCMP,
+                          PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT",
+                    ret);
+   }
+}
+
+/* Kick a compute (CDM) command for execution on compute_context via the
+ * RGXKICKCDM2 bridge call. The kernel-created update fence is returned
+ * through update_fence_out.
+ *
+ * client_update_ufo_sync_prim_block / client_update_offset /
+ * client_update_value are parallel arrays of client_update_count entries
+ * describing UFO sync prim updates; presumably these are applied on job
+ * completion — confirm against the kernel bridge implementation.
+ */
+VkResult pvr_srv_rgx_kick_compute2(int fd,
+                                   void *compute_context,
+                                   uint32_t client_cache_op_seq_num,
+                                   uint32_t client_update_count,
+                                   void **client_update_ufo_sync_prim_block,
+                                   uint32_t *client_update_offset,
+                                   uint32_t *client_update_value,
+                                   int32_t check_fence,
+                                   int32_t update_timeline,
+                                   uint32_t cmd_size,
+                                   uint8_t *cdm_cmd,
+                                   uint32_t ext_job_ref,
+                                   uint32_t num_work_groups,
+                                   uint32_t num_work_items,
+                                   uint32_t pdump_flags,
+                                   uint64_t max_deadline_us,
+                                   char *update_fence_name,
+                                   int32_t *const update_fence_out)
+{
+   /* NOTE(review): the initializer lists fields in a different order to the
+    * parameters (64-bit and pointer members first); presumably this mirrors
+    * the bridge struct's member order and has no behavioural effect —
+    * confirm against the struct declaration.
+    */
+   struct pvr_srv_rgx_kick_cdm2_cmd cmd = {
+      .max_deadline_us = max_deadline_us,
+      .compute_context = compute_context,
+      .client_update_offset = client_update_offset,
+      .client_update_value = client_update_value,
+      .cdm_cmd = cdm_cmd,
+      .update_fence_name = update_fence_name,
+      .client_update_ufo_sync_prim_block = client_update_ufo_sync_prim_block,
+      .check_fence = check_fence,
+      .update_timeline = update_timeline,
+      .client_cache_op_seq_num = client_cache_op_seq_num,
+      .client_update_count = client_update_count,
+      .cmd_size = cmd_size,
+      .ext_job_ref = ext_job_ref,
+      .num_work_groups = num_work_groups,
+      .num_work_items = num_work_items,
+      .pdump_flags = pdump_flags,
+   };
+
+   struct pvr_srv_rgx_kick_cdm2_ret ret = {
+      /* Pre-set so a failed ioctl still carries a bridge-level error. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXCMP,
+                                PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                           "PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2",
+                           ret);
+   }
+
+   /* Fence signalled when this kick completes. */
+   *update_fence_out = ret.update_fence;
+
+   return VK_SUCCESS;
+}
+
+/* Create a pair of hardware render target (HWRT) datasets via the
+ * RGXCREATEHWRTDATASET bridge call.
+ *
+ * Two datasets come out of a single call: the '0'/'1' suffixed device
+ * addresses (PM mlist, macrotile array, region headers) presumably provide
+ * one set of buffers per dataset, and a handle for each dataset is returned
+ * through hwrt_dataset0_out and hwrt_dataset1_out. All other parameters are
+ * forwarded unchanged to the identically named fields of the bridge command.
+ *
+ * NOTE(review): "vheap_table_dev_add" looks like a typo of
+ * "vheap_table_dev_addr", but it is kept to match the declaration in the
+ * header.
+ */
+VkResult
+pvr_srv_rgx_create_hwrt_dataset(int fd,
+                                pvr_dev_addr_t pm_mlist_dev_addr0,
+                                pvr_dev_addr_t pm_mlist_dev_addr1,
+                                pvr_dev_addr_t tail_ptrs_dev_addr,
+                                pvr_dev_addr_t macrotile_array_dev_addr0,
+                                pvr_dev_addr_t macrotile_array_dev_addr1,
+                                pvr_dev_addr_t rtc_dev_addr,
+                                pvr_dev_addr_t rgn_header_dev_addr0,
+                                pvr_dev_addr_t rgn_header_dev_addr1,
+                                pvr_dev_addr_t vheap_table_dev_add,
+                                uint64_t flipped_multi_sample_ctl,
+                                uint64_t multi_sample_ctl,
+                                uint64_t rgn_header_size,
+                                void **free_lists,
+                                uint32_t mtile_stride,
+                                uint32_t ppp_screen,
+                                uint32_t te_aa,
+                                uint32_t te_mtile1,
+                                uint32_t te_mtile2,
+                                uint32_t te_screen,
+                                uint32_t tpc_size,
+                                uint32_t tpc_stride,
+                                uint32_t isp_merge_lower_x,
+                                uint32_t isp_merge_lower_y,
+                                uint32_t isp_merge_scale_x,
+                                uint32_t isp_merge_scale_y,
+                                uint32_t isp_merge_upper_x,
+                                uint32_t isp_merge_upper_y,
+                                uint32_t isp_mtile_size,
+                                uint16_t max_rts,
+                                void **const hwrt_dataset0_out,
+                                void **const hwrt_dataset1_out)
+{
+   struct pvr_srv_rgx_create_hwrt_dataset_cmd cmd = {
+      .pm_mlist_dev_addr0 = pm_mlist_dev_addr0,
+      .pm_mlist_dev_addr1 = pm_mlist_dev_addr1,
+      .tail_ptrs_dev_addr = tail_ptrs_dev_addr,
+      .macrotile_array_dev_addr0 = macrotile_array_dev_addr0,
+      .macrotile_array_dev_addr1 = macrotile_array_dev_addr1,
+      .rtc_dev_addr = rtc_dev_addr,
+      .rgn_header_dev_addr0 = rgn_header_dev_addr0,
+      .rgn_header_dev_addr1 = rgn_header_dev_addr1,
+      .vheap_table_dev_add = vheap_table_dev_add,
+      .flipped_multi_sample_ctl = flipped_multi_sample_ctl,
+      .multi_sample_ctl = multi_sample_ctl,
+      .rgn_header_size = rgn_header_size,
+      .free_lists = free_lists,
+      .mtile_stride = mtile_stride,
+      .ppp_screen = ppp_screen,
+      .te_aa = te_aa,
+      .te_mtile1 = te_mtile1,
+      .te_mtile2 = te_mtile2,
+      .te_screen = te_screen,
+      .tpc_size = tpc_size,
+      .tpc_stride = tpc_stride,
+      .isp_merge_lower_x = isp_merge_lower_x,
+      .isp_merge_lower_y = isp_merge_lower_y,
+      .isp_merge_scale_x = isp_merge_scale_x,
+      .isp_merge_scale_y = isp_merge_scale_y,
+      .isp_merge_upper_x = isp_merge_upper_x,
+      .isp_merge_upper_y = isp_merge_upper_y,
+      .isp_mtile_size = isp_mtile_size,
+      .max_rts = max_rts,
+   };
+
+   struct pvr_srv_rgx_create_hwrt_dataset_ret ret = {
+      /* Pre-set so a failed ioctl still carries a bridge-level error. */
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXTA3D,
+                                PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET",
+                           ret);
+   }
+
+   /* Handles for the two newly created datasets. */
+   *hwrt_dataset0_out = ret.hwrt_dataset0;
+   *hwrt_dataset1_out = ret.hwrt_dataset1;
+
+   return VK_SUCCESS;
+}
+
+/* Destroy a single HWRT dataset handle returned by
+ * pvr_srv_rgx_create_hwrt_dataset(). Failure is reported but not propagated.
+ */
+void pvr_srv_rgx_destroy_hwrt_dataset(int fd, void *hwrt_dataset)
+{
+   struct pvr_srv_rgx_destroy_hwrt_dataset_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_rgx_destroy_hwrt_dataset_cmd cmd = {
+      .hwrt_dataset = hwrt_dataset,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_RGXTA3D,
+                          PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                    "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET",
+                    ret);
+   }
+}
+
+/* Create a firmware free list backed by free_list_pmr at pmr_offset. The
+ * returned cleanup cookie is passed to pvr_srv_rgx_destroy_free_list().
+ */
+VkResult pvr_srv_rgx_create_free_list(int fd,
+                                      void *mem_ctx_priv_data,
+                                      uint32_t max_free_list_pages,
+                                      uint32_t init_free_list_pages,
+                                      uint32_t grow_free_list_pages,
+                                      uint32_t grow_param_threshold,
+                                      void *global_free_list,
+                                      enum pvr_srv_bool free_list_check,
+                                      pvr_dev_addr_t free_list_dev_addr,
+                                      void *free_list_pmr,
+                                      uint64_t pmr_offset,
+                                      void **const cleanup_cookie_out)
+{
+   struct pvr_srv_rgx_create_free_list_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_rgx_create_free_list_cmd cmd = {
+      .free_list_dev_addr = free_list_dev_addr,
+      .pmr_offset = pmr_offset,
+      .mem_ctx_priv_data = mem_ctx_priv_data,
+      .free_list_pmr = free_list_pmr,
+      .global_free_list = global_free_list,
+      .free_list_check = free_list_check,
+      .grow_free_list_pages = grow_free_list_pages,
+      .grow_param_threshold = grow_param_threshold,
+      .init_free_list_pages = init_free_list_pages,
+      .max_free_list_pages = max_free_list_pages,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_RGXTA3D,
+                          PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST",
+                           ret);
+   }
+
+   *cleanup_cookie_out = ret.cleanup_cookie;
+
+   return VK_SUCCESS;
+}
+
+/* Destroy a free list created with pvr_srv_rgx_create_free_list(), busy
+ * waiting while the bridge reports PVR_SRV_ERROR_RETRY. Failure is reported
+ * but not propagated.
+ */
+void pvr_srv_rgx_destroy_free_list(int fd, void *cleanup_cookie)
+{
+   struct pvr_srv_rgx_destroy_free_list_cmd cmd = {
+      .cleanup_cookie = cleanup_cookie,
+   };
+
+   struct pvr_srv_rgx_destroy_free_list_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   /* FIXME: Do we want to propagate the retry error up the call chain so that
+    * we can do something better than busy wait or is the expectation that we
+    * should never get into this situation because the driver doesn't attempt
+    * to free any resources while they're in use?
+    */
+   do {
+      result = pvr_srv_bridge_call(fd,
+                                   PVR_SRV_BRIDGE_RGXTA3D,
+                                   PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST,
+                                   &cmd,
+                                   sizeof(cmd),
+                                   &ret,
+                                   sizeof(ret));
+      /* Bridge-level status (including RETRY) is delivered through
+       * ret.error, as in every other bridge wrapper in this file, not
+       * through the ioctl return value, so test ret.error here. The
+       * previous condition compared the ioctl result against
+       * PVR_SRV_ERROR_RETRY and so never retried. Stop looping if the
+       * ioctl itself fails, since ret.error could then hold a stale RETRY
+       * value from an earlier iteration.
+       */
+   } while (result == 0 && ret.error == PVR_SRV_ERROR_RETRY);
+
+   if (result || ret.error != PVR_SRV_OK) {
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST",
+                    ret);
+   }
+}
+
+/* Create a render context. The geometry/fragment deadline parameters map
+ * onto the bridge's legacy max_ta_deadline_ms / max_3d_deadline_ms fields.
+ */
+VkResult
+pvr_srv_rgx_create_render_context(int fd,
+                                  uint32_t priority,
+                                  pvr_dev_addr_t vdm_callstack_addr,
+                                  uint32_t reset_framework_cmd_size,
+                                  uint8_t *reset_framework_cmd,
+                                  void *priv_data,
+                                  uint32_t static_render_context_state_size,
+                                  uint8_t *static_render_context_state,
+                                  uint32_t packed_ccb_size,
+                                  uint32_t context_flags,
+                                  uint64_t robustness_address,
+                                  uint32_t max_geom_deadline_ms,
+                                  uint32_t max_frag_deadline_ms,
+                                  void **const render_context_out)
+{
+   struct pvr_srv_rgx_create_render_context_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+   struct pvr_srv_rgx_create_render_context_cmd cmd = {
+      .priority = priority,
+      .vdm_callstack_addr = vdm_callstack_addr,
+      .reset_framework_cmd = reset_framework_cmd,
+      .reset_framework_cmd_size = reset_framework_cmd_size,
+      .priv_data = priv_data,
+      .static_render_context_state = static_render_context_state,
+      .static_render_context_state_size = static_render_context_state_size,
+      .packed_ccb_size = packed_ccb_size,
+      .context_flags = context_flags,
+      .robustness_address = robustness_address,
+      .max_ta_deadline_ms = max_geom_deadline_ms,
+      .max_3d_deadline_ms = max_frag_deadline_ms,
+   };
+   const int result =
+      pvr_srv_bridge_call(fd,
+                          PVR_SRV_BRIDGE_RGXTA3D,
+                          PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT,
+                          &cmd,
+                          sizeof(cmd),
+                          &ret,
+                          sizeof(ret));
+
+   if (result != 0 || ret.error != PVR_SRV_OK) {
+      return vk_bridge_err(VK_ERROR_INITIALIZATION_FAILED,
+                           "PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT",
+                           ret);
+   }
+
+   *render_context_out = ret.render_context;
+
+   return VK_SUCCESS;
+}
+
+/* Destroy a render context created with
+ * pvr_srv_rgx_create_render_context(). Failure is reported but not
+ * propagated.
+ */
+void pvr_srv_rgx_destroy_render_context(int fd, void *render_context)
+{
+   struct pvr_srv_rgx_destroy_render_context_cmd cmd = {
+      .render_context = render_context,
+   };
+
+   struct pvr_srv_rgx_destroy_render_context_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXTA3D,
+                                PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      /* The message previously read "...RGXDESTORYRENDERCONTEXT", which did
+       * not match the bridge call name used above; fixed the typo.
+       */
+      vk_bridge_err(VK_ERROR_UNKNOWN,
+                    "PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT",
+                    ret);
+   }
+}
+
+/* Kick geometry ("TA") and/or fragment ("3D") work on a render context.
+ *
+ * The driver's "geom"/"frag" parameter naming maps onto the kernel
+ * bridge's "ta"/"3d" naming in struct pvr_srv_rgx_kick_ta3d2_cmd (see the
+ * initializer below).
+ *
+ * On success the kernel-created update fences are returned through
+ * update_fence_out (geometry) and update_fence_frag_out (fragment).
+ * Returns VK_NOT_READY when the kernel asks the caller to retry.
+ */
+VkResult pvr_srv_rgx_kick_render2(int fd,
+                                  void *render_ctx,
+                                  uint32_t client_cache_op_seq_num,
+                                  uint32_t client_geom_fence_count,
+                                  void **client_geom_fence_sync_prim_block,
+                                  uint32_t *client_geom_fence_sync_offset,
+                                  uint32_t *client_geom_fence_value,
+                                  uint32_t client_geom_update_count,
+                                  void **client_geom_update_sync_prim_block,
+                                  uint32_t *client_geom_update_sync_offset,
+                                  uint32_t *client_geom_update_value,
+                                  uint32_t client_frag_update_count,
+                                  void **client_frag_update_sync_prim_block,
+                                  uint32_t *client_frag_update_sync_offset,
+                                  uint32_t *client_frag_update_value,
+                                  void *pr_fence_ufo_sync_prim_block,
+                                  uint32_t client_pr_fence_ufo_sync_offset,
+                                  uint32_t client_pr_fence_value,
+                                  int32_t check_fence,
+                                  int32_t update_timeline,
+                                  int32_t *const update_fence_out,
+                                  char *update_fence_name,
+                                  int32_t check_fence_frag,
+                                  int32_t update_timeline_frag,
+                                  int32_t *const update_fence_frag_out,
+                                  char *update_fence_name_frag,
+                                  uint32_t cmd_geom_size,
+                                  uint8_t *cmd_geom,
+                                  uint32_t cmd_frag_pr_size,
+                                  uint8_t *cmd_frag_pr,
+                                  uint32_t cmd_frag_size,
+                                  uint8_t *cmd_frag,
+                                  uint32_t ext_job_ref,
+                                  bool kick_geom,
+                                  bool kick_pr,
+                                  bool kick_frag,
+                                  bool abort,
+                                  uint32_t pdump_flags,
+                                  void *hw_rt_dataset,
+                                  void *zs_buffer,
+                                  void *msaa_scratch_buffer,
+                                  uint32_t sync_pmr_count,
+                                  uint32_t *sync_pmr_flags,
+                                  void **sync_pmrs,
+                                  uint32_t render_target_size,
+                                  uint32_t num_draw_calls,
+                                  uint32_t num_indices,
+                                  uint32_t num_mrts,
+                                  uint64_t deadline)
+{
+   struct pvr_srv_rgx_kick_ta3d2_cmd cmd = {
+      .deadline = deadline,
+      .hw_rt_dataset = hw_rt_dataset,
+      .msaa_scratch_buffer = msaa_scratch_buffer,
+      .pr_fence_ufo_sync_prim_block = pr_fence_ufo_sync_prim_block,
+      .render_ctx = render_ctx,
+      .zs_buffer = zs_buffer,
+      .client_3d_update_sync_offset = client_frag_update_sync_offset,
+      .client_3d_update_value = client_frag_update_value,
+      .client_ta_fence_sync_offset = client_geom_fence_sync_offset,
+      .client_ta_fence_value = client_geom_fence_value,
+      .client_ta_update_sync_offset = client_geom_update_sync_offset,
+      .client_ta_update_value = client_geom_update_value,
+      .sync_pmr_flags = sync_pmr_flags,
+      .cmd_3d = cmd_frag,
+      .cmd_3d_pr = cmd_frag_pr,
+      .cmd_ta = cmd_geom,
+      .update_fence_name = update_fence_name,
+      .update_fence_name_3d = update_fence_name_frag,
+      .client_3d_update_sync_prim_block = client_frag_update_sync_prim_block,
+      .client_ta_fence_sync_prim_block = client_geom_fence_sync_prim_block,
+      .client_ta_update_sync_prim_block = client_geom_update_sync_prim_block,
+      .sync_pmrs = sync_pmrs,
+      .abort = abort,
+      .kick_3d = kick_frag,
+      .kick_pr = kick_pr,
+      .kick_ta = kick_geom,
+      .check_fence = check_fence,
+      .check_fence_3d = check_fence_frag,
+      .update_timeline = update_timeline,
+      .update_timeline_3d = update_timeline_frag,
+      .cmd_3d_size = cmd_frag_size,
+      .cmd_3d_pr_size = cmd_frag_pr_size,
+      .client_3d_update_count = client_frag_update_count,
+      .client_cache_op_seq_num = client_cache_op_seq_num,
+      .client_ta_fence_count = client_geom_fence_count,
+      .client_ta_update_count = client_geom_update_count,
+      .ext_job_ref = ext_job_ref,
+      .client_pr_fence_ufo_sync_offset = client_pr_fence_ufo_sync_offset,
+      .client_pr_fence_value = client_pr_fence_value,
+      .num_draw_calls = num_draw_calls,
+      .num_indices = num_indices,
+      .num_mrts = num_mrts,
+      .pdump_flags = pdump_flags,
+      .render_target_size = render_target_size,
+      .sync_pmr_count = sync_pmr_count,
+      .cmd_ta_size = cmd_geom_size,
+   };
+
+   /* Pre-set failure defaults in case the bridge call itself fails. */
+   struct pvr_srv_rgx_kick_ta3d2_ret ret = {
+      .error = PVR_SRV_ERROR_BRIDGE_CALL_FAILED,
+      .update_fence = -1,
+      .update_fence_3d = -1,
+   };
+
+   int result;
+
+   result = pvr_srv_bridge_call(fd,
+                                PVR_SRV_BRIDGE_RGXTA3D,
+                                PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2,
+                                &cmd,
+                                sizeof(cmd),
+                                &ret,
+                                sizeof(ret));
+   if (result || ret.error != PVR_SRV_OK) {
+      /* There is no 'retry' VkResult, so treat it as VK_NOT_READY instead.
+       * Fix: the retry status is delivered in ret.error (the services
+       * error code), not in 'result' (the bridge/ioctl return value), so
+       * compare ret.error here.
+       */
+      if (ret.error == PVR_SRV_ERROR_RETRY)
+         return VK_NOT_READY;
+
+      return vk_bridge_err(VK_ERROR_OUT_OF_DEVICE_MEMORY,
+                           "PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2",
+                           ret);
+   }
+
+   *update_fence_out = ret.update_fence;
+   *update_fence_frag_out = ret.update_fence_3d;
+
+   return VK_SUCCESS;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_bridge.h
new file mode 100644 (file)
index 0000000..fa82988
--- /dev/null
@@ -0,0 +1,943 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_BRIDGE_H
+#define PVR_SRV_BRIDGE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/******************************************************************************
+   Services bridges
+ ******************************************************************************/
+
+#define PVR_SRV_BRIDGE_SRVCORE 1UL
+
+#define PVR_SRV_BRIDGE_SRVCORE_CONNECT 0UL
+#define PVR_SRV_BRIDGE_SRVCORE_DISCONNECT 1UL
+
+#define PVR_SRV_BRIDGE_SYNC 2UL
+
+#define PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK 0UL
+#define PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK 1UL
+
+#define PVR_SRV_BRIDGE_MM 6UL
+
+#define PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR 8UL
+#define PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR 10UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE 15UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY 16UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE 17UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY 18UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR 19UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR 20UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE 21UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE 22UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES 24UL
+#define PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES 25UL
+#define PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT 30UL
+#define PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS 32UL
+
+#define PVR_SRV_BRIDGE_DMABUF 11UL
+
+#define PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF 0UL
+#define PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF 1UL
+
+#define PVR_SRV_BRIDGE_RGXCMP 129UL
+
+#define PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT 0UL
+#define PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT 1UL
+#define PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2 5UL
+
+#define PVR_SRV_BRIDGE_RGXTA3D 130UL
+
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET 0UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET 1UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST 6UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST 7UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT 8UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT 9UL
+#define PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2 12UL
+
+/******************************************************************************
+   DRM Services specific defines
+ ******************************************************************************/
+/* DRM command numbers, relative to DRM_COMMAND_BASE.
+ * These defines must be prefixed with "DRM_".
+ */
+#define DRM_SRVKM_CMD 0U /* Used for Services ioctls */
+
+/* These defines must be prefixed with "DRM_IOCTL_". */
+#define DRM_IOCTL_SRVKM_CMD \
+   DRM_IOWR(DRM_COMMAND_BASE + DRM_SRVKM_CMD, struct drm_srvkm_cmd)
+
+/******************************************************************************
+   Misc defines
+ ******************************************************************************/
+
+#define SUPPORT_RGX_SET_OFFSET BITFIELD_BIT(4U)
+#define DEBUG_SET_OFFSET BITFIELD_BIT(10U)
+#define SUPPORT_BUFFER_SYNC_SET_OFFSET BITFIELD_BIT(11U)
+#define OPTIONS_BIT31 BITFIELD_BIT(31U)
+
+#define RGX_BUILD_OPTIONS                       \
+   (SUPPORT_RGX_SET_OFFSET | DEBUG_SET_OFFSET | \
+    SUPPORT_BUFFER_SYNC_SET_OFFSET | OPTIONS_BIT31)
+
+#define PVR_SRV_VERSION_MAJ 1U
+#define PVR_SRV_VERSION_MIN 14U
+
+#define PVR_SRV_VERSION                                            \
+   (((uint32_t)((uint32_t)(PVR_SRV_VERSION_MAJ)&0xFFFFU) << 16U) | \
+    (((PVR_SRV_VERSION_MIN)&0xFFFFU) << 0U))
+
+#define PVR_SRV_VERSION_BUILD 5843584
+
+/*! This flags gets set if the client is 64 Bit compatible. */
+#define PVR_SRV_FLAGS_CLIENT_64BIT_COMPAT BITFIELD_BIT(5U)
+
+#define DEVMEM_ANNOTATION_MAX_LEN 64U
+
+#define PVR_SRV_SYNC_MAX 12U
+
+#define PVR_BUFFER_FLAG_READ BITFIELD_BIT(0U)
+#define PVR_BUFFER_FLAG_WRITE BITFIELD_BIT(1U)
+
+/******************************************************************************
+   Services Boolean
+ ******************************************************************************/
+
+/* Boolean type used in the services bridge ABI.
+ * The FORCE_ALIGN value pins the enum to a 32-bit representation.
+ */
+enum pvr_srv_bool {
+   PVR_SRV_FALSE = 0,
+   PVR_SRV_TRUE = 1,
+   PVR_SRV_FORCE_ALIGN = 0x7fffffff
+};
+
+/******************************************************************************
+   Service Error codes
+ ******************************************************************************/
+
+/* Error codes reported in the 'error' field of the bridge *_ret structs.
+ * NOTE(review): the sparse values (25, 37) look like a subset of the kernel
+ * services' error enumeration — confirm against the KM headers before
+ * adding or changing values.
+ */
+enum pvr_srv_error {
+   PVR_SRV_OK,
+   PVR_SRV_ERROR_RETRY = 25,
+   PVR_SRV_ERROR_BRIDGE_CALL_FAILED = 37,
+   PVR_SRV_ERROR_FORCE_I32 = 0x7fffffff
+};
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_SRVCORE_CONNECT structs
+ ******************************************************************************/
+
+struct pvr_srv_bridge_connect_cmd {
+   uint32_t build_options;
+   uint32_t DDK_build;
+   uint32_t DDK_version;
+   uint32_t flags;
+} PACKED;
+
+struct pvr_srv_bridge_connect_ret {
+   uint64_t bvnc;
+   enum pvr_srv_error error;
+   uint32_t capability_flags;
+   uint8_t kernel_arch;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_SRVCORE_DISCONNECT struct
+ ******************************************************************************/
+
+struct pvr_srv_bridge_disconnect_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_SYNC_ALLOCSYNCPRIMITIVEBLOCK struct
+ ******************************************************************************/
+
+struct pvr_srv_bridge_alloc_sync_primitive_block_ret {
+   void *handle;
+   void *pmr;
+   enum pvr_srv_error error;
+   uint32_t size;
+   uint32_t addr;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_SYNC_FREESYNCPRIMITIVEBLOCK structs
+ ******************************************************************************/
+
+struct pvr_srv_bridge_free_sync_primitive_block_cmd {
+   void *handle;
+} PACKED;
+
+struct pvr_srv_bridge_free_sync_primitive_block_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTCTXCREATE structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_ctx_create_cmd {
+   uint32_t kernel_memory_ctx;
+} PACKED;
+
+struct pvr_srv_devmem_int_ctx_create_ret {
+   void *server_memctx;
+   void *server_memctx_data;
+   enum pvr_srv_error error;
+   uint32_t cpu_cache_line_size;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTCTXDESTROY structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_ctx_destroy_cmd {
+   void *server_memctx;
+} PACKED;
+
+struct pvr_srv_devmem_int_ctx_destroy_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_HEAPCFGHEAPCOUNT structs
+ ******************************************************************************/
+
+struct pvr_srv_heap_count_cmd {
+   uint32_t heap_config_index;
+} PACKED;
+
+struct pvr_srv_heap_count_ret {
+   enum pvr_srv_error error;
+   uint32_t heap_count;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_HEAPCFGHEAPDETAILS structs
+ ******************************************************************************/
+
+struct pvr_srv_heap_cfg_details_cmd {
+   char *buffer;
+   uint32_t heap_config_index;
+   uint32_t heap_index;
+   uint32_t buffer_size;
+} PACKED;
+
+struct pvr_srv_heap_cfg_details_ret {
+   pvr_dev_addr_t base_addr;
+   uint64_t size;
+   uint64_t reserved_size;
+   char *buffer;
+   enum pvr_srv_error error;
+   uint32_t log2_page_size;
+   uint32_t log2_alignment;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPCREATE structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_heap_create_cmd {
+   pvr_dev_addr_t base_addr;
+   uint64_t size;
+   void *server_memctx;
+   uint32_t log2_page_size;
+} PACKED;
+
+struct pvr_srv_devmem_int_heap_create_ret {
+   void *server_heap;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTHEAPDESTROY structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_heap_destroy_cmd {
+   void *server_heap;
+} PACKED;
+
+struct pvr_srv_devmem_int_heap_destroy_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTRESERVERANGE structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_reserve_range_cmd {
+   pvr_dev_addr_t addr;
+   uint64_t size;
+   void *server_heap;
+} PACKED;
+
+struct pvr_srv_devmem_int_reserve_range_ret {
+   void *reservation;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTUNRESERVERANGE structs
+ ******************************************************************************/
+
+struct pvr_srv_bridge_in_devmem_int_unreserve_range_cmd {
+   void *reservation;
+} PACKED;
+
+struct pvr_srv_bridge_in_devmem_int_unreserve_range_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_PHYSMEMNEWRAMBACKEDLOCKEDPMR structs
+ ******************************************************************************/
+
+struct pvr_srv_physmem_new_ram_backed_locked_pmr_cmd {
+   uint64_t block_size;
+   uint64_t size;
+   uint32_t *mapping_table;
+   const char *annotation;
+   uint32_t annotation_size;
+   uint32_t log2_page_size;
+   uint32_t phy_blocks;
+   uint32_t virt_blocks;
+   uint32_t pdump_flags;
+   uint32_t pid;
+   uint64_t flags;
+} PACKED;
+
+struct pvr_srv_physmem_new_ram_backed_locked_pmr_ret {
+   void *pmr;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_PMRUNREFUNLOCKPMR structs
+ ******************************************************************************/
+
+struct pvr_srv_pmr_unref_unlock_pmr_cmd {
+   void *pmr;
+} PACKED;
+
+struct pvr_srv_pmr_unref_unlock_pmr_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPAGES structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_map_pages_cmd {
+   pvr_dev_addr_t addr;
+   void *pmr;
+   void *reservation;
+   uint32_t page_count;
+   uint32_t page_offset;
+   uint64_t flags;
+} PACKED;
+
+struct pvr_srv_devmem_int_map_pages_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPAGES structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_unmap_pages_cmd {
+   pvr_dev_addr_t dev_addr;
+   void *reservation;
+   uint32_t page_count;
+} PACKED;
+
+struct pvr_srv_devmem_int_unmap_pages_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTMAPPMR structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_map_pmr_cmd {
+   void *server_heap;
+   void *pmr;
+   void *reservation;
+   uint64_t flags;
+} PACKED;
+
+struct pvr_srv_devmem_int_map_pmr_ret {
+   void *mapping;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_MM_DEVMEMINTUNMAPPMR structs
+ ******************************************************************************/
+
+struct pvr_srv_devmem_int_unmap_pmr_cmd {
+   void *mapping;
+} PACKED;
+
+struct pvr_srv_devmem_int_unmap_pmr_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_DMABUF_PHYSMEMIMPORTDMABUF structs
+ ******************************************************************************/
+
+struct pvr_srv_phys_mem_import_dmabuf_cmd {
+   const char *name;
+   int buffer_fd;
+   uint32_t name_size;
+   uint64_t flags;
+} PACKED;
+
+struct pvr_srv_phys_mem_import_dmabuf_ret {
+   uint64_t align;
+   uint64_t size;
+   void *pmr;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_DMABUF_PHYSMEMEXPORTDMABUF structs
+ ******************************************************************************/
+
+struct pvr_srv_phys_mem_export_dmabuf_cmd {
+   void *pmr;
+} PACKED;
+
+struct pvr_srv_phys_mem_export_dmabuf_ret {
+   enum pvr_srv_error error;
+   int fd;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXCMP_RGXCREATECOMPUTECONTEXT structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_create_compute_context_cmd {
+   uint64_t robustness_address;
+   void *priv_data;
+   uint8_t *reset_framework_cmd;
+   uint8_t *static_compute_context_state;
+   uint32_t context_flags;
+   uint32_t reset_framework_cmd_size;
+   uint32_t max_deadline_ms;
+   uint32_t packed_ccb_size;
+   /* RGX_CONTEXT_PRIORITY_... flags. */
+   uint32_t priority;
+   uint32_t static_compute_context_state_size;
+} PACKED;
+
+struct pvr_srv_rgx_create_compute_context_ret {
+   void *compute_context;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXCMP_RGXDESTROYCOMPUTECONTEXT structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_destroy_compute_context_cmd {
+   void *compute_context;
+} PACKED;
+
+struct pvr_srv_rgx_destroy_compute_context_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXCMP_RGXKICKCDM2 structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_kick_cdm2_cmd {
+   uint64_t max_deadline_us;
+   void *compute_context;
+   uint32_t *client_update_offset;
+   uint32_t *client_update_value;
+   uint8_t *cdm_cmd;
+   char *update_fence_name;
+   void **client_update_ufo_sync_prim_block;
+   int32_t check_fence;
+   int32_t update_timeline;
+   uint32_t client_cache_op_seq_num;
+   uint32_t client_update_count;
+   uint32_t cmd_size;
+   uint32_t ext_job_ref;
+   uint32_t num_work_groups;
+   uint32_t num_work_items;
+   uint32_t pdump_flags;
+} PACKED;
+
+struct pvr_srv_rgx_kick_cdm2_ret {
+   enum pvr_srv_error error;
+   int32_t update_fence;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEHWRTDATASET structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_create_hwrt_dataset_cmd {
+   pvr_dev_addr_t pm_mlist_dev_addr0;
+   pvr_dev_addr_t pm_mlist_dev_addr1;
+   pvr_dev_addr_t tail_ptrs_dev_addr;
+   pvr_dev_addr_t macrotile_array_dev_addr0;
+   pvr_dev_addr_t macrotile_array_dev_addr1;
+   pvr_dev_addr_t rtc_dev_addr;
+   pvr_dev_addr_t rgn_header_dev_addr0;
+   pvr_dev_addr_t rgn_header_dev_addr1;
+   pvr_dev_addr_t vheap_table_dev_add;
+   uint64_t flipped_multi_sample_ctl;
+   uint64_t multi_sample_ctl;
+   uint64_t rgn_header_size;
+   void **free_lists;
+   uint32_t mtile_stride;
+   uint32_t ppp_screen;
+   uint32_t te_aa;
+   uint32_t te_mtile1;
+   uint32_t te_mtile2;
+   uint32_t te_screen;
+   uint32_t tpc_size;
+   uint32_t tpc_stride;
+   uint32_t isp_merge_lower_x;
+   uint32_t isp_merge_lower_y;
+   uint32_t isp_merge_scale_x;
+   uint32_t isp_merge_scale_y;
+   uint32_t isp_merge_upper_x;
+   uint32_t isp_merge_upper_y;
+   uint32_t isp_mtile_size;
+   uint16_t max_rts;
+} PACKED;
+
+struct pvr_srv_rgx_create_hwrt_dataset_ret {
+   void *hwrt_dataset0;
+   void *hwrt_dataset1;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYHWRTDATASET structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_destroy_hwrt_dataset_cmd {
+   void *hwrt_dataset;
+} PACKED;
+
+struct pvr_srv_rgx_destroy_hwrt_dataset_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXTA3D_RGXCREATEFREELIST structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_create_free_list_cmd {
+   pvr_dev_addr_t free_list_dev_addr;
+   uint64_t pmr_offset;
+   void *mem_ctx_priv_data;
+   void *free_list_pmr;
+   void *global_free_list;
+   enum pvr_srv_bool free_list_check;
+   uint32_t grow_free_list_pages;
+   uint32_t grow_param_threshold;
+   uint32_t init_free_list_pages;
+   uint32_t max_free_list_pages;
+} PACKED;
+
+struct pvr_srv_rgx_create_free_list_ret {
+   void *cleanup_cookie;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYFREELIST structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_destroy_free_list_cmd {
+   void *cleanup_cookie;
+} PACKED;
+
+struct pvr_srv_rgx_destroy_free_list_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXTA3D_RGXCREATERENDERCONTEXT structs
+ ******************************************************************************/
+
+/* NOTE(review): these PACKED structs are the kernel bridge ABI — field
+ * order and sizes must not be changed.
+ */
+struct pvr_srv_rgx_create_render_context_cmd {
+   pvr_dev_addr_t vdm_callstack_addr;
+   uint64_t robustness_address;
+   void *priv_data;
+   uint8_t *reset_framework_cmd;
+   uint8_t *static_render_context_state;
+#define RGX_CONTEXT_FLAG_DISABLESLR BITFIELD_BIT(0U)
+   uint32_t context_flags;
+   uint32_t reset_framework_cmd_size;
+   uint32_t max_3d_deadline_ms;
+   uint32_t max_ta_deadline_ms;
+   uint32_t packed_ccb_size;
+   /* Scheduling priority of the context; one of the values below. */
+#define RGX_CONTEXT_PRIORITY_REALTIME UINT32_MAX
+#define RGX_CONTEXT_PRIORITY_HIGH 2U
+#define RGX_CONTEXT_PRIORITY_MEDIUM 1U
+#define RGX_CONTEXT_PRIORITY_LOW 0U
+   uint32_t priority;
+   uint32_t static_render_context_state_size;
+} PACKED;
+
+struct pvr_srv_rgx_create_render_context_ret {
+   /* Context handle, passed back in later kick/destroy bridge calls. */
+   void *render_context;
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXTA3D_RGXDESTROYRENDERCONTEXT structs
+ ******************************************************************************/
+
+struct pvr_srv_rgx_destroy_render_context_cmd {
+   /* Handle obtained from pvr_srv_rgx_create_render_context_ret. */
+   void *render_context;
+} PACKED;
+
+struct pvr_srv_rgx_destroy_render_context_ret {
+   enum pvr_srv_error error;
+} PACKED;
+
+/******************************************************************************
+   PVR_SRV_BRIDGE_RGXTA3D_RGXKICKTA3D2 structs
+ ******************************************************************************/
+
+/* Kick command for a combined geometry/fragment render job.
+ *
+ * Naming: the kernel bridge uses "ta" for what the driver calls "geom"
+ * and "3d" for "frag" (see the initializer in pvr_srv_rgx_kick_render2).
+ * NOTE(review): this PACKED layout is the kernel bridge ABI — do not
+ * reorder or insert fields.
+ */
+struct pvr_srv_rgx_kick_ta3d2_cmd {
+   uint64_t deadline;
+   void *hw_rt_dataset;
+   void *msaa_scratch_buffer;
+   void *pr_fence_ufo_sync_prim_block;
+   void *render_ctx;
+   void *zs_buffer;
+   uint32_t *client_3d_update_sync_offset;
+   uint32_t *client_3d_update_value;
+   uint32_t *client_ta_fence_sync_offset;
+   uint32_t *client_ta_fence_value;
+   uint32_t *client_ta_update_sync_offset;
+   uint32_t *client_ta_update_value;
+   uint32_t *sync_pmr_flags;
+   /* Per-phase command streams: 3d = fragment, 3d_pr = partial render,
+    * ta = geometry.
+    */
+   uint8_t *cmd_3d;
+   uint8_t *cmd_3d_pr;
+   uint8_t *cmd_ta;
+   char *update_fence_name;
+   char *update_fence_name_3d;
+   void **client_3d_update_sync_prim_block;
+   void **client_ta_fence_sync_prim_block;
+   void **client_ta_update_sync_prim_block;
+   void **sync_pmrs;
+   enum pvr_srv_bool abort;
+   /* Which phases to actually kick. */
+   enum pvr_srv_bool kick_3d;
+   enum pvr_srv_bool kick_pr;
+   enum pvr_srv_bool kick_ta;
+   int32_t check_fence;
+   int32_t check_fence_3d;
+   int32_t update_timeline;
+   int32_t update_timeline_3d;
+   uint32_t cmd_3d_size;
+   uint32_t cmd_3d_pr_size;
+   uint32_t client_3d_update_count;
+   uint32_t client_cache_op_seq_num;
+   uint32_t client_ta_fence_count;
+   uint32_t client_ta_update_count;
+   uint32_t ext_job_ref;
+   uint32_t client_pr_fence_ufo_sync_offset;
+   uint32_t client_pr_fence_value;
+   uint32_t num_draw_calls;
+   uint32_t num_indices;
+   uint32_t num_mrts;
+   uint32_t pdump_flags;
+   uint32_t render_target_size;
+   uint32_t sync_pmr_count;
+   uint32_t cmd_ta_size;
+} PACKED;
+
+struct pvr_srv_rgx_kick_ta3d2_ret {
+   enum pvr_srv_error error;
+   /* Update fences created by the kernel: geometry and fragment. */
+   int32_t update_fence;
+   int32_t update_fence_3d;
+} PACKED;
+
+/******************************************************************************
+   Ioctl structure to pass cmd and ret structures
+ ******************************************************************************/
+
+/* Payload of DRM_IOCTL_SRVKM_CMD: selects the bridge (bridge_id) and the
+ * function within it (bridge_func_id), and carries userspace pointers and
+ * sizes for the input cmd struct and output ret struct.
+ */
+struct drm_srvkm_cmd {
+   uint32_t bridge_id;
+   uint32_t bridge_func_id;
+   uint64_t in_data_ptr;
+   uint64_t out_data_ptr;
+   uint32_t in_data_size;
+   uint32_t out_data_size;
+};
+
+/******************************************************************************
+   Bridge function prototype
+ ******************************************************************************/
+
+VkResult pvr_srv_connection_create(int fd, uint64_t *const bvnc_out);
+void pvr_srv_connection_destroy(int fd);
+
+VkResult pvr_srv_alloc_sync_primitive_block(int fd,
+                                            void **const handle_out,
+                                            void **const pmr_out,
+                                            uint32_t *const size_out,
+                                            uint32_t *const addr_out);
+void pvr_srv_free_sync_primitive_block(int fd, void *handle);
+
+VkResult pvr_srv_get_heap_count(int fd, uint32_t *const heap_count_out);
+VkResult pvr_srv_get_heap_details(int fd,
+                                  uint32_t heap_index,
+                                  uint32_t buffer_size,
+                                  char *const buffer_out,
+                                  pvr_dev_addr_t *const base_address_out,
+                                  uint64_t *const size_out,
+                                  uint64_t *const reserved_size_out,
+                                  uint32_t *const log2_page_size_out);
+
+VkResult pvr_srv_int_heap_create(int fd,
+                                 pvr_dev_addr_t base_address,
+                                 uint64_t size,
+                                 uint32_t log2_page_size,
+                                 void *server_memctx,
+                                 void **const server_heap_out);
+void pvr_srv_int_heap_destroy(int fd, void *server_heap);
+
+VkResult pvr_srv_int_ctx_create(int fd,
+                                void **const server_memctx_out,
+                                void **const server_memctx_data_out);
+void pvr_srv_int_ctx_destroy(int fd, void *server_memctx);
+
+VkResult pvr_srv_int_reserve_addr(int fd,
+                                  void *server_heap,
+                                  pvr_dev_addr_t addr,
+                                  uint64_t size,
+                                  void **const reservation_out);
+void pvr_srv_int_unreserve_addr(int fd, void *reservation);
+
+VkResult pvr_srv_alloc_pmr(int fd,
+                           uint64_t size,
+                           uint64_t block_size,
+                           uint32_t phy_blocks,
+                           uint32_t virt_blocks,
+                           uint32_t log2_page_size,
+                           uint64_t flags,
+                           uint32_t pid,
+                           void **const pmr_out);
+void pvr_srv_free_pmr(int fd, void *pmr);
+
+VkResult pvr_srv_int_map_pages(int fd,
+                               void *reservation,
+                               void *pmr,
+                               uint32_t page_count,
+                               uint32_t page_offset,
+                               uint64_t flags,
+                               pvr_dev_addr_t addr);
+void pvr_srv_int_unmap_pages(int fd,
+                             void *reservation,
+                             pvr_dev_addr_t dev_addr,
+                             uint32_t page_count);
+
+VkResult pvr_srv_int_map_pmr(int fd,
+                             void *server_heap,
+                             void *reservation,
+                             void *pmr,
+                             uint64_t flags,
+                             void **const mapping_out);
+void pvr_srv_int_unmap_pmr(int fd, void *mapping);
+
+VkResult pvr_srv_physmem_import_dmabuf(int fd,
+                                       int buffer_fd,
+                                       uint64_t flags,
+                                       void **const pmr_out,
+                                       uint64_t *const size_out,
+                                       uint64_t *const align_out);
+VkResult pvr_srv_physmem_export_dmabuf(int fd, void *pmr, int *const fd_out);
+
+VkResult
+pvr_srv_rgx_create_compute_context(int fd,
+                                   uint32_t priority,
+                                   uint32_t reset_framework_cmd_size,
+                                   uint8_t *reset_framework_cmd,
+                                   void *priv_data,
+                                   uint32_t static_compute_context_state_size,
+                                   uint8_t *static_compute_context_state,
+                                   uint32_t packed_ccb_size,
+                                   uint32_t context_flags,
+                                   uint64_t robustness_address,
+                                   uint32_t max_deadline_ms,
+                                   void **const compute_context_out);
+void pvr_srv_rgx_destroy_compute_context(int fd, void *compute_context);
+
+VkResult pvr_srv_rgx_kick_compute2(int fd,
+                                   void *compute_context,
+                                   uint32_t client_cache_op_seq_num,
+                                   uint32_t client_update_count,
+                                   void **client_update_ufo_sync_prim_block,
+                                   uint32_t *client_update_offset,
+                                   uint32_t *client_update_value,
+                                   int32_t check_fence,
+                                   int32_t update_timeline,
+                                   uint32_t cmd_size,
+                                   uint8_t *cdm_cmd,
+                                   uint32_t ext_job_ref,
+                                   uint32_t num_work_groups,
+                                   uint32_t num_work_items,
+                                   uint32_t pdump_flags,
+                                   uint64_t max_deadline_us,
+                                   char *update_fence_name,
+                                   int32_t *const update_fence_out);
+
+VkResult
+pvr_srv_rgx_create_hwrt_dataset(int fd,
+                                pvr_dev_addr_t pm_mlist_dev_addr0,
+                                pvr_dev_addr_t pm_mlist_dev_addr1,
+                                pvr_dev_addr_t tail_ptrs_dev_addr,
+                                pvr_dev_addr_t macrotile_array_dev_addr0,
+                                pvr_dev_addr_t macrotile_array_dev_addr1,
+                                pvr_dev_addr_t rtc_dev_addr,
+                                pvr_dev_addr_t rgn_header_dev_addr0,
+                                pvr_dev_addr_t rgn_header_dev_addr1,
+                                pvr_dev_addr_t vheap_table_dev_add,
+                                uint64_t flipped_multi_sample_ctl,
+                                uint64_t multi_sample_ctl,
+                                uint64_t rgn_header_size,
+                                void **free_lists,
+                                uint32_t mtile_stride,
+                                uint32_t ppp_screen,
+                                uint32_t te_aa,
+                                uint32_t te_mtile1,
+                                uint32_t te_mtile2,
+                                uint32_t te_screen,
+                                uint32_t tpc_size,
+                                uint32_t tpc_stride,
+                                uint32_t isp_merge_lower_x,
+                                uint32_t isp_merge_lower_y,
+                                uint32_t isp_merge_scale_x,
+                                uint32_t isp_merge_scale_y,
+                                uint32_t isp_merge_upper_x,
+                                uint32_t isp_merge_upper_y,
+                                uint32_t isp_mtile_size,
+                                uint16_t max_rts,
+                                void **const hwrt_dataset0_out,
+                                void **const hwrt_dataset1_out);
+
+void pvr_srv_rgx_destroy_hwrt_dataset(int fd, void *hwrt_dataset);
+
+VkResult pvr_srv_rgx_create_free_list(int fd,
+                                      void *mem_ctx_priv_data,
+                                      uint32_t max_free_list_pages,
+                                      uint32_t init_free_list_pages,
+                                      uint32_t grow_free_list_pages,
+                                      uint32_t grow_param_threshold,
+                                      void *global_free_list,
+                                      enum pvr_srv_bool free_list_check,
+                                      pvr_dev_addr_t free_list_dev_addr,
+                                      void *free_list_pmr,
+                                      uint64_t pmr_offset,
+                                      void **const cleanup_cookie_out);
+
+void pvr_srv_rgx_destroy_free_list(int fd, void *cleanup_cookie);
+
+VkResult
+pvr_srv_rgx_create_render_context(int fd,
+                                  uint32_t priority,
+                                  pvr_dev_addr_t vdm_callstack_addr,
+                                  uint32_t reset_framework_cmd_size,
+                                  uint8_t *reset_framework_cmd,
+                                  void *priv_data,
+                                  uint32_t static_render_context_state_size,
+                                  uint8_t *static_render_context_state,
+                                  uint32_t packed_ccb_size,
+                                  uint32_t context_flags,
+                                  uint64_t robustness_address,
+                                  uint32_t max_geom_deadline_ms,
+                                  uint32_t max_frag_deadline_ms,
+                                  void **const render_context_out);
+
+void pvr_srv_rgx_destroy_render_context(int fd, void *render_context);
+
+VkResult pvr_srv_rgx_kick_render2(int fd,
+                                  void *render_ctx,
+                                  uint32_t client_cache_op_seq_num,
+                                  uint32_t client_geom_fence_count,
+                                  void **client_geom_fence_sync_prim_block,
+                                  uint32_t *client_geom_fence_sync_offset,
+                                  uint32_t *client_geom_fence_value,
+                                  uint32_t client_geom_update_count,
+                                  void **client_geom_update_sync_prim_block,
+                                  uint32_t *client_geom_update_sync_offset,
+                                  uint32_t *client_geom_update_value,
+                                  uint32_t client_frag_update_count,
+                                  void **client_frag_update_sync_prim_block,
+                                  uint32_t *client_frag_update_sync_offset,
+                                  uint32_t *client_frag_update_value,
+                                  void *client_pr_fence_ufo_sync_prim_block,
+                                  uint32_t client_pr_fence_ufo_sync_offset,
+                                  uint32_t client_pr_fence_value,
+                                  int32_t check_fence,
+                                  int32_t update_timeline,
+                                  int32_t *const update_fence_out,
+                                  char *update_fence_name,
+                                  int32_t check_fence_frag,
+                                  int32_t update_timeline_frag,
+                                  int32_t *const update_fence_frag_out,
+                                  char *update_fence_name_frag,
+                                  uint32_t cmd_geom_size,
+                                  uint8_t *cmd_geom,
+                                  uint32_t cmd_frag_pr_size,
+                                  uint8_t *cmd_frag_pr,
+                                  uint32_t cmd_frag_size,
+                                  uint8_t *cmd_frag,
+                                  uint32_t ext_job_ref,
+                                  bool kick_geom,
+                                  bool kick_pr,
+                                  bool kick_frag,
+                                  bool abort,
+                                  uint32_t pdump_flags,
+                                  void *hw_rt_dataset,
+                                  void *zs_buffer,
+                                  void *msaa_scratch_buffer,
+                                  uint32_t sync_pmr_count,
+                                  uint32_t *sync_pmr_flags,
+                                  void **sync_pmrs,
+                                  uint32_t render_target_size,
+                                  uint32_t num_draw_calls,
+                                  uint32_t num_indices,
+                                  uint32_t num_mrts,
+                                  uint64_t deadline);
+
+#endif /* PVR_SRV_BRIDGE_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_common.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_common.h
new file mode 100644 (file)
index 0000000..71a5755
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_COMMON_H
+#define PVR_SRV_JOB_COMMON_H
+
+#include <stdint.h>
+
+#include "pvr_srv_bridge.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+#define PVR_SRV_SYNC_DEV_PATH "/dev/pvr_sync"
+
+static inline uint32_t
+pvr_srv_from_winsys_priority(enum pvr_winsys_ctx_priority priority)
+{
+   switch (priority) {
+   case PVR_WINSYS_CTX_PRIORITY_HIGH:
+      return RGX_CONTEXT_PRIORITY_HIGH;
+   case PVR_WINSYS_CTX_PRIORITY_MEDIUM:
+      return RGX_CONTEXT_PRIORITY_MEDIUM;
+   case PVR_WINSYS_CTX_PRIORITY_LOW:
+      return RGX_CONTEXT_PRIORITY_LOW;
+   default:
+      unreachable("Invalid winsys context priority.");
+   }
+}
+
+#endif /* PVR_SRV_JOB_COMMON_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.c
new file mode 100644 (file)
index 0000000..b6fd8e0
--- /dev/null
@@ -0,0 +1,253 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
#include <fcntl.h>
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <vulkan/vulkan.h>
+
+#include "fw-api/pvr_rogue_fwif.h"
+#include "fw-api/pvr_rogue_fwif_rf.h"
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_srv_job_compute.h"
+#include "pvr_srv_job_common.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
/* pvrsrvkm-specific compute context, wrapping the generic winsys context. */
struct pvr_srv_winsys_compute_ctx {
   struct pvr_winsys_compute_ctx base;

   /* Kernel context handle returned by pvr_srv_rgx_create_compute_context(). */
   void *handle;

   /* Timeline fd opened from PVR_SRV_SYNC_DEV_PATH; update fences for kicked
    * compute jobs are created on this timeline.
    */
   int timeline;
};

/* Downcast from the generic winsys compute context to the SRV wrapper. */
#define to_pvr_srv_winsys_compute_ctx(ctx) \
   container_of(ctx, struct pvr_srv_winsys_compute_ctx, base)
+
+VkResult pvr_srv_winsys_compute_ctx_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_compute_ctx_create_info *create_info,
+   struct pvr_winsys_compute_ctx **const ctx_out)
+{
+   struct rogue_fwif_static_computecontext_state static_state = {
+               .ctx_switch_regs = {
+                       .cdm_context_state_base_addr =
+                               create_info->static_state.cdm_ctx_state_base_addr,
+
+                       .cdm_context_pds0 = create_info->static_state.cdm_ctx_store_pds0,
+                       .cdm_context_pds0_b =
+                               create_info->static_state.cdm_ctx_store_pds0_b,
+                       .cdm_context_pds1 = create_info->static_state.cdm_ctx_store_pds1,
+
+                       .cdm_terminate_pds = create_info->static_state.cdm_ctx_terminate_pds,
+                       .cdm_terminate_pds1 =
+                               create_info->static_state.cdm_ctx_terminate_pds1,
+
+                       .cdm_resume_pds0 = create_info->static_state.cdm_ctx_resume_pds0,
+                       .cdm_resume_pds0_b = create_info->static_state.cdm_ctx_resume_pds0_b,
+               },
+       };
+
+   struct rogue_fwif_rf_cmd reset_cmd = {
+      .flags = 0U,
+   };
+
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_compute_ctx *srv_ctx;
+   VkResult result;
+
+   srv_ctx = vk_alloc(srv_ws->alloc,
+                      sizeof(*srv_ctx),
+                      8U,
+                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_ctx)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   srv_ctx->timeline = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR);
+   if (srv_ctx->timeline < 0)
+      goto err_free_srv_ctx;
+
+   result = pvr_srv_rgx_create_compute_context(
+      srv_ws->render_fd,
+      pvr_srv_from_winsys_priority(create_info->priority),
+      sizeof(reset_cmd) - sizeof(reset_cmd.regs),
+      (uint8_t *)&reset_cmd,
+      srv_ws->server_memctx_data,
+      sizeof(static_state),
+      (uint8_t *)&static_state,
+      0U,
+      RGX_CONTEXT_FLAG_DISABLESLR,
+      0U,
+      UINT_MAX,
+      &srv_ctx->handle);
+   if (result != VK_SUCCESS)
+      goto err_close_timeline;
+
+   srv_ctx->base.ws = ws;
+
+   *ctx_out = &srv_ctx->base;
+
+   return VK_SUCCESS;
+
+err_close_timeline:
+   close(srv_ctx->timeline);
+
+err_free_srv_ctx:
+   vk_free(srv_ws->alloc, srv_ctx);
+
+   return vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+}
+
+void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+   struct pvr_srv_winsys_compute_ctx *srv_ctx =
+      to_pvr_srv_winsys_compute_ctx(ctx);
+
+   pvr_srv_rgx_destroy_compute_context(srv_ws->render_fd, srv_ctx->handle);
+   close(srv_ctx->timeline);
+   vk_free(srv_ws->alloc, srv_ctx);
+}
+
/* Build the firmware CDM compute command from a generic winsys submit.
 *
 * The command is zeroed first so every register and flag not explicitly
 * copied below is reset rather than left as stack garbage (memset also
 * clears any struct padding, which matters for data handed to firmware).
 */
static void pvr_srv_compute_cmd_init(
   const struct pvr_winsys_compute_submit_info *submit_info,
   struct rogue_fwif_cmd_compute *cmd)
{
   struct rogue_fwif_cdm_regs *fw_regs = &cmd->regs;

   memset(cmd, 0, sizeof(*cmd));

   cmd->cmn.frame_num = submit_info->frame_num;

   /* Copy the CDM register state captured at command-buffer build time. */
   fw_regs->tpu_border_colour_table = submit_info->regs.tpu_border_colour_table;
   fw_regs->cdm_item = submit_info->regs.cdm_item;
   fw_regs->compute_cluster = submit_info->regs.compute_cluster;
   fw_regs->cdm_ctrl_stream_base = submit_info->regs.cdm_ctrl_stream_base;
   fw_regs->tpu = submit_info->regs.tpu;
   fw_regs->cdm_resume_pds1 = submit_info->regs.cdm_resume_pds1;

   /* Map winsys submit flags onto the firmware compute-command flags. */
   if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_PREVENT_ALL_OVERLAP)
      cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_PREVENT_ALL_OVERLAP;

   if (submit_info->flags & PVR_WINSYS_COMPUTE_FLAG_SINGLE_CORE)
      cmd->flags |= ROGUE_FWIF_COMPUTE_FLAG_SINGLE_CORE;
}
+
/* Submit a compute job to the kernel.
 *
 * Semaphores flagged for the compute stage are merged into a single wait
 * syncobj whose fd is handed to the kernel as the check fence; once a
 * semaphore has no remaining stage flags its syncobj is destroyed.  On
 * success the kernel's update fence (if any) is wrapped in a new syncobj
 * returned via syncobj_out.
 *
 * Note: the success path deliberately falls through the
 * err_destroy_wait_syncobj label — the merged wait syncobj is destroyed on
 * every exit, and `result` carries either VK_SUCCESS or the failure code.
 */
VkResult pvr_srv_winsys_compute_submit(
   const struct pvr_winsys_compute_ctx *ctx,
   const struct pvr_winsys_compute_submit_info *submit_info,
   struct pvr_winsys_syncobj **const syncobj_out)
{
   const struct pvr_srv_winsys_compute_ctx *srv_ctx =
      to_pvr_srv_winsys_compute_ctx(ctx);
   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);

   struct pvr_winsys_syncobj *signal_syncobj = NULL;
   struct pvr_winsys_syncobj *wait_syncobj = NULL;
   struct pvr_srv_winsys_syncobj *srv_syncobj;

   struct rogue_fwif_cmd_compute compute_cmd;
   VkResult result;
   int fence;

   pvr_srv_compute_cmd_init(submit_info, &compute_cmd);

   /* Accumulate all compute-stage waits into one syncobj and strip the
    * compute stage bit from each consumed semaphore.
    */
   for (uint32_t i = 0U; i < submit_info->semaphore_count; i++) {
      PVR_FROM_HANDLE(pvr_semaphore, sem, submit_info->semaphores[i]);

      if (!sem->syncobj)
         continue;

      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_COMPUTE_BIT) {
         result = pvr_srv_winsys_syncobjs_merge(sem->syncobj,
                                                wait_syncobj,
                                                &wait_syncobj);
         if (result != VK_SUCCESS)
            goto err_destroy_wait_syncobj;

         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_COMPUTE_BIT;
      }

      /* No stages left waiting on this semaphore; release its syncobj. */
      if (submit_info->stage_flags[i] == 0U) {
         pvr_srv_winsys_syncobj_destroy(sem->syncobj);
         sem->syncobj = NULL;
      }
   }

   /* Only dereferenced below when wait_syncobj is non-NULL. */
   srv_syncobj = to_pvr_srv_winsys_syncobj(wait_syncobj);

   /* The kernel may transiently refuse the kick; retry until it accepts. */
   do {
      result = pvr_srv_rgx_kick_compute2(srv_ws->render_fd,
                                         srv_ctx->handle,
                                         /* Cache operations are not
                                          * supported.
                                          */
                                         0U,
                                         0U,
                                         NULL,
                                         NULL,
                                         NULL,
                                         wait_syncobj ? srv_syncobj->fd : -1,
                                         srv_ctx->timeline,
                                         sizeof(compute_cmd),
                                         (uint8_t *)&compute_cmd,
                                         submit_info->job_num,
                                         0U,
                                         0U,
                                         0U,
                                         0U,
                                         "COMPUTE",
                                         &fence);
   } while (result == VK_NOT_READY);

   if (result != VK_SUCCESS)
      goto err_destroy_wait_syncobj;

   /* Given job submission succeeded, we don't need to close the wait fence;
    * it is consumed by the compute job itself.
    */
   if (wait_syncobj)
      srv_syncobj->fd = -1;

   if (fence != -1) {
      result = pvr_srv_winsys_syncobj_create(ctx->ws, false, &signal_syncobj);
      if (result != VK_SUCCESS)
         goto err_destroy_wait_syncobj;

      pvr_srv_set_syncobj_payload(signal_syncobj, fence);
   }

   *syncobj_out = signal_syncobj;

err_destroy_wait_syncobj:
   if (wait_syncobj)
      pvr_srv_winsys_syncobj_destroy(wait_syncobj);

   return result;
}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_compute.h
new file mode 100644 (file)
index 0000000..09a85a1
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_COMPUTE_H
+#define PVR_SRV_JOB_COMPUTE_H
+
+#include <vulkan/vulkan.h>
+
+struct pvr_winsys;
+struct pvr_winsys_compute_ctx;
+struct pvr_winsys_compute_ctx_create_info;
+struct pvr_winsys_compute_submit_info;
+struct pvr_winsys_syncobj;
+
+/*******************************************
+   Function prototypes
+ *******************************************/
+
+VkResult pvr_srv_winsys_compute_ctx_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_compute_ctx_create_info *create_info,
+   struct pvr_winsys_compute_ctx **const ctx_out);
+void pvr_srv_winsys_compute_ctx_destroy(struct pvr_winsys_compute_ctx *ctx);
+
+VkResult pvr_srv_winsys_compute_submit(
+   const struct pvr_winsys_compute_ctx *ctx,
+   const struct pvr_winsys_compute_submit_info *submit_info,
+   struct pvr_winsys_syncobj **const syncobj_out);
+
+#endif /* PVR_SRV_JOB_COMPUTE_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.c
new file mode 100644 (file)
index 0000000..5084cfb
--- /dev/null
@@ -0,0 +1,706 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <vulkan/vulkan.h>
+
+#include "fw-api/pvr_rogue_fwif.h"
+#include "fw-api/pvr_rogue_fwif_rf.h"
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_bo.h"
+#include "pvr_srv_bridge.h"
+#include "pvr_srv_job_common.h"
+#include "pvr_srv_job_render.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/log.h"
+#include "util/macros.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+#include "vk_util.h"
+
/* pvrsrvkm-specific free list, wrapping the generic winsys free list. */
struct pvr_srv_winsys_free_list {
   struct pvr_winsys_free_list base;

   /* Kernel cleanup cookie from pvr_srv_rgx_create_free_list(). */
   void *handle;

   /* Optional global (parent) free list; NULL when this is a root list. */
   struct pvr_srv_winsys_free_list *parent;
};

/* Downcast from the generic winsys free list to the SRV wrapper. */
#define to_pvr_srv_winsys_free_list(free_list) \
   container_of(free_list, struct pvr_srv_winsys_free_list, base)

/* pvrsrvkm render-target dataset: one kernel HWRT handle plus a sync prim
 * per firmware RT data.
 */
struct pvr_srv_winsys_rt_dataset {
   struct pvr_winsys_rt_dataset base;

   struct {
      /* Kernel HWRT dataset handle from pvr_srv_rgx_create_hwrt_dataset(). */
      void *handle;
      struct pvr_srv_sync_prim *sync_prim;
   } rt_datas[ROGUE_FWIF_NUM_RTDATAS];
};

/* Downcast from the generic winsys RT dataset to the SRV wrapper. */
#define to_pvr_srv_winsys_rt_dataset(rt_dataset) \
   container_of(rt_dataset, struct pvr_srv_winsys_rt_dataset, base)

/* pvrsrvkm-specific render context. */
struct pvr_srv_winsys_render_ctx {
   struct pvr_winsys_render_ctx base;

   /* Handle to kernel context. */
   void *handle;

   /* Sync timelines for geometry and fragment job update fences. */
   int timeline_geom;
   int timeline_frag;
};

/* Downcast from the generic winsys render context to the SRV wrapper. */
#define to_pvr_srv_winsys_render_ctx(ctx) \
   container_of(ctx, struct pvr_srv_winsys_render_ctx, base)
+
+VkResult pvr_srv_winsys_free_list_create(
+   struct pvr_winsys *ws,
+   struct pvr_winsys_vma *free_list_vma,
+   uint32_t initial_num_pages,
+   uint32_t max_num_pages,
+   uint32_t grow_num_pages,
+   uint32_t grow_threshold,
+   struct pvr_winsys_free_list *parent_free_list,
+   struct pvr_winsys_free_list **const free_list_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_bo *srv_free_list_bo =
+      to_pvr_srv_winsys_bo(free_list_vma->bo);
+   struct pvr_srv_winsys_free_list *srv_free_list;
+   void *parent_handle;
+   VkResult result;
+
+   srv_free_list = vk_zalloc(srv_ws->alloc,
+                             sizeof(*srv_free_list),
+                             8,
+                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_free_list)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   if (parent_free_list) {
+      srv_free_list->parent = to_pvr_srv_winsys_free_list(parent_free_list);
+      parent_handle = srv_free_list->parent->handle;
+   } else {
+      srv_free_list->parent = NULL;
+      parent_handle = NULL;
+   }
+
+   result = pvr_srv_rgx_create_free_list(srv_ws->render_fd,
+                                         srv_ws->server_memctx_data,
+                                         max_num_pages,
+                                         initial_num_pages,
+                                         grow_num_pages,
+                                         grow_threshold,
+                                         parent_handle,
+#if defined(DEBUG)
+                                         PVR_SRV_TRUE /* free_list_check */,
+#else
+                                         PVR_SRV_FALSE /* free_list_check */,
+#endif
+                                         free_list_vma->dev_addr,
+                                         srv_free_list_bo->pmr,
+                                         0 /* pmr_offset */,
+                                         &srv_free_list->handle);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_free_list;
+
+   srv_free_list->base.ws = ws;
+
+   *free_list_out = &srv_free_list->base;
+
+   return VK_SUCCESS;
+
+err_vk_free_srv_free_list:
+   vk_free(srv_ws->alloc, srv_free_list);
+
+   return result;
+}
+
+void pvr_srv_winsys_free_list_destroy(struct pvr_winsys_free_list *free_list)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(free_list->ws);
+   struct pvr_srv_winsys_free_list *srv_free_list =
+      to_pvr_srv_winsys_free_list(free_list);
+
+   pvr_srv_rgx_destroy_free_list(srv_ws->render_fd, srv_free_list->handle);
+   vk_free(srv_ws->alloc, srv_free_list);
+}
+
+VkResult pvr_srv_render_target_dataset_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_rt_dataset_create_info *create_info,
+   struct pvr_winsys_rt_dataset **const rt_dataset_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_free_list *srv_local_free_list =
+      to_pvr_srv_winsys_free_list(create_info->local_free_list);
+   void *free_lists[ROGUE_FW_MAX_FREELISTS] = { NULL };
+   struct pvr_srv_winsys_rt_dataset *srv_rt_dataset;
+   VkResult result;
+
+   free_lists[ROGUE_FW_LOCAL_FREELIST] = srv_local_free_list->handle;
+
+   if (srv_local_free_list->parent) {
+      free_lists[ROGUE_FW_GLOBAL_FREELIST] =
+         srv_local_free_list->parent->handle;
+   }
+
+   srv_rt_dataset = vk_zalloc(srv_ws->alloc,
+                              sizeof(*srv_rt_dataset),
+                              8,
+                              VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_rt_dataset)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   result = pvr_srv_rgx_create_hwrt_dataset(
+      srv_ws->render_fd,
+      create_info->rt_datas[0].pm_mlist_dev_addr,
+      create_info->rt_datas[1].pm_mlist_dev_addr,
+      create_info->tpc_dev_addr,
+      create_info->rt_datas[0].macrotile_array_dev_addr,
+      create_info->rt_datas[1].macrotile_array_dev_addr,
+      create_info->rtc_dev_addr,
+      create_info->rt_datas[0].rgn_header_dev_addr,
+      create_info->rt_datas[1].rgn_header_dev_addr,
+      create_info->vheap_table_dev_addr,
+      create_info->ppp_multi_sample_ctl_y_flipped,
+      create_info->ppp_multi_sample_ctl,
+      create_info->rgn_header_size,
+      free_lists,
+      create_info->mtile_stride,
+      create_info->ppp_screen,
+      create_info->te_aa,
+      create_info->te_mtile1,
+      create_info->te_mtile2,
+      create_info->te_screen,
+      create_info->tpc_size,
+      create_info->tpc_stride,
+      create_info->isp_merge_lower_x,
+      create_info->isp_merge_lower_y,
+      create_info->isp_merge_scale_x,
+      create_info->isp_merge_scale_y,
+      create_info->isp_merge_upper_x,
+      create_info->isp_merge_upper_y,
+      create_info->isp_mtile_size,
+      create_info->max_rts,
+      &srv_rt_dataset->rt_datas[0].handle,
+      &srv_rt_dataset->rt_datas[1].handle);
+   if (result != VK_SUCCESS)
+      goto err_vk_free_srv_rt_dataset;
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) {
+      srv_rt_dataset->rt_datas[i].sync_prim = pvr_srv_sync_prim_alloc(srv_ws);
+      if (!srv_rt_dataset->rt_datas[i].sync_prim)
+         goto err_srv_sync_prim_free;
+   }
+
+   srv_rt_dataset->base.ws = ws;
+
+   *rt_dataset_out = &srv_rt_dataset->base;
+
+   return VK_SUCCESS;
+
+err_srv_sync_prim_free:
+   for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) {
+      pvr_srv_sync_prim_free(srv_rt_dataset->rt_datas[i].sync_prim);
+
+      if (srv_rt_dataset->rt_datas[i].handle) {
+         pvr_srv_rgx_destroy_hwrt_dataset(srv_ws->render_fd,
+                                          srv_rt_dataset->rt_datas[i].handle);
+      }
+   }
+
+err_vk_free_srv_rt_dataset:
+   vk_free(srv_ws->alloc, srv_rt_dataset);
+
+   return result;
+}
+
+/* Tears down a render target dataset: releases each rt_data's sync prim,
+ * destroys the server-side HWRT dataset handles, then frees the host-side
+ * wrapper.
+ */
+void pvr_srv_render_target_dataset_destroy(
+   struct pvr_winsys_rt_dataset *rt_dataset)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(rt_dataset->ws);
+   struct pvr_srv_winsys_rt_dataset *srv_rt_dataset =
+      to_pvr_srv_winsys_rt_dataset(rt_dataset);
+
+   for (uint32_t i = 0; i < ARRAY_SIZE(srv_rt_dataset->rt_datas); i++) {
+      pvr_srv_sync_prim_free(srv_rt_dataset->rt_datas[i].sync_prim);
+
+      /* The handle may be NULL if creation failed part way through. */
+      if (srv_rt_dataset->rt_datas[i].handle) {
+         pvr_srv_rgx_destroy_hwrt_dataset(srv_ws->render_fd,
+                                          srv_rt_dataset->rt_datas[i].handle);
+      }
+   }
+
+   vk_free(srv_ws->alloc, srv_rt_dataset);
+}
+
+/* Translates the winsys-level static render context state into the firmware
+ * interface (rogue_fwif) context-switch register layout. The whole firmware
+ * struct is zeroed first so any registers not covered below are 0.
+ */
+static void pvr_srv_render_ctx_fw_static_state_init(
+   struct pvr_winsys_render_ctx_create_info *create_info,
+   struct rogue_fwif_static_rendercontext_state *static_state)
+{
+   struct pvr_winsys_render_ctx_static_state *ws_static_state =
+      &create_info->static_state;
+   struct rogue_fwif_ta_regs_cswitch *regs = &static_state->ctx_switch_regs;
+
+   memset(static_state, 0, sizeof(*static_state));
+
+   regs->vdm_context_state_base_addr = ws_static_state->vdm_ctx_state_base_addr;
+   regs->ta_context_state_base_addr = ws_static_state->geom_ctx_state_base_addr;
+
+   /* Per-phantom VDM context store/resume task addresses; the firmware and
+    * winsys arrays must match in size for the 1:1 copy below.
+    */
+   STATIC_ASSERT(ARRAY_SIZE(regs->ta_state) ==
+                 ARRAY_SIZE(ws_static_state->geom_state));
+   for (uint32_t i = 0; i < ARRAY_SIZE(ws_static_state->geom_state); i++) {
+      regs->ta_state[i].vdm_context_store_task0 =
+         ws_static_state->geom_state[i].vdm_ctx_store_task0;
+      regs->ta_state[i].vdm_context_store_task1 =
+         ws_static_state->geom_state[i].vdm_ctx_store_task1;
+      regs->ta_state[i].vdm_context_store_task2 =
+         ws_static_state->geom_state[i].vdm_ctx_store_task2;
+
+      regs->ta_state[i].vdm_context_resume_task0 =
+         ws_static_state->geom_state[i].vdm_ctx_resume_task0;
+      regs->ta_state[i].vdm_context_resume_task1 =
+         ws_static_state->geom_state[i].vdm_ctx_resume_task1;
+      regs->ta_state[i].vdm_context_resume_task2 =
+         ws_static_state->geom_state[i].vdm_ctx_resume_task2;
+   }
+}
+
+/**
+ * Creates a render context on the Services KM driver.
+ *
+ * Two sync timelines are opened, one for the geometry phase and one for the
+ * fragment phase; these are used at submit time to produce the per-phase
+ * completion fences.
+ *
+ * On success the new context is returned in ctx_out; on failure all
+ * partially-acquired resources are released and an error is returned.
+ */
+VkResult pvr_srv_winsys_render_ctx_create(
+   struct pvr_winsys *ws,
+   struct pvr_winsys_render_ctx_create_info *create_info,
+   struct pvr_winsys_render_ctx **const ctx_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct rogue_fwif_rf_cmd reset_cmd = {
+      .flags = 0,
+   };
+
+   struct rogue_fwif_static_rendercontext_state static_state;
+   struct pvr_srv_winsys_render_ctx *srv_ctx;
+   VkResult result;
+
+   srv_ctx = vk_zalloc(srv_ws->alloc,
+                       sizeof(*srv_ctx),
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (!srv_ctx)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   srv_ctx->timeline_geom = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR);
+   if (srv_ctx->timeline_geom < 0) {
+      result = vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+      goto err_free_srv_ctx;
+   }
+
+   srv_ctx->timeline_frag = open(PVR_SRV_SYNC_DEV_PATH, O_CLOEXEC | O_RDWR);
+   if (srv_ctx->timeline_frag < 0) {
+      result = vk_error(NULL, VK_ERROR_INITIALIZATION_FAILED);
+      goto err_close_timeline_geom;
+   }
+
+   pvr_srv_render_ctx_fw_static_state_init(create_info, &static_state);
+
+   result = pvr_srv_rgx_create_render_context(
+      srv_ws->render_fd,
+      pvr_srv_from_winsys_priority(create_info->priority),
+      create_info->vdm_callstack_addr,
+      sizeof(reset_cmd) - sizeof(reset_cmd.regs),
+      (uint8_t *)&reset_cmd,
+      srv_ws->server_memctx_data,
+      sizeof(static_state),
+      (uint8_t *)&static_state,
+      0,
+      RGX_CONTEXT_FLAG_DISABLESLR,
+      0,
+      UINT_MAX,
+      UINT_MAX,
+      &srv_ctx->handle);
+   if (result != VK_SUCCESS)
+      goto err_close_timeline_frag;
+
+   srv_ctx->base.ws = ws;
+
+   *ctx_out = &srv_ctx->base;
+
+   return VK_SUCCESS;
+
+err_close_timeline_frag:
+   close(srv_ctx->timeline_frag);
+
+err_close_timeline_geom:
+   close(srv_ctx->timeline_geom);
+
+err_free_srv_ctx:
+   vk_free(srv_ws->alloc, srv_ctx);
+
+   /* Propagate the specific failure; previously every error path collapsed
+    * to VK_ERROR_INITIALIZATION_FAILED, discarding the result returned by
+    * pvr_srv_rgx_create_render_context().
+    */
+   return result;
+}
+
+/* Destroys a render context created by pvr_srv_winsys_render_ctx_create():
+ * releases the server-side context, closes both phase timelines and frees
+ * the host-side wrapper.
+ */
+void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+   struct pvr_srv_winsys_render_ctx *srv_ctx =
+      to_pvr_srv_winsys_render_ctx(ctx);
+
+   pvr_srv_rgx_destroy_render_context(srv_ws->render_fd, srv_ctx->handle);
+   close(srv_ctx->timeline_frag);
+   close(srv_ctx->timeline_geom);
+   vk_free(srv_ws->alloc, srv_ctx);
+}
+
+/* Builds the firmware geometry (TA) command for a render submission:
+ * zero-initializes the command, copies the geometry-phase register state,
+ * maps winsys geometry flags onto firmware TA flags, and fills in the
+ * partial-render TA/3D fence from the given sync prim.
+ */
+static void pvr_srv_geometry_cmd_init(
+   const struct pvr_winsys_render_submit_info *submit_info,
+   const struct pvr_srv_sync_prim *sync_prim,
+   struct rogue_fwif_cmd_ta *cmd)
+{
+   const struct pvr_winsys_geometry_state *state = &submit_info->geometry;
+   struct rogue_fwif_ta_regs *fw_regs = &cmd->geom_regs;
+
+   memset(cmd, 0, sizeof(*cmd));
+
+   cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
+
+   fw_regs->vdm_ctrl_stream_base = state->regs.vdm_ctrl_stream_base;
+   fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
+   fw_regs->ppp_ctrl = state->regs.ppp_ctrl;
+   fw_regs->te_psg = state->regs.te_psg;
+   fw_regs->tpu = state->regs.tpu;
+   fw_regs->vdm_context_resume_task0_size =
+      state->regs.vdm_ctx_resume_task0_size;
+   fw_regs->pds_ctrl = state->regs.pds_ctrl;
+
+   if (state->flags & PVR_WINSYS_GEOM_FLAG_FIRST_GEOMETRY)
+      cmd->flags |= ROGUE_FWIF_TAFLAGS_FIRSTKICK;
+
+   if (state->flags & PVR_WINSYS_GEOM_FLAG_LAST_GEOMETRY)
+      cmd->flags |= ROGUE_FWIF_TAFLAGS_LASTKICK;
+
+   if (state->flags & PVR_WINSYS_GEOM_FLAG_SINGLE_CORE)
+      cmd->flags |= ROGUE_FWIF_TAFLAGS_SINGLE_CORE;
+
+   /* The same sync prim is used as the PR fence; see the comment about the
+    * 1.14 KM driver in pvr_srv_winsys_render_submit().
+    */
+   cmd->partial_render_ta_3d_fence.ufo_addr.addr =
+      pvr_srv_sync_prim_get_fw_addr(sync_prim);
+   cmd->partial_render_ta_3d_fence.value = sync_prim->value;
+}
+
+/* Builds the firmware fragment (3D) command for a render submission:
+ * zero-initializes the command, copies the fragment-phase register state
+ * (including PBE and PDS background program words) and maps winsys fragment
+ * flags onto firmware render flags.
+ */
+static void pvr_srv_fragment_cmd_init(
+   const struct pvr_winsys_render_submit_info *submit_info,
+   struct rogue_fwif_cmd_3d *cmd)
+{
+   const struct pvr_winsys_fragment_state *state = &submit_info->fragment;
+   struct rogue_fwif_3d_regs *fw_regs = &cmd->regs;
+
+   memset(cmd, 0, sizeof(*cmd));
+
+   cmd->cmd_shared.cmn.frame_num = submit_info->frame_num;
+
+   fw_regs->usc_pixel_output_ctrl = state->regs.usc_pixel_output_ctrl;
+   fw_regs->isp_bgobjdepth = state->regs.isp_bgobjdepth;
+   fw_regs->isp_bgobjvals = state->regs.isp_bgobjvals;
+   fw_regs->isp_aa = state->regs.isp_aa;
+   fw_regs->isp_ctl = state->regs.isp_ctl;
+   fw_regs->tpu = state->regs.tpu;
+   fw_regs->event_pixel_pds_info = state->regs.event_pixel_pds_info;
+   fw_regs->pixel_phantom = state->regs.pixel_phantom;
+   fw_regs->event_pixel_pds_data = state->regs.event_pixel_pds_data;
+   fw_regs->isp_scissor_base = state->regs.isp_scissor_base;
+   fw_regs->isp_dbias_base = state->regs.isp_dbias_base;
+   fw_regs->isp_oclqry_base = state->regs.isp_oclqry_base;
+   fw_regs->isp_zlsctl = state->regs.isp_zlsctl;
+   fw_regs->isp_zload_store_base = state->regs.isp_zload_store_base;
+   fw_regs->isp_stencil_load_store_base =
+      state->regs.isp_stencil_load_store_base;
+   fw_regs->isp_zls_pixels = state->regs.isp_zls_pixels;
+
+   /* The firmware PBE array may be smaller (per render target) than the
+    * winsys one; the memcpy below copies only what the firmware accepts.
+    */
+   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word) ==
+                 ARRAY_SIZE(state->regs.pbe_word));
+
+   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pbe_word[0]) <=
+                 ARRAY_SIZE(state->regs.pbe_word[0]));
+
+#if !defined(NDEBUG)
+   /* Depending on the hardware we might have more PBE words than the firmware
+    * accepts so check that the extra words are 0.
+    */
+   if (ARRAY_SIZE(fw_regs->pbe_word[0]) < ARRAY_SIZE(state->regs.pbe_word[0])) {
+      /* For each color attachment. */
+      for (uint32_t i = 0; i < ARRAY_SIZE(state->regs.pbe_word); i++) {
+         /* For each extra PBE word not used by the firmware. */
+         for (uint32_t j = ARRAY_SIZE(fw_regs->pbe_word[0]);
+              j < ARRAY_SIZE(state->regs.pbe_word[0]);
+              j++) {
+            assert(state->regs.pbe_word[i][j] == 0);
+         }
+      }
+   }
+#endif
+
+   memcpy(fw_regs->pbe_word, state->regs.pbe_word, sizeof(fw_regs->pbe_word));
+
+   fw_regs->tpu_border_colour_table = state->regs.tpu_border_colour_table;
+
+   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_bgnd) ==
+                 ARRAY_SIZE(state->regs.pds_bgnd));
+   typed_memcpy(fw_regs->pds_bgnd,
+                state->regs.pds_bgnd,
+                ARRAY_SIZE(fw_regs->pds_bgnd));
+
+   STATIC_ASSERT(ARRAY_SIZE(fw_regs->pds_pr_bgnd) ==
+                 ARRAY_SIZE(state->regs.pds_pr_bgnd));
+   typed_memcpy(fw_regs->pds_pr_bgnd,
+                state->regs.pds_pr_bgnd,
+                ARRAY_SIZE(fw_regs->pds_pr_bgnd));
+
+   if (state->flags & PVR_WINSYS_FRAG_FLAG_DEPTH_BUFFER_PRESENT)
+      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_DEPTHBUFFER;
+
+   if (state->flags & PVR_WINSYS_FRAG_FLAG_STENCIL_BUFFER_PRESENT)
+      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_STENCILBUFFER;
+
+   if (state->flags & PVR_WINSYS_FRAG_FLAG_PREVENT_CDM_OVERLAP)
+      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_PREVENT_CDM_OVERLAP;
+
+   if (state->flags & PVR_WINSYS_FRAG_FLAG_SINGLE_CORE)
+      cmd->flags |= ROGUE_FWIF_RENDERFLAGS_SINGLE_CORE;
+
+   cmd->zls_stride = state->zls_stride;
+   cmd->sls_stride = state->sls_stride;
+}
+
+/**
+ * Submits a render job (geometry and, optionally, fragment work) on the
+ * given render context.
+ *
+ * Wait semaphores relevant to each phase are merged into per-phase wait
+ * sync objects and consumed by the kick; on success, per-phase signal sync
+ * objects wrapping the fences returned by the kernel are stored in
+ * syncobj_geom_out / syncobj_frag_out.
+ */
+VkResult pvr_srv_winsys_render_submit(
+   const struct pvr_winsys_render_ctx *ctx,
+   const struct pvr_winsys_render_submit_info *submit_info,
+   struct pvr_winsys_syncobj **const syncobj_geom_out,
+   struct pvr_winsys_syncobj **const syncobj_frag_out)
+{
+   const struct pvr_srv_winsys_rt_dataset *srv_rt_dataset =
+      to_pvr_srv_winsys_rt_dataset(submit_info->rt_dataset);
+   struct pvr_srv_sync_prim *sync_prim =
+      srv_rt_dataset->rt_datas[submit_info->rt_data_idx].sync_prim;
+   void *rt_data_handle =
+      srv_rt_dataset->rt_datas[submit_info->rt_data_idx].handle;
+   const struct pvr_srv_winsys_render_ctx *srv_ctx =
+      to_pvr_srv_winsys_render_ctx(ctx);
+   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ctx->ws);
+
+   uint32_t sync_pmr_flags[PVR_SRV_SYNC_MAX] = { 0U };
+   void *sync_pmrs[PVR_SRV_SYNC_MAX] = { NULL };
+   uint32_t sync_pmr_count;
+
+   struct pvr_winsys_syncobj *geom_signal_syncobj = NULL;
+   struct pvr_winsys_syncobj *frag_signal_syncobj = NULL;
+   struct pvr_winsys_syncobj *geom_wait_syncobj = NULL;
+   struct pvr_winsys_syncobj *frag_wait_syncobj = NULL;
+   struct pvr_srv_winsys_syncobj *srv_geom_syncobj;
+   struct pvr_srv_winsys_syncobj *srv_frag_syncobj;
+
+   struct rogue_fwif_cmd_ta geom_cmd;
+   struct rogue_fwif_cmd_3d frag_cmd;
+
+   int fence_frag;
+   int fence_geom;
+
+   VkResult result;
+
+   pvr_srv_geometry_cmd_init(submit_info, sync_prim, &geom_cmd);
+   pvr_srv_fragment_cmd_init(submit_info, &frag_cmd);
+
+   /* Fold each wait semaphore into the geometry and/or fragment wait syncobj
+    * depending on its stage flags; fully-consumed semaphores give up their
+    * syncobj.
+    */
+   for (uint32_t i = 0U; i < submit_info->semaphore_count; i++) {
+      PVR_FROM_HANDLE(pvr_semaphore, sem, submit_info->semaphores[i]);
+
+      if (!sem->syncobj)
+         continue;
+
+      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_GEOM_BIT) {
+         result = pvr_srv_winsys_syncobjs_merge(sem->syncobj,
+                                                geom_wait_syncobj,
+                                                &geom_wait_syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_wait_syncobjs;
+
+         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_GEOM_BIT;
+      }
+
+      if (submit_info->stage_flags[i] & PVR_PIPELINE_STAGE_FRAG_BIT) {
+         result = pvr_srv_winsys_syncobjs_merge(sem->syncobj,
+                                                frag_wait_syncobj,
+                                                &frag_wait_syncobj);
+         if (result != VK_SUCCESS)
+            goto err_destroy_wait_syncobjs;
+
+         submit_info->stage_flags[i] &= ~PVR_PIPELINE_STAGE_FRAG_BIT;
+      }
+
+      if (submit_info->stage_flags[i] == 0U) {
+         pvr_srv_winsys_syncobj_destroy(sem->syncobj);
+         sem->syncobj = NULL;
+      }
+   }
+
+   srv_geom_syncobj = to_pvr_srv_winsys_syncobj(geom_wait_syncobj);
+   srv_frag_syncobj = to_pvr_srv_winsys_syncobj(frag_wait_syncobj);
+
+   if (submit_info->bo_count <= ARRAY_SIZE(sync_pmrs)) {
+      sync_pmr_count = submit_info->bo_count;
+   } else {
+      mesa_logw("Too many bos to synchronize access to (ignoring %zu bos)\n",
+                submit_info->bo_count - ARRAY_SIZE(sync_pmrs));
+      sync_pmr_count = ARRAY_SIZE(sync_pmrs);
+   }
+
+   STATIC_ASSERT(ARRAY_SIZE(sync_pmrs) == ARRAY_SIZE(sync_pmr_flags));
+   assert(sync_pmr_count <= ARRAY_SIZE(sync_pmrs));
+   for (uint32_t i = 0; i < sync_pmr_count; i++) {
+      const struct pvr_winsys_job_bo *job_bo = &submit_info->bos[i];
+      const struct pvr_srv_winsys_bo *srv_bo = to_pvr_srv_winsys_bo(job_bo->bo);
+
+      sync_pmrs[i] = srv_bo->pmr;
+
+      if (job_bo->flags & PVR_WINSYS_JOB_BO_FLAG_WRITE)
+         sync_pmr_flags[i] = PVR_BUFFER_FLAG_WRITE;
+      else
+         sync_pmr_flags[i] = PVR_BUFFER_FLAG_READ;
+   }
+
+   /* The 1.14 PowerVR Services KM driver doesn't add a sync dependency to the
+    * fragment phase on the geometry phase for us. This makes it
+    * necessary to use a sync prim for this purpose. This requires that we pass
+    * in the same sync prim information for the geometry phase update and the
+    * PR fence. We update the sync prim value here as this is the value the
+    * sync prim will get updated to once the geometry phase has completed and
+    * the value the PR or fragment phase will be fenced on.
+    */
+   sync_prim->value++;
+
+   /* The kick is retried while the kernel reports it is not ready. */
+   do {
+      result =
+         pvr_srv_rgx_kick_render2(srv_ws->render_fd,
+                                  srv_ctx->handle,
+                                  /* Currently no support for cache operations.
+                                   */
+                                  0,
+                                  0,
+                                  NULL,
+                                  NULL,
+                                  NULL,
+                                  1,
+                                  &sync_prim->srv_ws->sync_block_handle,
+                                  &sync_prim->offset,
+                                  &sync_prim->value,
+                                  0,
+                                  NULL,
+                                  NULL,
+                                  NULL,
+                                  sync_prim->srv_ws->sync_block_handle,
+                                  sync_prim->offset,
+                                  sync_prim->value,
+                                  geom_wait_syncobj ? srv_geom_syncobj->fd : -1,
+                                  srv_ctx->timeline_geom,
+                                  &fence_geom,
+                                  "GEOM",
+                                  frag_wait_syncobj ? srv_frag_syncobj->fd : -1,
+                                  srv_ctx->timeline_frag,
+                                  &fence_frag,
+                                  "FRAG",
+                                  sizeof(geom_cmd),
+                                  (uint8_t *)&geom_cmd,
+                                  /* Currently no support for PRs. */
+                                  0,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  sizeof(frag_cmd),
+                                  (uint8_t *)&frag_cmd,
+                                  submit_info->job_num,
+                                  true, /* Always kick the TA. */
+                                  true, /* Always kick a PR. */
+                                  submit_info->run_frag,
+                                  false,
+                                  0,
+                                  rt_data_handle,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  /* Currently no support for PRs. */
+                                  NULL,
+                                  sync_pmr_count,
+                                  sync_pmr_count ? sync_pmr_flags : NULL,
+                                  sync_pmr_count ? sync_pmrs : NULL,
+                                  0,
+                                  0,
+                                  0,
+                                  0,
+                                  0);
+   } while (result == VK_NOT_READY);
+
+   if (result != VK_SUCCESS)
+      goto err_destroy_wait_syncobjs;
+
+   /* Given job submission succeeded, we don't need to close wait fences, these
+    * should be consumed by the render job itself.
+    */
+   if (geom_wait_syncobj)
+      srv_geom_syncobj->fd = -1;
+
+   if (frag_wait_syncobj)
+      srv_frag_syncobj->fd = -1;
+
+   if (fence_geom != -1) {
+      result =
+         pvr_srv_winsys_syncobj_create(ctx->ws, false, &geom_signal_syncobj);
+      if (result != VK_SUCCESS)
+         goto err_destroy_wait_syncobjs;
+
+      pvr_srv_set_syncobj_payload(geom_signal_syncobj, fence_geom);
+   }
+
+   if (fence_frag != -1) {
+      result =
+         pvr_srv_winsys_syncobj_create(ctx->ws, false, &frag_signal_syncobj);
+      if (result != VK_SUCCESS) {
+         if (geom_signal_syncobj)
+            pvr_srv_winsys_syncobj_destroy(geom_signal_syncobj);
+         goto err_destroy_wait_syncobjs;
+      }
+
+      pvr_srv_set_syncobj_payload(frag_signal_syncobj, fence_frag);
+   }
+
+   *syncobj_geom_out = geom_signal_syncobj;
+   *syncobj_frag_out = frag_signal_syncobj;
+
+   /* NOTE: deliberate fall-through on success. The merged wait syncobjs are
+    * destroyed on both paths; on success their fds were disowned above.
+    */
+err_destroy_wait_syncobjs:
+   if (geom_wait_syncobj)
+      pvr_srv_winsys_syncobj_destroy(geom_wait_syncobj);
+
+   if (frag_wait_syncobj)
+      pvr_srv_winsys_syncobj_destroy(frag_wait_syncobj);
+
+   return result;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_job_render.h
new file mode 100644 (file)
index 0000000..bae71f6
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_JOB_RENDER_H
+#define PVR_SRV_JOB_RENDER_H
+
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+struct pvr_winsys;
+struct pvr_winsys_free_list;
+struct pvr_winsys_render_ctx;
+struct pvr_winsys_render_ctx_create_info;
+struct pvr_winsys_render_submit_info;
+struct pvr_winsys_rt_dataset;
+struct pvr_winsys_rt_dataset_create_info;
+struct pvr_winsys_syncobj;
+struct pvr_winsys_vma;
+
+/*******************************************
+   Function prototypes
+ *******************************************/
+
+/* Creates a firmware free list backed by free_list_vma; parent_free_list may
+ * be NULL for a top-level list.
+ */
+VkResult pvr_srv_winsys_free_list_create(
+   struct pvr_winsys *ws,
+   struct pvr_winsys_vma *free_list_vma,
+   uint32_t initial_num_pages,
+   uint32_t max_num_pages,
+   uint32_t grow_num_pages,
+   uint32_t grow_threshold,
+   struct pvr_winsys_free_list *parent_free_list,
+   struct pvr_winsys_free_list **const free_list_out);
+void pvr_srv_winsys_free_list_destroy(struct pvr_winsys_free_list *free_list);
+
+/* Creates/destroys the HWRT dataset used to run render jobs. */
+VkResult pvr_srv_render_target_dataset_create(
+   struct pvr_winsys *ws,
+   const struct pvr_winsys_rt_dataset_create_info *create_info,
+   struct pvr_winsys_rt_dataset **const rt_dataset_out);
+void pvr_srv_render_target_dataset_destroy(
+   struct pvr_winsys_rt_dataset *rt_dataset);
+
+/* Creates/destroys a render (geometry + fragment) context. */
+VkResult pvr_srv_winsys_render_ctx_create(
+   struct pvr_winsys *ws,
+   struct pvr_winsys_render_ctx_create_info *create_info,
+   struct pvr_winsys_render_ctx **const ctx_out);
+void pvr_srv_winsys_render_ctx_destroy(struct pvr_winsys_render_ctx *ctx);
+
+/* Submits a render job; on success returns per-phase signal sync objects. */
+VkResult pvr_srv_winsys_render_submit(
+   const struct pvr_winsys_render_ctx *ctx,
+   const struct pvr_winsys_render_submit_info *submit_info,
+   struct pvr_winsys_syncobj **const syncobj_geom_out,
+   struct pvr_winsys_syncobj **const syncobj_frag_out);
+
+#endif /* PVR_SRV_JOB_RENDER_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_public.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_public.h
new file mode 100644 (file)
index 0000000..22c088e
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_PUBLIC_H
+#define PVR_SRV_PUBLIC_H
+
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+
+/* Creates a winsys instance backed by the PowerVR Services (pvrsrvkm) kernel
+ * driver, using the given master and render fds and the supplied Vulkan
+ * allocation callbacks. Returns NULL on failure.
+ */
+struct pvr_winsys *pvr_srv_winsys_create(int master_fd,
+                                         int render_fd,
+                                         const VkAllocationCallbacks *alloc);
+
+#endif /* PVR_SRV_PUBLIC_H */
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.c b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.c
new file mode 100644 (file)
index 0000000..a25601a
--- /dev/null
@@ -0,0 +1,349 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "pvr_private.h"
+#include "pvr_srv.h"
+#include "pvr_srv_syncobj.h"
+#include "pvr_winsys.h"
+#include "util/libsync.h"
+#include "util/macros.h"
+#include "util/timespec.h"
+#include "vk_alloc.h"
+#include "vk_log.h"
+
+/* Allocates a new sync object in the given initial signaled state. No fence
+ * fd is attached yet (fd == -1); one is installed later via
+ * pvr_srv_set_syncobj_payload().
+ */
+VkResult
+pvr_srv_winsys_syncobj_create(struct pvr_winsys *ws,
+                              bool signaled,
+                              struct pvr_winsys_syncobj **const syncobj_out)
+{
+   struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   struct pvr_srv_winsys_syncobj *obj;
+
+   obj = vk_alloc(srv_ws->alloc,
+                  sizeof(*obj),
+                  8,
+                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
+   if (obj == NULL)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   obj->fd = -1;
+   obj->signaled = signaled;
+   obj->base.ws = ws;
+
+   *syncobj_out = &obj->base;
+
+   return VK_SUCCESS;
+}
+
+/* Destroys a sync object, closing any fence fd it still owns. syncobj must
+ * not be NULL.
+ */
+void pvr_srv_winsys_syncobj_destroy(struct pvr_winsys_syncobj *syncobj)
+{
+   struct pvr_srv_winsys_syncobj *srv_syncobj;
+   struct pvr_srv_winsys *srv_ws;
+
+   assert(syncobj);
+
+   srv_ws = to_pvr_srv_winsys(syncobj->ws);
+   srv_syncobj = to_pvr_srv_winsys_syncobj(syncobj);
+
+   if (srv_syncobj->fd != -1)
+      close(srv_syncobj->fd);
+
+   vk_free(srv_ws->alloc, srv_syncobj);
+}
+
+/* Records the new signaled state on the syncobj, closing (and dropping) any
+ * fence fd it currently holds. Note: function closes the fd.
+ */
+static void pvr_set_syncobj_state(struct pvr_srv_winsys_syncobj *srv_syncobj,
+                                  bool signaled)
+{
+   const int fd = srv_syncobj->fd;
+
+   srv_syncobj->fd = -1;
+   srv_syncobj->signaled = signaled;
+
+   if (fd != -1)
+      close(fd);
+}
+
+/* Installs a fence fd as the syncobj's payload, taking ownership of it and
+ * closing any previously held fd. A payload of -1 marks the syncobj as
+ * signaled with no pending fence.
+ */
+void pvr_srv_set_syncobj_payload(struct pvr_winsys_syncobj *syncobj,
+                                 int payload)
+{
+   struct pvr_srv_winsys_syncobj *srv_syncobj =
+      to_pvr_srv_winsys_syncobj(syncobj);
+
+   if (srv_syncobj->fd != -1)
+      close(srv_syncobj->fd);
+
+   srv_syncobj->fd = payload;
+   /* FIXME: Is this valid? */
+   srv_syncobj->signaled = (payload == -1);
+}
+
+/* Marks every provided sync object as unsignaled, closing any pending fence
+ * fd. NULL entries are permitted and skipped. Always succeeds.
+ */
+VkResult
+pvr_srv_winsys_syncobjs_reset(struct pvr_winsys *ws,
+                              struct pvr_winsys_syncobj **const syncobjs,
+                              uint32_t count)
+{
+   for (uint32_t idx = 0U; idx < count; idx++) {
+      struct pvr_winsys_syncobj *const syncobj = syncobjs[idx];
+
+      if (syncobj)
+         pvr_set_syncobj_state(to_pvr_srv_winsys_syncobj(syncobj), false);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Marks every provided sync object as signaled, closing any pending fence
+ * fd. NULL entries are permitted and skipped. Always succeeds.
+ */
+VkResult
+pvr_srv_winsys_syncobjs_signal(struct pvr_winsys *ws,
+                               struct pvr_winsys_syncobj **const syncobjs,
+                               uint32_t count)
+{
+   for (uint32_t i = 0; i < count; i++) {
+      struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+      if (!syncobjs[i])
+         continue;
+
+      srv_syncobj = to_pvr_srv_winsys_syncobj(syncobjs[i]);
+      pvr_set_syncobj_state(srv_syncobj, true);
+   }
+
+   return VK_SUCCESS;
+}
+
+/* Computes the absolute CLOCK_MONOTONIC deadline that is timeout_ns from now.
+ * Careful, timeout might overflow.
+ */
+static inline void pvr_start_timeout(struct timespec *timeout,
+                                     uint64_t timeout_ns)
+{
+   clock_gettime(CLOCK_MONOTONIC, timeout);
+   timespec_add_nsec(timeout, timeout, timeout_ns);
+}
+
+/* Returns the time remaining until the absolute deadline computed by
+ * pvr_start_timeout(). Careful, a negative value might be returned.
+ */
+static inline struct timespec
+pvr_get_remaining_time(const struct timespec *timeout)
+{
+   struct timespec time;
+
+   clock_gettime(CLOCK_MONOTONIC, &time);
+   timespec_sub(&time, timeout, &time);
+
+   return time;
+}
+
+/* Waits on a set of sync objects using ppoll() on their fence fds.
+ *
+ * timeout == 0 -> Get status without waiting.
+ * timeout == UINT64_MAX -> Wait infinitely.
+ * else wait for the given timeout in nanoseconds.
+ *
+ * Note: `timeout` is a uint64_t, so the infinite-wait sentinel must be
+ * UINT64_MAX. The previous comparison against ~0U (== UINT32_MAX after
+ * promotion) meant a caller passing UINT64_MAX - Vulkan's infinite timeout -
+ * fell into the timed path and overflowed the deadline computation.
+ */
+VkResult
+pvr_srv_winsys_syncobjs_wait(struct pvr_winsys *ws,
+                             struct pvr_winsys_syncobj **const syncobjs,
+                             uint32_t count,
+                             bool wait_all,
+                             uint64_t timeout)
+{
+   const struct pvr_srv_winsys *srv_ws = to_pvr_srv_winsys(ws);
+   uint32_t unsignaled_count = 0U;
+   struct timespec end_time;
+   struct pollfd *poll_fds;
+   VkResult result;
+   int ppoll_ret;
+
+   if (timeout != 0U && timeout != UINT64_MAX) {
+      /* We don't worry about overflow since ppoll() returns EINVAL on
+       * negative timeout.
+       */
+      pvr_start_timeout(&end_time, timeout);
+   }
+
+   poll_fds = vk_alloc(srv_ws->alloc,
+                       sizeof(*poll_fds) * count,
+                       8,
+                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
+   if (!poll_fds)
+      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   for (uint32_t i = 0; i < count; i++) {
+      struct pvr_srv_winsys_syncobj *srv_syncobj =
+         to_pvr_srv_winsys_syncobj(syncobjs[i]);
+
+      /* -1 in case if fence is signaled or uninitialized, ppoll will skip the
+       * fence.
+       */
+      if (!srv_syncobj || srv_syncobj->signaled || srv_syncobj->fd == -1) {
+         poll_fds[i].fd = -1;
+      } else {
+         poll_fds[i].fd = srv_syncobj->fd;
+         unsignaled_count++;
+      }
+
+      poll_fds[i].events = POLLIN;
+      poll_fds[i].revents = 0U;
+   }
+
+   if (unsignaled_count == 0U) {
+      result = VK_SUCCESS;
+      goto end_wait_for_fences;
+   }
+
+   /* TODO: Implement device loss handling like anvil: reporting the loss
+    * save the reported status, maybe abort() on env flag, etc.
+    */
+
+   do {
+      if (timeout == UINT64_MAX) {
+         ppoll_ret = ppoll(poll_fds, count, NULL, NULL);
+      } else {
+         struct timespec remaining_time;
+
+         if (timeout == 0U) {
+            remaining_time = (struct timespec){ 0UL, 0UL };
+         } else {
+            /* ppoll() returns EINVAL on negative timeout. Nothing to worry.
+             */
+            remaining_time = pvr_get_remaining_time(&end_time);
+         }
+
+         ppoll_ret = ppoll(poll_fds, count, &remaining_time, NULL);
+      }
+
+      if (ppoll_ret > 0U) {
+         /* ppoll_ret contains the amount of structs updated by poll(). */
+         unsignaled_count -= ppoll_ret;
+
+         /* ppoll_ret > 0 is for early loop termination. */
+         for (uint32_t i = 0; ppoll_ret > 0 && i < count; i++) {
+            struct pvr_srv_winsys_syncobj *srv_syncobj;
+
+            if (poll_fds[i].revents == 0)
+               continue;
+
+            if (poll_fds[i].revents & (POLLNVAL | POLLERR)) {
+               result = vk_error(NULL, VK_ERROR_DEVICE_LOST);
+               goto end_wait_for_fences;
+            }
+
+            srv_syncobj = to_pvr_srv_winsys_syncobj(syncobjs[i]);
+            pvr_set_syncobj_state(srv_syncobj, true);
+
+            if (!wait_all) {
+               result = VK_SUCCESS;
+               goto end_wait_for_fences;
+            }
+
+            /* -1 makes ppoll ignore it and set revents to 0. */
+            poll_fds[i].fd = -1;
+            ppoll_ret--;
+         }
+
+         /* For zero timeout, just return even if we still have unsignaled
+          * fences.
+          */
+         if (timeout == 0U && unsignaled_count != 0U) {
+            result = VK_TIMEOUT;
+            goto end_wait_for_fences;
+         }
+      } else if (ppoll_ret == 0) {
+         result = VK_TIMEOUT;
+         goto end_wait_for_fences;
+      }
+
+      /* Careful as we might have decremented ppoll_ret to 0. */
+   } while ((ppoll_ret != -1 && unsignaled_count != 0) ||
+            (ppoll_ret == -1 && (errno == EINTR || errno == EAGAIN)));
+
+   /* We assume device loss in case of an unknown error or invalid fd. */
+   if (ppoll_ret != -1)
+      result = VK_SUCCESS;
+   else if (errno == EINVAL)
+      result = VK_TIMEOUT;
+   else if (errno == ENOMEM)
+      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+   else
+      result = vk_error(NULL, VK_ERROR_DEVICE_LOST);
+
+end_wait_for_fences:
+   vk_free(srv_ws->alloc, poll_fds);
+
+   return result;
+}
+
+/* Merges src into target, returning a syncobj that signals once both have.
+ *
+ * Ownership: src is never consumed. On success target is destroyed (it may
+ * also be returned directly when src carries no fence); on failure target is
+ * left untouched. If src has no fence, *syncobj_out is simply target.
+ */
+VkResult pvr_srv_winsys_syncobjs_merge(struct pvr_winsys_syncobj *src,
+                                       struct pvr_winsys_syncobj *target,
+                                       struct pvr_winsys_syncobj **syncobj_out)
+{
+   struct pvr_srv_winsys_syncobj *srv_target =
+      to_pvr_srv_winsys_syncobj(target);
+   struct pvr_srv_winsys_syncobj *srv_src = to_pvr_srv_winsys_syncobj(src);
+   struct pvr_srv_winsys_syncobj *srv_output;
+   struct pvr_winsys_syncobj *output = NULL;
+   VkResult result;
+
+   if (!srv_src || srv_src->fd == -1) {
+      *syncobj_out = target;
+      return VK_SUCCESS;
+   }
+
+   result = pvr_srv_winsys_syncobj_create(src->ws, false, &output);
+   if (result != VK_SUCCESS)
+      return result;
+
+   if (!srv_target || srv_target->fd == -1) {
+      /* Target holds no fence: the merge result is just a duplicate of src's
+       * fence.
+       */
+      int fd = dup(srv_src->fd);
+      if (fd < 0) {
+         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+         goto err_syncobj_destroy;
+      }
+
+      pvr_srv_set_syncobj_payload(output, fd);
+      if (target)
+         pvr_srv_winsys_syncobj_destroy(target);
+      *syncobj_out = output;
+      return VK_SUCCESS;
+   }
+
+   srv_output->fd = sync_merge("", srv_src->fd, srv_target->fd);
+   if (srv_output->fd < 0) {
+      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
+      goto err_syncobj_destroy;
+   }
+
+   pvr_srv_winsys_syncobj_destroy(target);
+
+   *syncobj_out = output;
+
+   return VK_SUCCESS;
+
+err_syncobj_destroy:
+   pvr_srv_winsys_syncobj_destroy(output);
+
+   return result;
+}
diff --git a/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.h b/src/imagination/vulkan/winsys/pvrsrvkm/pvr_srv_syncobj.h
new file mode 100644 (file)
index 0000000..857d7ef
--- /dev/null
@@ -0,0 +1,76 @@
+/*
+ * Copyright © 2022 Imagination Technologies Ltd.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef PVR_SRV_SYNCOBJ_H
+#define PVR_SRV_SYNCOBJ_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include <vulkan/vulkan.h>
+
+#include "pvr_winsys.h"
+#include "util/macros.h"
+
+/* Sync object implementation for the pvrsrvkm (Services) winsys.
+ *
+ * Wraps a sync file fd; fd == -1 means the object currently carries no
+ * payload (see pvr_srv_winsys_syncobjs_merge, which treats -1 as "nothing
+ * to merge").
+ */
+struct pvr_srv_winsys_syncobj {
+   struct pvr_winsys_syncobj base;
+
+   /* Cached version of completion.  NOTE(review): presumably set once the fd
+    * has been observed signaled, so later queries can skip polling — confirm
+    * against pvr_srv_syncobj.c.
+    */
+   bool signaled;
+
+   /* Sync file fd holding the payload, or -1 when there is none. */
+   int fd;
+};
+
+/* Downcast from the generic winsys sync object to the pvrsrvkm one. */
+#define to_pvr_srv_winsys_syncobj(syncobj) \
+   container_of(syncobj, struct pvr_srv_winsys_syncobj, base)
+
+/*******************************************
+   function prototypes
+ *******************************************/
+
+/* Create a sync object; "signaled" selects its initial completion state. */
+VkResult
+pvr_srv_winsys_syncobj_create(struct pvr_winsys *ws,
+                              bool signaled,
+                              struct pvr_winsys_syncobj **const syncobj_out);
+void pvr_srv_winsys_syncobj_destroy(struct pvr_winsys_syncobj *syncobj);
+/* Reset "count" sync objects in "syncobjs" to the unsignaled state. */
+VkResult
+pvr_srv_winsys_syncobjs_reset(struct pvr_winsys *ws,
+                              struct pvr_winsys_syncobj **const syncobjs,
+                              uint32_t count);
+/* Signal "count" sync objects in "syncobjs". */
+VkResult
+pvr_srv_winsys_syncobjs_signal(struct pvr_winsys *ws,
+                               struct pvr_winsys_syncobj **const syncobjs,
+                               uint32_t count);
+/* Wait on "count" sync objects: all of them when wait_all is set, otherwise
+ * any one; "timeout" is in nanoseconds (NOTE(review): assumed — matches the
+ * Vulkan wait semantics this backs; confirm in pvr_srv_syncobj.c).
+ */
+VkResult
+pvr_srv_winsys_syncobjs_wait(struct pvr_winsys *ws,
+                             struct pvr_winsys_syncobj **const syncobjs,
+                             uint32_t count,
+                             bool wait_all,
+                             uint64_t timeout);
+/* Merge src's payload into target, returning the result in *out.  src is
+ * never consumed; target is destroyed when replaced by a new object.
+ */
+VkResult pvr_srv_winsys_syncobjs_merge(struct pvr_winsys_syncobj *src,
+                                       struct pvr_winsys_syncobj *target,
+                                       struct pvr_winsys_syncobj **out);
+
+/* Attach fence fd "payload" to "syncobj"; ownership of the fd transfers to
+ * the sync object.
+ */
+void pvr_srv_set_syncobj_payload(struct pvr_winsys_syncobj *syncobj,
+                                 int payload);
+
+#endif /* PVR_SRV_SYNCOBJ_H */
index eac75a3..5104e18 100644 (file)
@@ -94,6 +94,9 @@ endif
 if with_gallium_freedreno or with_freedreno_vk or with_tools.contains('freedreno')
   subdir('freedreno')
 endif
+if with_imagination_vk
+  subdir('imagination')
+endif
 if with_gallium_panfrost or with_gallium_lima or with_panfrost_vk or with_tools.contains('panfrost')
   subdir('panfrost')
 endif