2 * Copyright © 2019 Raspberry Pi Ltd
4 * based in part on anv driver which is:
5 * Copyright © 2015 Intel Corporation
7 * based in part on radv driver which is:
8 * Copyright © 2016 Red Hat.
9 * Copyright © 2016 Bas Nieuwenhuizen
11 * Permission is hereby granted, free of charge, to any person obtaining a
12 * copy of this software and associated documentation files (the "Software"),
13 * to deal in the Software without restriction, including without limitation
14 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
15 * and/or sell copies of the Software, and to permit persons to whom the
16 * Software is furnished to do so, subject to the following conditions:
18 * The above copyright notice and this permission notice (including the next
19 * paragraph) shall be included in all copies or substantial portions of the
22 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
25 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
30 #ifndef V3DV_PRIVATE_H
31 #define V3DV_PRIVATE_H
35 #include <vulkan/vulkan.h>
36 #include <vulkan/vk_icd.h>
37 #include <vk_enum_to_str.h>
39 #include "vk_device.h"
40 #include "vk_format.h"
41 #include "vk_instance.h"
44 #include "vk_physical_device.h"
45 #include "vk_shader_module.h"
49 #include "vk_command_buffer.h"
50 #include "vk_command_pool.h"
60 #define VG(x) ((void)0)
63 #include "v3dv_limits.h"
65 #include "common/v3d_device_info.h"
66 #include "common/v3d_limits.h"
67 #include "common/v3d_tiling.h"
68 #include "common/v3d_util.h"
70 #include "compiler/shader_enums.h"
71 #include "compiler/spirv/nir_spirv.h"
73 #include "compiler/v3d_compiler.h"
75 #include "vk_debug_report.h"
77 #include "util/hash_table.h"
78 #include "util/sparse_array.h"
79 #include "util/xmlconfig.h"
82 #include "v3dv_entrypoints.h"
85 #include "drm-uapi/v3d_drm.h"
88 #include "simulator/v3d_simulator.h"
92 #include "wsi_common.h"
94 /* A non-fatal assert. Useful for debugging. */
96 #define v3dv_assert(x) ({ \
98 fprintf(stderr, "%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
101 #define v3dv_assert(x)
104 #define perf_debug(...) do { \
105 if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \
106 fprintf(stderr, __VA_ARGS__); \
109 struct v3dv_instance;
111 #ifdef USE_V3D_SIMULATOR
112 #define using_v3d_simulator true
114 #define using_v3d_simulator false
117 struct v3d_simulator_file;
119 /* Minimum required by the Vulkan 1.1 spec */
120 #define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)
122 struct v3dv_physical_device {
123 struct vk_physical_device vk;
130 /* We need these because it is not clear how to detect
131 * valid devids in a portable way
139 uint8_t driver_build_sha1[20];
140 uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
141 uint8_t device_uuid[VK_UUID_SIZE];
142 uint8_t driver_uuid[VK_UUID_SIZE];
144 struct vk_sync_type drm_syncobj_type;
145 const struct vk_sync_type *sync_types[2];
147 struct disk_cache *disk_cache;
151 struct wsi_device wsi_device;
153 VkPhysicalDeviceMemoryProperties memory;
155 struct v3d_device_info devinfo;
157 struct v3d_simulator_file *sim_file;
159 const struct v3d_compiler *compiler;
160 uint32_t next_program_id;
162 /* This array holds all our 'struct v3dv_bo' allocations. We use this
163 * so we can add a refcount to our BOs and check if a particular BO
164 * was already allocated in this device using its GEM handle. This is
165 * necessary to properly manage BO imports, because the kernel doesn't
166 * refcount the underlying BO memory.
168 * Specifically, when self-importing (i.e. importing a BO into the same
169 * device that created it), the kernel will give us the same BO handle
170 * for both BOs and we must only free it once when both references are
171 * freed. Otherwise, if we are not self-importing, we get two different BO
172 * handles, and we want to free each one individually.
174 * The BOs in this map all have a refcnt with the reference counter and
175 * only self-imported BOs will ever have a refcnt > 1.
177 struct util_sparse_array bo_map;
188 VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,
189 struct v3dv_physical_device *pdevice,
190 VkIcdSurfaceBase *surface);
/* Looks up a previously allocated BO in the device's sparse bo_map by its
 * GEM handle. Used to detect self-imports so the same underlying BO is only
 * freed once (see the bo_map comment on v3dv_physical_device).
 */
192 static inline struct v3dv_bo *
193 v3dv_device_lookup_bo(struct v3dv_physical_device *device, uint32_t handle)
195 return (struct v3dv_bo *) util_sparse_array_get(&device->bo_map, handle);
198 VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);
199 void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);
200 struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,
203 void v3dv_meta_clear_init(struct v3dv_device *device);
204 void v3dv_meta_clear_finish(struct v3dv_device *device);
206 void v3dv_meta_blit_init(struct v3dv_device *device);
207 void v3dv_meta_blit_finish(struct v3dv_device *device);
209 void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);
210 void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);
212 bool v3dv_meta_can_use_tlb(struct v3dv_image *image,
213 const VkOffset3D *offset,
214 VkFormat *compat_format);
216 struct v3dv_instance {
217 struct vk_instance vk;
219 int physicalDeviceCount;
220 struct v3dv_physical_device physicalDevice;
222 bool pipeline_cache_enabled;
223 bool default_pipeline_cache_enabled;
226 /* FIXME: In addition to tracking the last job submitted by GPU queue (cl, csd,
227 * tfu), we still need a syncobj to track the last overall job submitted
228 * (V3DV_QUEUE_ANY) for the case we don't support multisync. Someday we can
229 * start expecting multisync to be present and drop the legacy implementation
230 * together with this V3DV_QUEUE_ANY tracker.
232 enum v3dv_queue_type {
240 /* For each GPU queue, we use a syncobj to track the last job submitted. We
241 * set the flag `first` to determine when we are starting a new cmd buffer
242 * batch and therefore a job submitted to a given queue will be the first in a
245 struct v3dv_last_job_sync {
246 /* If the job is the first submitted to a GPU queue in a cmd buffer batch */
247 bool first[V3DV_QUEUE_COUNT];
248 /* Array of syncobj to track the last job submitted to a GPU queue */
249 uint32_t syncs[V3DV_QUEUE_COUNT];
255 struct v3dv_device *device;
257 struct v3dv_last_job_sync last_job_syncs;
259 struct v3dv_job *noop_job;
262 VkResult v3dv_queue_driver_submit(struct vk_queue *vk_queue,
263 struct vk_queue_submit *submit);
265 #define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))
266 #define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \
267 sizeof(VkComponentMapping))
269 struct v3dv_meta_color_clear_pipeline {
276 struct v3dv_meta_depth_clear_pipeline {
281 struct v3dv_meta_blit_pipeline {
284 VkRenderPass pass_no_load;
285 uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
288 struct v3dv_meta_texel_buffer_copy_pipeline {
291 VkRenderPass pass_no_load;
292 uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
295 struct v3dv_pipeline_key {
296 bool robust_buffer_access;
298 uint8_t logicop_func;
300 bool sample_coverage;
301 bool sample_alpha_to_coverage;
302 bool sample_alpha_to_one;
305 enum pipe_format format;
307 } color_fmt[V3D_MAX_DRAW_BUFFERS];
308 uint8_t f32_color_rb;
309 uint32_t va_swap_rb_mask;
313 struct v3dv_pipeline_cache_stats {
319 /* Equivalent to gl_shader_stage, but including the coordinate shaders
321 * FIXME: perhaps move to common
323 enum broadcom_shader_stage {
324 BROADCOM_SHADER_VERTEX,
325 BROADCOM_SHADER_VERTEX_BIN,
326 BROADCOM_SHADER_GEOMETRY,
327 BROADCOM_SHADER_GEOMETRY_BIN,
328 BROADCOM_SHADER_FRAGMENT,
329 BROADCOM_SHADER_COMPUTE,
332 #define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)
334 /* Assumes that coordinate shaders will be custom-handled by the caller */
/* Maps a core gl_shader_stage to the corresponding broadcom_shader_stage.
 * Only non-binning render stages are produced here; any unhandled stage
 * triggers unreachable().
 */
335 static inline enum broadcom_shader_stage
336 gl_shader_stage_to_broadcom(gl_shader_stage stage)
339 case MESA_SHADER_VERTEX:
340 return BROADCOM_SHADER_VERTEX;
341 case MESA_SHADER_GEOMETRY:
342 return BROADCOM_SHADER_GEOMETRY;
343 case MESA_SHADER_FRAGMENT:
344 return BROADCOM_SHADER_FRAGMENT;
345 case MESA_SHADER_COMPUTE:
346 return BROADCOM_SHADER_COMPUTE;
348 unreachable("Unknown gl shader stage");
/* Maps a broadcom_shader_stage (including the *_BIN binning variants) back
 * to its gl_shader_stage equivalent; both the render and binning variant of
 * a stage map to the same gl_shader_stage.
 */
352 static inline gl_shader_stage
353 broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)
356 case BROADCOM_SHADER_VERTEX:
357 case BROADCOM_SHADER_VERTEX_BIN:
358 return MESA_SHADER_VERTEX;
359 case BROADCOM_SHADER_GEOMETRY:
360 case BROADCOM_SHADER_GEOMETRY_BIN:
361 return MESA_SHADER_GEOMETRY;
362 case BROADCOM_SHADER_FRAGMENT:
363 return MESA_SHADER_FRAGMENT;
364 case BROADCOM_SHADER_COMPUTE:
365 return MESA_SHADER_COMPUTE;
367 unreachable("Unknown broadcom shader stage");
372 broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)
375 case BROADCOM_SHADER_VERTEX_BIN:
376 case BROADCOM_SHADER_GEOMETRY_BIN:
384 broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)
387 case BROADCOM_SHADER_VERTEX:
388 case BROADCOM_SHADER_GEOMETRY:
/* Returns the binning counterpart of a render stage. Only valid for stages
 * that have a binning variant (vertex, geometry); any other input triggers
 * unreachable().
 */
395 static inline enum broadcom_shader_stage
396 broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)
399 case BROADCOM_SHADER_VERTEX:
400 return BROADCOM_SHADER_VERTEX_BIN;
401 case BROADCOM_SHADER_GEOMETRY:
402 return BROADCOM_SHADER_GEOMETRY_BIN;
404 unreachable("Invalid shader stage");
/* Returns a human-readable name for a broadcom shader stage. The binning
 * variants have no gl_shader_stage equivalent, so they get hard-coded names;
 * every other stage defers to gl_shader_stage_name().
 */
408 static inline const char *
409 broadcom_shader_stage_name(enum broadcom_shader_stage stage)
412 case BROADCOM_SHADER_VERTEX_BIN:
413 return "MESA_SHADER_VERTEX_BIN";
414 case BROADCOM_SHADER_GEOMETRY_BIN:
415 return "MESA_SHADER_GEOMETRY_BIN";
417 return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));
421 struct v3dv_pipeline_cache {
422 struct vk_object_base base;
424 struct v3dv_device *device;
427 struct hash_table *nir_cache;
428 struct v3dv_pipeline_cache_stats nir_stats;
430 struct hash_table *cache;
431 struct v3dv_pipeline_cache_stats stats;
433 /* For VK_EXT_pipeline_creation_cache_control. */
434 bool externally_synchronized;
440 struct v3dv_instance *instance;
441 struct v3dv_physical_device *pdevice;
443 struct v3d_device_info devinfo;
444 struct v3dv_queue queue;
446 /* Guards query->maybe_available and value for timestamps */
449 /* Signaled whenever a query is ended */
452 /* Resources used for meta operations */
456 VkPipelineLayout p_layout;
457 struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */
460 VkPipelineLayout p_layout;
461 struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */
464 VkDescriptorSetLayout ds_layout;
465 VkPipelineLayout p_layout;
466 struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */
469 VkDescriptorSetLayout ds_layout;
470 VkPipelineLayout p_layout;
471 struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */
475 struct v3dv_bo_cache {
476 /** List of struct v3d_bo freed, by age. */
477 struct list_head time_list;
478 /** List of struct v3d_bo freed, per size, by age. */
479 struct list_head *size_list;
480 uint32_t size_list_size;
485 uint32_t cache_count;
486 uint32_t max_cache_size;
492 struct v3dv_pipeline_cache default_pipeline_cache;
494 /* GL_SHADER_STATE_RECORD needs to specify default attribute values. The
495 * following covers the most common case, that is all attribute formats
496 * being float, allowing us to reuse the same BO for all
497 * pipelines matching this requirement. Pipelines that need integer
498 * attributes will create their own BO.
500 struct v3dv_bo *default_attribute_float;
501 VkPhysicalDeviceFeatures features;
506 V3DV_GRALLOC_UNKNOWN,
513 struct v3dv_device_memory {
514 struct vk_object_base base;
517 const VkMemoryType *type;
521 #define V3D_OUTPUT_IMAGE_FORMAT_NO 255
522 #define TEXTURE_DATA_FORMAT_NO 255
527 /* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */
530 /* One of V3D33_TEXTURE_DATA_FORMAT_*. */
533 /* Swizzle to apply to the RGBA shader output for storing to the tile
534 * buffer, to the RGBA tile buffer to produce shader input (for
535 * blending), and for turning the rgba8888 texture sampler return
536 * value into shader rgba values.
540 /* Whether the return value is 16F/I/UI or 32F/I/UI. */
543 /* If the format supports (linear) filtering when texturing. */
544 bool supports_filtering;
547 struct v3d_resource_slice {
550 uint32_t padded_height;
551 /* Size of a single pane of the slice. For 3D textures, there will be
552 * a number of panes equal to the minified, power-of-two-aligned
557 enum v3d_tiling_mode tiling;
558 uint32_t padded_height_of_output_image_in_uif_blocks;
561 bool v3dv_format_swizzle_needs_rb_swap(const uint8_t *swizzle);
562 bool v3dv_format_swizzle_needs_reverse(const uint8_t *swizzle);
567 const struct v3dv_format *format;
571 struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];
572 uint64_t size; /* Total size in bytes */
573 uint32_t cube_map_stride;
575 struct v3dv_device_memory *mem;
576 VkDeviceSize mem_offset;
580 /* Image is backed by VK_ANDROID_native_buffer, */
581 bool is_native_buffer_memory;
585 VkImageViewType v3dv_image_type_to_view_type(VkImageType type);
587 /* Pre-generating packets needs to consider changes in packet sizes across hw
588 * versions. Keep things simple and allocate enough space for any supported
589 * version. We ensure the size is large enough through static asserts.
591 #define V3DV_TEXTURE_SHADER_STATE_LENGTH 32
592 #define V3DV_SAMPLER_STATE_LENGTH 24
593 #define V3DV_BLEND_CFG_LENGTH 5
594 #define V3DV_CFG_BITS_LENGTH 4
595 #define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36
596 #define V3DV_VCM_CACHE_SIZE_LENGTH 2
597 #define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16
598 #define V3DV_STENCIL_CFG_LENGTH 6
600 struct v3dv_image_view {
601 struct vk_image_view vk;
603 const struct v3dv_format *format;
605 bool channel_reverse;
606 uint32_t internal_bpp;
607 uint32_t internal_type;
610 /* Precomputed (composed from createinfo->components and format swizzle)
611 * swizzles to pass in to the shader key.
613 * This could be also included on the descriptor bo, but the shader state
614 * packet doesn't need it on a bo, so we can just avoid a memory copy
618 /* Prepacked TEXTURE_SHADER_STATE. It will be copied to the descriptor info
619 * during UpdateDescriptorSets.
621 * Empirical tests show that cube arrays need a different shader state
622 * depending on whether they are used with a sampler or not, so for these
623 * we generate two states and select the one to use based on the descriptor
626 uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];
629 uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);
632 struct vk_object_base base;
635 VkBufferUsageFlags usage;
638 struct v3dv_device_memory *mem;
639 VkDeviceSize mem_offset;
642 struct v3dv_buffer_view {
643 struct vk_object_base base;
645 struct v3dv_buffer *buffer;
648 const struct v3dv_format *format;
649 uint32_t internal_bpp;
650 uint32_t internal_type;
654 uint32_t num_elements;
656 /* Prepacked TEXTURE_SHADER_STATE. */
657 uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];
660 struct v3dv_subpass_attachment {
662 VkImageLayout layout;
665 struct v3dv_subpass {
666 uint32_t input_count;
667 struct v3dv_subpass_attachment *input_attachments;
669 uint32_t color_count;
670 struct v3dv_subpass_attachment *color_attachments;
671 struct v3dv_subpass_attachment *resolve_attachments;
673 struct v3dv_subpass_attachment ds_attachment;
674 struct v3dv_subpass_attachment ds_resolve_attachment;
675 bool resolve_depth, resolve_stencil;
677 /* If we need to emit the clear of the depth/stencil attachment using a
678 * a draw call instead of using the TLB (GFXH-1461).
680 bool do_depth_clear_with_draw;
681 bool do_stencil_clear_with_draw;
687 struct v3dv_render_pass_attachment {
688 VkAttachmentDescription2 desc;
690 uint32_t first_subpass;
691 uint32_t last_subpass;
693 /* When multiview is enabled, we no longer care about when a particular
694 * attachment is first or last used in a render pass, since not all views
695 * in the attachment will meet that criteria. Instead, we need to track
696 * each individual view (layer) in each attachment and emit our stores,
697 * loads and clears accordingly.
700 uint32_t first_subpass;
701 uint32_t last_subpass;
702 } views[MAX_MULTIVIEW_VIEW_COUNT];
704 /* If this is a multisampled attachment that is going to be resolved,
705 * whether we may be able to use the TLB hardware resolve based on the
708 bool try_tlb_resolve;
711 struct v3dv_render_pass {
712 struct vk_object_base base;
714 bool multiview_enabled;
716 uint32_t attachment_count;
717 struct v3dv_render_pass_attachment *attachments;
719 uint32_t subpass_count;
720 struct v3dv_subpass *subpasses;
722 struct v3dv_subpass_attachment *subpass_attachments;
725 struct v3dv_framebuffer {
726 struct vk_object_base base;
732 /* Typically, edge tiles in the framebuffer have padding depending on the
733 * underlying tiling layout. One consequence of this is that when the
734 * framebuffer dimensions are not aligned to tile boundaries, tile stores
735 * would still write full tiles on the edges and write to the padded area.
736 * If the framebuffer is aliasing a smaller region of a larger image, then
737 * we need to be careful with this though, as we won't have padding on the
738 * edge tiles (which typically means that we need to load the tile buffer
741 bool has_edge_padding;
743 uint32_t attachment_count;
744 uint32_t color_attachment_count;
746 /* Notice that elements in 'attachments' will be NULL if the framebuffer
747 * was created imageless. The driver is expected to access attachment info
748 * from the command buffer state instead.
750 struct v3dv_image_view *attachments[0];
753 struct v3dv_frame_tiling {
757 uint32_t render_target_count;
758 uint32_t internal_bpp;
762 uint32_t tile_height;
763 uint32_t draw_tiles_x;
764 uint32_t draw_tiles_y;
765 uint32_t supertile_width;
766 uint32_t supertile_height;
767 uint32_t frame_width_in_supertiles;
768 uint32_t frame_height_in_supertiles;
771 bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
772 const VkRect2D *area,
773 struct v3dv_framebuffer *fb,
774 struct v3dv_render_pass *pass,
775 uint32_t subpass_idx);
777 /* Checks if we need to emit 2 initial tile clears for double buffer mode.
778 * This happens when we render at least 2 tiles, because in this mode each
779 * tile uses a different half of the tile buffer memory so we can have 2 tiles
780 * in flight (one being stored to memory and the next being rendered). In this
781 * scenario, if we emit a single initial tile clear we would only clear the
782 * first half of the tile buffer.
785 v3dv_do_double_initial_tile_clear(const struct v3dv_frame_tiling *tiling)
787 return tiling->double_buffer &&
788 (tiling->draw_tiles_x > 1 || tiling->draw_tiles_y > 1 ||
792 enum v3dv_cmd_buffer_status {
793 V3DV_CMD_BUFFER_STATUS_NEW = 0,
794 V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,
795 V3DV_CMD_BUFFER_STATUS_RECORDING = 2,
796 V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3
799 union v3dv_clear_value {
807 struct v3dv_cmd_buffer_attachment_state {
808 /* The original clear value as provided by the Vulkan API */
809 VkClearValue vk_clear_value;
811 /* The hardware clear value */
812 union v3dv_clear_value clear_value;
814 /* The underlying image view (from the framebuffer or, if imageless
815 * framebuffer is used, from VkRenderPassAttachmentBeginInfo.
817 struct v3dv_image_view *image_view;
819 /* If this is a multisampled attachment with a resolve operation. */
822 /* If this is a multisampled attachment with a resolve operation,
823 * whether we can use the TLB for the resolve.
825 bool use_tlb_resolve;
828 struct v3dv_viewport_state {
830 VkViewport viewports[MAX_VIEWPORTS];
831 float translate[MAX_VIEWPORTS][3];
832 float scale[MAX_VIEWPORTS][3];
835 struct v3dv_scissor_state {
837 VkRect2D scissors[MAX_SCISSORS];
840 /* Mostly a v3dv mapping of VkDynamicState, used to track which data as
843 enum v3dv_dynamic_state_bits {
844 V3DV_DYNAMIC_VIEWPORT = 1 << 0,
845 V3DV_DYNAMIC_SCISSOR = 1 << 1,
846 V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2,
847 V3DV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 3,
848 V3DV_DYNAMIC_STENCIL_REFERENCE = 1 << 4,
849 V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5,
850 V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,
851 V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,
852 V3DV_DYNAMIC_COLOR_WRITE_ENABLE = 1 << 8,
853 V3DV_DYNAMIC_ALL = (1 << 9) - 1,
856 /* Flags for dirty pipeline state.
858 enum v3dv_cmd_dirty_bits {
859 V3DV_CMD_DIRTY_VIEWPORT = 1 << 0,
860 V3DV_CMD_DIRTY_SCISSOR = 1 << 1,
861 V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 2,
862 V3DV_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 3,
863 V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4,
864 V3DV_CMD_DIRTY_PIPELINE = 1 << 5,
865 V3DV_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 6,
866 V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 7,
867 V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 8,
868 V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 9,
869 V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 10,
870 V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 11,
871 V3DV_CMD_DIRTY_BLEND_CONSTANTS = 1 << 12,
872 V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 13,
873 V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 14,
874 V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 15,
875 V3DV_CMD_DIRTY_VIEW_INDEX = 1 << 16,
876 V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE = 1 << 17,
879 struct v3dv_dynamic_state {
881 * Bitmask of (1 << VK_DYNAMIC_STATE_*).
882 * Defines the set of saved dynamic state.
886 struct v3dv_viewport_state viewport;
888 struct v3dv_scissor_state scissor;
893 } stencil_compare_mask;
898 } stencil_write_mask;
905 float blend_constants[4];
908 float constant_factor;
909 float depth_bias_clamp;
915 uint32_t color_write_enable;
918 void v3dv_viewport_compute_xform(const VkViewport *viewport,
923 V3D_EZ_UNDECIDED = 0,
930 V3DV_JOB_TYPE_GPU_CL = 0,
931 V3DV_JOB_TYPE_GPU_CL_SECONDARY,
932 V3DV_JOB_TYPE_GPU_TFU,
933 V3DV_JOB_TYPE_GPU_CSD,
934 V3DV_JOB_TYPE_CPU_RESET_QUERIES,
935 V3DV_JOB_TYPE_CPU_END_QUERY,
936 V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,
937 V3DV_JOB_TYPE_CPU_SET_EVENT,
938 V3DV_JOB_TYPE_CPU_WAIT_EVENTS,
939 V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
940 V3DV_JOB_TYPE_CPU_CSD_INDIRECT,
941 V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,
944 struct v3dv_reset_query_cpu_job_info {
945 struct v3dv_query_pool *pool;
950 struct v3dv_end_query_cpu_job_info {
951 struct v3dv_query_pool *pool;
954 /* This is one unless multiview is used */
958 struct v3dv_copy_query_results_cpu_job_info {
959 struct v3dv_query_pool *pool;
962 struct v3dv_buffer *dst;
965 VkQueryResultFlags flags;
968 struct v3dv_submit_sync_info {
969 /* List of syncs to wait before running a job */
971 struct vk_sync_wait *waits;
973 /* List of syncs to signal when all jobs complete */
974 uint32_t signal_count;
975 struct vk_sync_signal *signals;
978 struct v3dv_event_set_cpu_job_info {
979 struct v3dv_event *event;
983 struct v3dv_event_wait_cpu_job_info {
984 /* List of events to wait on */
985 uint32_t event_count;
986 struct v3dv_event **events;
989 struct v3dv_copy_buffer_to_image_cpu_job_info {
990 struct v3dv_image *image;
991 struct v3dv_buffer *buffer;
992 uint32_t buffer_offset;
993 uint32_t buffer_stride;
994 uint32_t buffer_layer_stride;
995 VkOffset3D image_offset;
996 VkExtent3D image_extent;
999 uint32_t layer_count;
1002 struct v3dv_csd_indirect_cpu_job_info {
1003 struct v3dv_buffer *buffer;
1005 struct v3dv_job *csd_job;
1007 uint32_t *wg_uniform_offsets[3];
1008 bool needs_wg_uniform_rewrite;
1011 struct v3dv_timestamp_query_cpu_job_info {
1012 struct v3dv_query_pool *pool;
1015 /* This is one unless multiview is used */
1020 struct list_head list_link;
1022 /* We only create job clones when executing secondary command buffers into
1023 * primaries. These clones don't make deep copies of the original object
1024 * so we want to flag them to avoid freeing resources they don't own.
1028 enum v3dv_job_type type;
1030 struct v3dv_device *device;
1032 struct v3dv_cmd_buffer *cmd_buffer;
1036 struct v3dv_cl indirect;
1038 /* Set of all BOs referenced by the job. This will be used for making
1039 * the list of BOs that the kernel will need to have paged in to
1044 uint64_t bo_handle_mask;
1046 struct v3dv_bo *tile_alloc;
1047 struct v3dv_bo *tile_state;
1051 uint32_t first_subpass;
1053 /* When the current subpass is split into multiple jobs, this flag is set
1054 * to true for any jobs after the first in the same subpass.
1056 bool is_subpass_continue;
1058 /* If this job is the last job emitted for a subpass. */
1059 bool is_subpass_finish;
1061 struct v3dv_frame_tiling frame_tiling;
1063 enum v3dv_ez_state ez_state;
1064 enum v3dv_ez_state first_ez_state;
1066 /* If we have already decided if we need to disable Early Z/S completely
1069 bool decided_global_ez_enable;
1071 /* If this job has been configured to use early Z/S clear */
1072 bool early_zs_clear;
1074 /* Number of draw calls recorded into the job */
1075 uint32_t draw_count;
1077 /* A flag indicating whether we want to flush every draw separately. This
1078 * can be used for debugging, or for cases where special circumstances
1079 * require this behavior.
1083 /* Whether we need to serialize this job in our command stream */
1086 /* If this is a CL job, whether we should sync before binning */
1087 bool needs_bcl_sync;
1089 /* Job specs for CPU jobs */
1091 struct v3dv_reset_query_cpu_job_info query_reset;
1092 struct v3dv_end_query_cpu_job_info query_end;
1093 struct v3dv_copy_query_results_cpu_job_info query_copy_results;
1094 struct v3dv_event_set_cpu_job_info event_set;
1095 struct v3dv_event_wait_cpu_job_info event_wait;
1096 struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;
1097 struct v3dv_csd_indirect_cpu_job_info csd_indirect;
1098 struct v3dv_timestamp_query_cpu_job_info query_timestamp;
1101 /* Job specs for TFU jobs */
1102 struct drm_v3d_submit_tfu tfu;
1104 /* Job specs for CSD jobs */
1106 struct v3dv_bo *shared_memory;
1107 uint32_t wg_count[3];
1108 uint32_t wg_base[3];
1109 struct drm_v3d_submit_csd submit;
1113 struct v3dv_wait_thread_info {
1114 struct v3dv_job *job;
1116 /* Semaphores info for any postponed jobs after a wait event */
1117 struct v3dv_submit_sync_info *sync_info;
1120 void v3dv_job_init(struct v3dv_job *job,
1121 enum v3dv_job_type type,
1122 struct v3dv_device *device,
1123 struct v3dv_cmd_buffer *cmd_buffer,
1124 int32_t subpass_idx);
1125 void v3dv_job_destroy(struct v3dv_job *job);
1127 void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);
1128 void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);
1130 void v3dv_job_start_frame(struct v3dv_job *job,
1134 bool allocate_tile_state_for_all_layers,
1135 uint32_t render_target_count,
1136 uint8_t max_internal_bpp,
1139 bool v3dv_job_type_is_gpu(struct v3dv_job *job);
1142 v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,
1143 struct v3dv_cmd_buffer *cmd_buffer);
1145 struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,
1146 enum v3dv_job_type type,
1147 struct v3dv_cmd_buffer *cmd_buffer,
1148 uint32_t subpass_idx);
1151 v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,
1153 uint32_t used_count,
1154 uint32_t *alloc_count,
1157 void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);
1159 /* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a
1160 * cmd_buffer specific header?
1162 struct v3dv_draw_info {
1163 uint32_t vertex_count;
1164 uint32_t instance_count;
1165 uint32_t first_vertex;
1166 uint32_t first_instance;
1169 struct v3dv_vertex_binding {
1170 struct v3dv_buffer *buffer;
1171 VkDeviceSize offset;
1174 struct v3dv_descriptor_state {
1175 struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];
1177 uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
1180 struct v3dv_cmd_pipeline_state {
1181 struct v3dv_pipeline *pipeline;
1183 struct v3dv_descriptor_state descriptor_state;
1186 struct v3dv_cmd_buffer_state {
1187 struct v3dv_render_pass *pass;
1188 struct v3dv_framebuffer *framebuffer;
1189 VkRect2D render_area;
1191 /* Current job being recorded */
1192 struct v3dv_job *job;
1194 uint32_t subpass_idx;
1196 struct v3dv_cmd_pipeline_state gfx;
1197 struct v3dv_cmd_pipeline_state compute;
1199 struct v3dv_dynamic_state dynamic;
1202 VkShaderStageFlagBits dirty_descriptor_stages;
1203 VkShaderStageFlagBits dirty_push_constants_stages;
1205 /* Current clip window. We use this to check whether we have an active
1206 * scissor, since in that case we can't use TLB clears and need to fallback
1209 VkRect2D clip_window;
1211 /* Whether our render area is aligned to tile boundaries. If this is false
1212 * then we have tiles that are only partially covered by the render area,
1213 * and therefore, we need to be careful with our loads and stores so we don't
1214 * modify pixels for the tile area that is not covered by the render area.
1215 * This means, for example, that we can't use the TLB to clear, since that
1216 * always clears full tiles.
1218 bool tile_aligned_render_area;
1220 uint32_t attachment_alloc_count;
1221 struct v3dv_cmd_buffer_attachment_state *attachments;
1223 struct v3dv_vertex_binding vertex_bindings[MAX_VBS];
1227 VkDeviceSize offset;
1231 /* Current uniforms */
1233 struct v3dv_cl_reloc vs_bin;
1234 struct v3dv_cl_reloc vs;
1235 struct v3dv_cl_reloc gs_bin;
1236 struct v3dv_cl_reloc gs;
1237 struct v3dv_cl_reloc fs;
1240 /* Current view index for multiview rendering */
1241 uint32_t view_index;
1243 /* Used to flag OOM conditions during command buffer recording */
1246 /* Whether we have recorded a pipeline barrier that we still need to
1250 bool has_bcl_barrier;
1252 /* Secondary command buffer state */
1254 bool occlusion_query_enable;
1257 /* Command buffer state saved during a meta operation */
1259 uint32_t subpass_idx;
1261 VkFramebuffer framebuffer;
1263 uint32_t attachment_alloc_count;
1264 uint32_t attachment_count;
1265 struct v3dv_cmd_buffer_attachment_state *attachments;
1267 bool tile_aligned_render_area;
1268 VkRect2D render_area;
1270 struct v3dv_dynamic_state dynamic;
1272 struct v3dv_cmd_pipeline_state gfx;
1273 bool has_descriptor_state;
1275 uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
1278 /* Command buffer state for queries */
1280 /* A list of vkCmdQueryEnd commands recorded in the command buffer during
1281 * a render pass. We queue these here and then schedule the corresponding
1282 * CPU jobs for them at the time we finish the GPU job in which they have
1286 uint32_t used_count;
1287 uint32_t alloc_count;
1288 struct v3dv_end_query_cpu_job_info *states;
1291 /* This BO is not NULL if we have an active query, that is, we have
1292 * called vkCmdBeginQuery but not vkCmdEndQuery.
1301 /* The following struct represents the info from a descriptor that we store on
1302 * the host memory. They are mostly links to other existing vulkan objects,
1303 * like the image_view in order to access to swizzle info, or the buffer used
1304 * for a UBO/SSBO, for example.
1306 * FIXME: revisit if makes sense to just move everything that would be needed
1307 * from a descriptor to the bo.
1309 struct v3dv_descriptor {
1310 VkDescriptorType type;
1314 struct v3dv_image_view *image_view;
1315 struct v3dv_sampler *sampler;
1319 struct v3dv_buffer *buffer;
1324 struct v3dv_buffer_view *buffer_view;
1329 bool maybe_available;
1331 /* Used by GPU queries (occlusion) */
1336 /* Used by CPU queries (timestamp) */
1341 struct v3dv_query_pool {
1342 struct vk_object_base base;
1344 struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */
1346 VkQueryType query_type;
1347 uint32_t query_count;
1348 struct v3dv_query *queries;
1351 VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,
1352 struct v3dv_query_pool *pool,
1356 VkDeviceSize stride,
1357 VkQueryResultFlags flags);
1359 void v3dv_reset_query_pools(struct v3dv_device *device,
1360 struct v3dv_query_pool *query_pool,
1364 typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,
1366 VkAllocationCallbacks *alloc);
/* Tracks a driver-internal Vulkan object that must be destroyed together with
 * the command buffer that created it (see cmd_buffer->private_objs).
 */
struct v3dv_cmd_buffer_private_obj {
   struct list_link list_link;
   /* NOTE(review): the member holding the wrapped object handle is not
    * visible in this chunk. */
   /* Callback used to destroy the wrapped object with the command buffer */
   v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;
struct v3dv_cmd_buffer {
   struct vk_command_buffer vk;

   struct v3dv_device *device;

   /* Used at submit time to link command buffers in the submission that have
    * spawned wait threads, so we can then wait on all of them to complete
    * before we process any signal semaphores or fences.
    */
   struct list_head list_link;

   VkCommandBufferUsageFlags usage_flags;

   enum v3dv_cmd_buffer_status status;

   struct v3dv_cmd_buffer_state state;

   /* FIXME: we have just one client-side and bo for the push constants,
    * independently of the stageFlags in vkCmdPushConstants, and the
    * pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage
    * tuning in the future if it makes sense.
    */
   uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];
   struct v3dv_cl_reloc push_constants_resource;

   /* Collection of Vulkan objects created internally by the driver (typically
    * during recording of meta operations) that are part of the command buffer
    * and should be destroyed with it.
    */
   struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */

   /* Per-command buffer resources for meta operations. */
   /* NOTE(review): the enclosing 'struct { ... } meta' and the inner
    * 'struct { ... } blit' opening lines are missing from this chunk; the
    * two 'dspool' fields below belong to two different nested structs
    * (blit and texel_buffer_copy) — confirm against the full header. */
      /* The current descriptor pool for blit sources */
      VkDescriptorPool dspool;

      /* The current descriptor pool for texel buffer copy sources */
      VkDescriptorPool dspool;
   } texel_buffer_copy;

   /* List of jobs in the command buffer. For primary command buffers it
    * represents the jobs we want to submit to the GPU. For secondary command
    * buffers it represents jobs that will be merged into a primary command
    * buffer via vkCmdExecuteCommands.
    */
   struct list_head jobs;
1424 struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,
1425 int32_t subpass_idx,
1426 enum v3dv_job_type type);
1427 void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);
1429 struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,
1430 uint32_t subpass_idx);
1431 struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,
1432 uint32_t subpass_idx);
1434 void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);
1436 void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,
1437 bool push_descriptor_state);
1438 void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,
1439 uint32_t dirty_dynamic_state,
1440 bool needs_subpass_resume);
1442 void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,
1443 struct v3dv_query_pool *pool,
1447 void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,
1448 struct v3dv_query_pool *pool,
1450 VkQueryControlFlags flags);
1452 void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,
1453 struct v3dv_query_pool *pool,
1456 void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,
1457 struct v3dv_query_pool *pool,
1460 struct v3dv_buffer *dst,
1463 VkQueryResultFlags flags);
1465 void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,
1466 struct drm_v3d_submit_tfu *tfu);
1468 void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,
1469 const uint32_t *wg_counts);
1471 void v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,
1473 v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);
1476 struct vk_object_base base;
struct v3dv_shader_variant {
   /* Broadcom stage this variant was compiled for (includes binning stages) */
   enum broadcom_shader_stage stage;

   /* NOTE(review): in the full header these prog_data pointers live inside a
    * union; the wrapper lines are not visible in this chunk. Only the pointer
    * matching 'stage' is meaningful. */
   struct v3d_prog_data *base;
   struct v3d_vs_prog_data *vs;
   struct v3d_gs_prog_data *gs;
   struct v3d_fs_prog_data *fs;
   struct v3d_compute_prog_data *cs;

   /* We explicitly save the prog_data_size as it would make easier to
    * serialize the variant (NOTE(review): the original comment's tail is
    * missing from this chunk — confirm intent against the full header). */
   uint32_t prog_data_size;

   /* The assembly for this variant will be uploaded to a BO shared with all
    * other shader stages in that pipeline. This is the offset in that BO.
    */
   uint32_t assembly_offset;

   /* Note: it is really likely that qpu_insts would be NULL, as it will be
    * used only temporarily, to upload it to the shared bo, as we compile the
    * different stages individually.
    */
   uint64_t *qpu_insts;
   uint32_t qpu_insts_size;
/*
 * Per-stage info for each stage, useful so shader_module_compile_to_nir and
 * other methods doesn't have so many parameters.
 *
 * FIXME: for the case of the coordinate shader and the vertex shader, module,
 * entrypoint, spec_info and nir are the same. There are also info only
 * relevant to some stages. But seemed too much a hassle to create a new
 * struct only to handle that. Revisit if such kind of info starts to grow.
 */
struct v3dv_pipeline_stage {
   /* Pipeline this stage belongs to */
   struct v3dv_pipeline *pipeline;

   enum broadcom_shader_stage stage;

   /* Client-provided shader module, entry point and specialization info */
   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   /* The following is the combined hash of module+entrypoint+spec_info+nir */
   unsigned char shader_sha1[20];

   /** A name for this program, so you can track it in shader-db output. */
   uint32_t program_id;

   VkPipelineCreationFeedbackEXT feedback;
/* We are using the descriptor pool entry for two things:
 * * Track the allocated sets, so we can properly free it if needed
 * * Track the suballocated pool bo regions, so if some descriptor set is
 *   freed, the gap could be reallocated later.
 *
 * Those only make sense if the pool was not created with the flag
 * VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT
 */
struct v3dv_descriptor_pool_entry
{
   struct v3dv_descriptor_set *set;
   /* Offset and size of the subregion allocated for this entry from the
    * pool's bo (NOTE(review): the comment tail and the offset/size member
    * declarations are missing from this chunk). */
struct v3dv_descriptor_pool {
   struct vk_object_base base;

   /* If this descriptor pool has been allocated for the driver for internal
    * use, typically to implement meta operations.
    */
   bool is_driver_internal;

   /* NOTE(review): the pool's 'struct v3dv_bo *bo' member appears to be
    * missing from this chunk. */

   /* Current offset at the descriptor bo. 0 means that we didn't use it for
    * any descriptor. If the descriptor bo is NULL, current offset is
    * meaningless (NOTE(review): the original comment tail is missing from
    * this chunk). */
   uint32_t current_offset;

   /* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set the
    * descriptor sets are handled as a whole as pool memory and handled by the
    * following pointers. If set, they are not used, and individually
    * descriptor sets are allocated/freed.
    */
   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   /* Number of live entries and the capacity of the trailing array */
   uint32_t entry_count;
   uint32_t max_entry_count;
   /* Trailing flexible array of per-set entries; must be the last member */
   struct v3dv_descriptor_pool_entry entries[0];
struct v3dv_descriptor_set {
   struct vk_object_base base;

   /* Pool this set was allocated from */
   struct v3dv_descriptor_pool *pool;

   /* Layout describing the bindings of this set */
   struct v3dv_descriptor_set_layout *layout;

   /* Offset relative to the descriptor pool bo for this set */
   uint32_t base_offset;

   /* The descriptors below can be indexed (set/binding) using the set_layout
    * (NOTE(review): original comment tail missing from this chunk) */
   struct v3dv_descriptor descriptors[0];
struct v3dv_descriptor_set_binding_layout {
   VkDescriptorType type;

   /* Number of array elements in this binding */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   /* Number of dynamic offsets used by this binding and its start index
    * within the set's dynamic offsets */
   uint32_t dynamic_offset_count;
   uint32_t dynamic_offset_index;

   /* Offset into the descriptor set where this descriptor lives (final offset
    * on the descriptor bo need to take into account set->base_offset)
    */
   uint32_t descriptor_offset;

   /* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers.
    */
   uint32_t immutable_samplers_offset;
struct v3dv_descriptor_set_layout {
   struct vk_object_base base;

   VkDescriptorSetLayoutCreateFlags flags;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total bo size needed for this descriptor set
    * (NOTE(review): the size member declaration is missing from this chunk) */

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of descriptors in this descriptor set */
   uint32_t descriptor_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* Descriptor set layouts can be destroyed even if they are still being
    * used (NOTE(review): the comment tail and the 'ref_cnt' member — which
    * the ref/unref helpers below rely on — are missing from this chunk) */

   /* Bindings in this descriptor set */
   struct v3dv_descriptor_set_binding_layout binding[0];
1654 v3dv_descriptor_set_layout_destroy(struct v3dv_device *device,
1655 struct v3dv_descriptor_set_layout *set_layout);
1658 v3dv_descriptor_set_layout_ref(struct v3dv_descriptor_set_layout *set_layout)
1660 assert(set_layout && set_layout->ref_cnt >= 1);
1661 p_atomic_inc(&set_layout->ref_cnt);
1665 v3dv_descriptor_set_layout_unref(struct v3dv_device *device,
1666 struct v3dv_descriptor_set_layout *set_layout)
1668 assert(set_layout && set_layout->ref_cnt >= 1);
1669 if (p_atomic_dec_zero(&set_layout->ref_cnt))
1670 v3dv_descriptor_set_layout_destroy(device, set_layout);
struct v3dv_pipeline_layout {
   struct vk_object_base base;

   /* NOTE(review): the two members below are per-descriptor-set data; in the
    * full header they live inside a 'struct { ... } set[...]' wrapper whose
    * lines are missing from this chunk. */
   struct v3dv_descriptor_set_layout *layout;
   uint32_t dynamic_offset_start;

   /* Shader stages that are declared to use descriptors from this layout */
   uint32_t shader_stages;

   /* Total dynamic offsets across all sets, and push constant range size */
   uint32_t dynamic_offset_count;
   uint32_t push_constant_size;
/*
 * We are using descriptor maps for ubo/ssbo and texture/samplers, so we need
 * it to be big enough to include the max value for all of them.
 *
 * FIXME: one alternative would be to allocate the map as big as you need for
 * each descriptor type. That would means more individual allocations.
 */
#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS,                         \
                                 MAX_UNIFORM_BUFFERS + MAX_INLINE_UNIFORM_BUFFERS, \
                                 MAX_STORAGE_BUFFERS)

/* Maps consecutive descriptor indices to their (set, binding, array element)
 * source, plus per-descriptor bookkeeping. */
struct v3dv_descriptor_map {
   /* TODO: avoid fixed size array/justify the size */
   unsigned num_desc; /* Number of descriptors */
   int set[DESCRIPTOR_MAP_SIZE];
   int binding[DESCRIPTOR_MAP_SIZE];
   int array_index[DESCRIPTOR_MAP_SIZE];
   int array_size[DESCRIPTOR_MAP_SIZE];
   bool used[DESCRIPTOR_MAP_SIZE];

   /* NOTE: the following is only for sampler, but this is the easier place to
    * put it (NOTE(review): original comment tail missing from this chunk). */
   uint8_t return_size[DESCRIPTOR_MAP_SIZE];
struct v3dv_sampler {
   struct vk_object_base base;

   /* Sampler properties that the compiler/driver need to consult at draw or
    * compile time */
   bool compare_enable;
   bool unnormalized_coordinates;
   bool clamp_to_transparent_black_border;

   /* Prepacked SAMPLER_STATE, that is referenced as part of the tmu
    * configuration. If needed it will be copied to the descriptor info during
    * UpdateDescriptorSets
    */
   uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];
/* One entry of a descriptor update template (vkCreateDescriptorUpdateTemplate). */
struct v3dv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   /* NOTE(review): the 'binding' member declaration is missing from this
    * chunk. */

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   /* NOTE(review): member declaration missing from this chunk. */

   /* Stride between elements into the user provided data */
   /* NOTE(review): member declaration missing from this chunk. */
struct v3dv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    * (NOTE(review): the member declaration itself is missing from this
    * chunk.) */

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template; trailing flexible array, must be last */
   struct v3dv_descriptor_template_entry entries[0];
1770 /* We keep two special values for the sampler idx that represents exactly when a
1771 * sampler is not needed/provided. The main use is that even if we don't have
1772 * sampler, we still need to do the output unpacking (through
1773 * nir_lower_tex). The easier way to do this is to add those special "no
1774 * sampler" in the sampler_map, and then use the proper unpacking for that
1777 * We have one when we want a 16bit output size, and other when we want a
1778 * 32bit output size. We use the info coming from the RelaxedPrecision
1779 * decoration to decide between one and the other.
1781 #define V3DV_NO_SAMPLER_16BIT_IDX 0
1782 #define V3DV_NO_SAMPLER_32BIT_IDX 1
/*
 * The following two helpers operate on the combined to/from texture/sampler
 * index maps at v3dv_pipeline.
 */
/* Packs a (texture, sampler) index pair into a single 32-bit key used by the
 * combined texture/sampler index maps at v3dv_pipeline: texture index in the
 * top 8 bits, sampler index in the low 24 bits.
 */
static inline uint32_t
v3dv_pipeline_combined_index_key_create(uint32_t texture_index,
                                        uint32_t sampler_index)
{
   return texture_index << 24 | sampler_index;
}
/* Unpacks a combined texture/sampler index key created by
 * v3dv_pipeline_combined_index_key_create. Either output pointer may be NULL
 * if the caller is not interested in that component.
 */
static inline void
v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,
                                        uint32_t *texture_index,
                                        uint32_t *sampler_index)
{
   uint32_t texture = combined_index_key >> 24;
   uint32_t sampler = combined_index_key & 0xffffff;

   if (texture_index)
      *texture_index = texture;

   if (sampler_index)
      *sampler_index = sampler;
}
/* Descriptor maps for every descriptor class used by a shader stage. */
struct v3dv_descriptor_maps {
   struct v3dv_descriptor_map ubo_map;
   struct v3dv_descriptor_map ssbo_map;
   struct v3dv_descriptor_map sampler_map;
   struct v3dv_descriptor_map texture_map;
/* The structure represents data shared between different objects, like the
 * pipeline and the pipeline cache, so we ref count it to know when it should
 * be freed (NOTE(review): the original comment tail and the ref-count member
 * used by the ref/unref helpers below are missing from this chunk).
 */
struct v3dv_pipeline_shared_data {
   /* Cache key identifying this set of compiled stages */
   unsigned char sha1_key[20];

   /* Per-stage descriptor maps and compiled variants, indexed by
    * broadcom_shader_stage */
   struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];
   struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];

   /* BO holding the uploaded assembly for the variants above (each variant
    * records its offset in this BO) */
   struct v3dv_bo *assembly_bo;
struct v3dv_pipeline {
   struct vk_object_base base;

   struct v3dv_device *device;

   VkShaderStageFlags active_stages;

   /* Render pass / subpass this graphics pipeline was created against */
   struct v3dv_render_pass *pass;
   struct v3dv_subpass *subpass;

   /* Note: We can't use just a MESA_SHADER_STAGES array because we also need
    * to track binning shaders. Note these will be freed once the pipeline
    * has been compiled.
    */
   struct v3dv_pipeline_stage *vs;
   struct v3dv_pipeline_stage *vs_bin;
   struct v3dv_pipeline_stage *gs;
   struct v3dv_pipeline_stage *gs_bin;
   struct v3dv_pipeline_stage *fs;
   struct v3dv_pipeline_stage *cs;

   /* Flags for whether optional pipeline stages are present, for convenience */
   /* NOTE(review): the flag member(s) themselves are missing from this chunk */

   /* Spilling memory requirements */
   /* NOTE(review): in the full header the member below appears to live inside
    * a spill-state wrapper together with a spill BO; the wrapper lines are
    * missing from this chunk. */
   uint32_t size_per_thread;

   struct v3dv_dynamic_state dynamic_state;

   struct v3dv_pipeline_layout *layout;

   /* Whether this pipeline enables depth writes */
   bool z_updates_enable;

   enum v3dv_ez_state ez_state;

   /* Multisample state */
   bool sample_rate_shading;
   uint32_t sample_mask;

   bool primitive_restart;

   /* Accessed by binding. So vb[binding]->stride is the stride of the vertex
    * array with such binding
    */
   struct v3dv_pipeline_vertex_binding {
      uint32_t instance_divisor;
   /* NOTE(review): further members and the closing '} vb[...];' of this
    * nested struct are missing from this chunk. */

   /* Note that a lot of info from VkVertexInputAttributeDescription is
    * already prepacked, so here we are only storing those that need recheck
    * later. The array must be indexed by driver location, since that is the
    * order in which we need to emit the attributes.
    */
   struct v3dv_pipeline_vertex_attrib {
   /* NOTE(review): the members of this nested struct are missing from this
    * chunk. */
   } va[MAX_VERTEX_ATTRIBS];

   enum pipe_prim_type topology;

   /* Compiled stage data shared with the pipeline cache (ref-counted) */
   struct v3dv_pipeline_shared_data *shared_data;

   /* It is the combined stages sha1, plus the pipeline key sha1. */
   unsigned char sha1[20];

   /* In general we can reuse v3dv_device->default_attribute_float, so note
    * that the following can be NULL.
    *
    * FIXME: the content of this BO will be small, so it could be improved to
    * be uploaded to a common BO. But as in most cases it will be NULL, it is
    * not a priority (NOTE(review): original comment tail missing from this
    * chunk). */
   struct v3dv_bo *default_attribute_values;

   struct vpm_config vpm_cfg;
   struct vpm_config vpm_cfg_bin;

   /* If the pipeline should emit any of the stencil configuration packets */
   bool emit_stencil_cfg[2];

   /* Blend state (NOTE(review): the opening of the nested blend struct and
    * the per-RT enable mask member are missing from this chunk). */
      /* Per-RT bit mask with blend enables */
      /* Per-RT prepacked blend config packets */
      uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];
      /* Flag indicating whether the blend factors in use require
       * color constants (NOTE(review): original comment tail missing). */
      bool needs_color_constants;
      /* Mask with enabled color channels for each RT (4 bits per RT) */
      uint32_t color_write_masks;

   /* Packets prepacked during pipeline creation
    */
   uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];
   uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];
   uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];
   uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *
                        MAX_VERTEX_ATTRIBS];
   uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];
1950 static inline VkPipelineBindPoint
1951 v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)
1953 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||
1954 !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));
1955 return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?
1956 VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;
1959 static inline struct v3dv_descriptor_state*
1960 v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,
1961 struct v3dv_pipeline *pipeline)
1963 if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)
1964 return &cmd_buffer->state.compute.descriptor_state;
1966 return &cmd_buffer->state.gfx.descriptor_state;
1969 const nir_shader_compiler_options *v3dv_pipeline_get_nir_options(void);
1971 uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);
1972 uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);
/* Logs (at debug level) a VkStructureType found in a pNext chain that the
 * driver does not handle. */
#define v3dv_debug_ignored_stype(sType) \
   mesa_logd("%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))
1977 const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);
1978 uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);
1979 const struct v3dv_format *
1980 v3dv_get_compatible_tfu_format(struct v3dv_device *device,
1981 uint32_t bpp, VkFormat *out_vk_format);
1982 bool v3dv_buffer_format_supports_features(struct v3dv_device *device,
1984 VkFormatFeatureFlags features);
1986 struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
1987 struct v3dv_pipeline *pipeline,
1988 struct v3dv_shader_variant *variant);
1990 struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
1991 struct v3dv_pipeline *pipeline,
1992 struct v3dv_shader_variant *variant,
1993 uint32_t **wg_count_offsets);
1995 struct v3dv_shader_variant *
1996 v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,
1997 struct v3dv_pipeline_cache *cache,
1998 struct v3d_key *key,
2000 const VkAllocationCallbacks *pAllocator,
2001 VkResult *out_vk_result);
2003 struct v3dv_shader_variant *
2004 v3dv_shader_variant_create(struct v3dv_device *device,
2005 enum broadcom_shader_stage stage,
2006 struct v3d_prog_data *prog_data,
2007 uint32_t prog_data_size,
2008 uint32_t assembly_offset,
2009 uint64_t *qpu_insts,
2010 uint32_t qpu_insts_size,
2011 VkResult *out_vk_result);
2014 v3dv_shader_variant_destroy(struct v3dv_device *device,
2015 struct v3dv_shader_variant *variant);
2018 v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)
2020 assert(shared_data && shared_data->ref_cnt >= 1);
2021 p_atomic_inc(&shared_data->ref_cnt);
2025 v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,
2026 struct v3dv_pipeline_shared_data *shared_data);
2029 v3dv_pipeline_shared_data_unref(struct v3dv_device *device,
2030 struct v3dv_pipeline_shared_data *shared_data)
2032 assert(shared_data && shared_data->ref_cnt >= 1);
2033 if (p_atomic_dec_zero(&shared_data->ref_cnt))
2034 v3dv_pipeline_shared_data_destroy(device, shared_data);
2037 struct v3dv_descriptor *
2038 v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,
2039 struct v3dv_descriptor_map *map,
2040 struct v3dv_pipeline_layout *pipeline_layout,
2042 uint32_t *dynamic_offset);
2044 struct v3dv_cl_reloc
2045 v3dv_descriptor_map_get_descriptor_bo(struct v3dv_device *device,
2046 struct v3dv_descriptor_state *descriptor_state,
2047 struct v3dv_descriptor_map *map,
2048 struct v3dv_pipeline_layout *pipeline_layout,
2050 VkDescriptorType *out_type);
2052 const struct v3dv_sampler *
2053 v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,
2054 struct v3dv_descriptor_map *map,
2055 struct v3dv_pipeline_layout *pipeline_layout,
2058 struct v3dv_cl_reloc
2059 v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,
2060 struct v3dv_descriptor_state *descriptor_state,
2061 struct v3dv_descriptor_map *map,
2062 struct v3dv_pipeline_layout *pipeline_layout,
2065 struct v3dv_cl_reloc
2066 v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,
2067 struct v3dv_descriptor_state *descriptor_state,
2068 struct v3dv_descriptor_map *map,
2069 struct v3dv_pipeline_layout *pipeline_layout,
2073 v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,
2074 struct v3dv_descriptor_map *map,
2075 struct v3dv_pipeline_layout *pipeline_layout,
2078 static inline const struct v3dv_sampler *
2079 v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,
2080 const struct v3dv_descriptor_set_binding_layout *binding)
2082 assert(binding->immutable_samplers_offset);
2083 return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);
2086 void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,
2087 struct v3dv_device *device,
2088 VkPipelineCacheCreateFlags,
2089 bool cache_enabled);
2091 void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);
2093 void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,
2094 struct v3dv_pipeline_cache *cache,
2096 unsigned char sha1_key[20]);
2098 nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,
2099 struct v3dv_pipeline_cache *cache,
2100 const nir_shader_compiler_options *nir_options,
2101 unsigned char sha1_key[20]);
2103 struct v3dv_pipeline_shared_data *
2104 v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,
2105 unsigned char sha1_key[20],
2109 v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,
2110 struct v3dv_pipeline_cache *cache);
2113 v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,
2114 struct v3dv_pipeline *pipeline);
2116 void v3dv_shader_module_internal_init(struct v3dv_device *device,
2117 struct vk_shader_module *module,
2120 #define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \
2121 VK_FROM_HANDLE(__v3dv_type, __name, __handle)
2123 VK_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, vk.base, VkCommandBuffer,
2124 VK_OBJECT_TYPE_COMMAND_BUFFER)
2125 VK_DEFINE_HANDLE_CASTS(v3dv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
2126 VK_DEFINE_HANDLE_CASTS(v3dv_instance, vk.base, VkInstance,
2127 VK_OBJECT_TYPE_INSTANCE)
2128 VK_DEFINE_HANDLE_CASTS(v3dv_physical_device, vk.base, VkPhysicalDevice,
2129 VK_OBJECT_TYPE_PHYSICAL_DEVICE)
2130 VK_DEFINE_HANDLE_CASTS(v3dv_queue, vk.base, VkQueue, VK_OBJECT_TYPE_QUEUE)
2132 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, base, VkBuffer,
2133 VK_OBJECT_TYPE_BUFFER)
2134 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, base, VkBufferView,
2135 VK_OBJECT_TYPE_BUFFER_VIEW)
2136 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, base, VkDeviceMemory,
2137 VK_OBJECT_TYPE_DEVICE_MEMORY)
2138 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, base, VkDescriptorPool,
2139 VK_OBJECT_TYPE_DESCRIPTOR_POOL)
2140 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, base, VkDescriptorSet,
2141 VK_OBJECT_TYPE_DESCRIPTOR_SET)
2142 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, base,
2143 VkDescriptorSetLayout,
2144 VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
2145 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, base,
2146 VkDescriptorUpdateTemplate,
2147 VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
2148 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
2149 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, base, VkFramebuffer,
2150 VK_OBJECT_TYPE_FRAMEBUFFER)
2151 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, vk.base, VkImage,
2152 VK_OBJECT_TYPE_IMAGE)
2153 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, vk.base, VkImageView,
2154 VK_OBJECT_TYPE_IMAGE_VIEW)
2155 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, base, VkPipeline,
2156 VK_OBJECT_TYPE_PIPELINE)
2157 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, base, VkPipelineCache,
2158 VK_OBJECT_TYPE_PIPELINE_CACHE)
2159 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, base, VkPipelineLayout,
2160 VK_OBJECT_TYPE_PIPELINE_LAYOUT)
2161 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, base, VkQueryPool,
2162 VK_OBJECT_TYPE_QUERY_POOL)
2163 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, base, VkRenderPass,
2164 VK_OBJECT_TYPE_RENDER_PASS)
2165 VK_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, base, VkSampler,
2166 VK_OBJECT_TYPE_SAMPLER)
2169 v3dv_ioctl(int fd, unsigned long request, void *arg)
2171 if (using_v3d_simulator)
2172 return v3d_simulator_ioctl(fd, request, arg);
2174 return drmIoctl(fd, request, arg);
2177 /* Flags OOM conditions in command buffer state.
2179 * Note: notice that no-op jobs don't have a command buffer reference.
2182 v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)
2185 cmd_buffer->state.oom = true;
2188 if (job->cmd_buffer)
2189 job->cmd_buffer->state.oom = true;
/* Early-returns from the calling function if an OOM condition has been
 * flagged on the given command buffer or on the given job's command buffer.
 * Either argument may be NULL.
 */
#define v3dv_return_if_oom(_cmd_buffer, _job) do {                  \
   const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer;        \
   if (__cmd_buffer && __cmd_buffer->state.oom)                     \
      return;                                                       \
   const struct v3dv_job *__job = _job;                             \
   if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom)  \
      return;                                                       \
} while(0)
/* Hash callback for hash tables keyed by uint64_t; 'key' points at the
 * 64-bit value. Pairs with u64_compare below.
 */
static inline uint32_t
u64_hash(const void *key)
{
   return _mesa_hash_data(key, sizeof(uint64_t));
}
/* Equality callback companion to u64_hash: compares two uint64_t keys by
 * value.
 */
static inline bool
u64_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, sizeof(uint64_t)) == 0;
}
/* Helper to call hw ver specific functions: resolves 'thing' to its
 * per-generation implementation based on the device's hardware version.
 * Only v3d 4.2 is visible as supported in this chunk. Uses GCC/Clang
 * statement expressions and __typeof.
 */
#define v3dv_X(device, thing) ({                         \
   __typeof(&v3d42_##thing) v3d_X_thing;                 \
   switch (device->devinfo.ver) {                        \
   case 42:                                              \
      v3d_X_thing = &v3d42_##thing;                      \
      break;                                             \
   default:                                              \
      unreachable("Unsupported hardware generation");    \
   }                                                     \
   v3d_X_thing;                                          \
})
2228 /* v3d_macros from common requires v3dX and V3DX definitions. Below we need to
2229 * define v3dX for each version supported, because when we compile code that
2230 * is not version-specific, all version-specific macros need to be already
2234 # include "v3dvx_private.h"
2236 # define v3dX(x) v3d42_##x
2237 # include "v3dvx_private.h"
2243 v3dv_gralloc_info(struct v3dv_device *device,
2244 const VkNativeBufferANDROID *gralloc_info,
2248 uint64_t *out_modifier);
2251 v3dv_import_native_buffer_fd(VkDevice device_h,
2253 const VkAllocationCallbacks *alloc,
2255 #endif /* ANDROID */
2257 #endif /* V3DV_PRIVATE_H */