radv: Add a list of performance counters.
[platform/upstream/mesa.git] / src / amd / vulkan / radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <stdbool.h>
#include <string.h>

#ifdef __FreeBSD__
#include <sys/types.h>
#endif
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#ifdef __linux__
#include <sys/inotify.h>
#endif

#include "util/debug.h"
#include "util/disk_cache.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_util.h"
#ifdef _WIN32
typedef void *drmDevicePtr;
#include <io.h>
#else
#include <amdgpu.h>
#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
#include "util/build_id.h"
#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "git_sha1.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vulkan/vk_icd.h"

#ifdef LLVM_AVAILABLE
#include "ac_llvm_util.h"
#endif

/* The number of IBs per submit isn't infinite; it depends on the IP type
 * (i.e. some initial setup is needed for a submit) and the number of IBs
 * (4 DW each). This limit is arbitrary but should be safe for now. Ideally,
 * we should get this limit from the KMD.
 */
#define RADV_MAX_IBS_PER_SUBMIT 192
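
/* Back-of-envelope sizing (our own arithmetic, not a figure from the KMD):
 * at 4 DW per IB descriptor, 192 IBs account for 768 DW of IB-list overhead
 * per submission, on top of the per-IP setup mentioned above. */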

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static VkResult radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission);

uint64_t
radv_get_current_time(void)
{
   return os_time_get_nano();
}

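/* A sketch of what feeds the cache UUID below: the driver build (via its own
 * function identifier, plus LLVM's when the LLVM backend is compiled in), the
 * GPU family, and the pointer size. Changing any of these invalidates
 * previously cached shaders. */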
static int
radv_device_get_cache_uuid(struct radv_physical_device *pdevice, void *uuid)
{
   enum radeon_family family = pdevice->rad_info.family;
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   unsigned ptr_size = sizeof(void *);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx)
#ifdef LLVM_AVAILABLE
       || (pdevice->use_llvm &&
           !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
#endif
   )
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
   int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");
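   /* The driconf override is specified in MiB, hence the << 20 below.
    * Hypothetical example: override_vram_size=4096 caps reported VRAM at 4 GiB. */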
   if (ov >= 0)
      return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
   return device->rad_info.vram_size;
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
   return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
   uint64_t total_size = radv_get_adjusted_vram_size(device);
   return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
}

enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,
   RADV_HEAP_GTT = 1 << 1,
   RADV_HEAP_VRAM_VIS = 1 << 2,
   RADV_HEAP_MAX = 1 << 3,
};

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
   uint64_t vram_size = radv_get_vram_size(device);
   uint64_t gtt_size = device->rad_info.gart_size;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   device->memory_properties.memoryHeapCount = 0;
   device->heaps = 0;

   if (!device->rad_info.has_dedicated_vram) {
      /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
       * greater than it. To work around this, we compute the total available memory size (GTT +
       * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
       */
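      /* Worked example (hypothetical numbers): with a 512 MiB carveout and a
       * 4 GiB GTT, total = 4.5 GiB, so ~3 GiB is reported as VRAM (rounded to
       * the GART page size) and the remaining ~1.5 GiB as GTT. */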
      const uint64_t total_size = gtt_size + visible_vram_size;
      visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size);
      gtt_size = total_size - visible_vram_size;
      vram_size = 0;
   }

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
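   /* e.g. (hypothetical): visible = 8176 MiB and the remainder is 16 MiB;
    * 16 * 9 = 144 < 8176, so no separate invisible-VRAM heap is reported. */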
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM;
      device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_GTT;
      device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM_VIS;
      device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   if (vram_index >= 0 || visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   if (visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   device->memory_properties.memoryTypeCount = type_count;

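   /* For chips with an L2-uncached mode, mirror each host-visible (or purely
    * device-local) memory type with a variant that adds the AMD
    * device-coherent/device-uncached property flags and allocates with
    * RADEON_FLAG_VA_UNCACHED. */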
   if (device->rad_info.has_l2_uncached) {
      for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

         if ((mem_type.propertyFlags &
              (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
             mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
                                                   VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            device->memory_domains[type_count] = device->memory_domains[i];
            device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      device->memory_properties.memoryTypeCount = type_count;
   }
}

static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
   if (!pdevice->use_llvm) {
      /* Some games like SotTR apply shader workarounds if the LLVM
       * version is too old or if the LLVM version string is
       * missing. This gives a 2-5% performance improvement with
       * SotTR and ACO.
       */
      if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {
         return " (LLVM 9.0.1)";
      }

      return "";
   }

#ifdef LLVM_AVAILABLE
   return " (LLVM " MESA_LLVM_VERSION_STRING ")";
#else
   unreachable("LLVM is not available");
#endif
}

int
radv_get_int_debug_option(const char *name, int default_value)
{
   const char *str;
   int result;

   str = getenv(name);
   if (!str) {
      result = default_value;
   } else {
      char *endptr;

      result = strtol(str, &endptr, 0);
      if (str == endptr) {
         /* No digits found. */
         result = default_value;
      }
   }

   return result;
}
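
/* Usage sketch (hypothetical values): with RADV_THREAD_TRACE=4096 in the
 * environment this returns 4096; an unset or non-numeric variable falls back
 * to the caller's default. strtol() runs with base 0, so hex ("0x1000") and
 * octal forms also parse. */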

static bool
radv_thread_trace_enabled()
{
   return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
          getenv("RADV_THREAD_TRACE_TRIGGER");
}

static bool
radv_spm_trace_enabled()
{
   return radv_thread_trace_enabled() &&
          debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", false);
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) ||                    \
   defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID
#define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define RADV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#endif

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = RADV_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table radv_instance_extensions_supported = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,
   .EXT_debug_utils = true,

#ifdef RADV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
};

static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
                                              struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table){
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = radv_enable_rt(device, false),
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->rad_info.gfx_level >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = !device->use_llvm,
      .KHR_push_descriptor = true,
      .KHR_ray_query = radv_enable_rt(device, false),
      .KHR_ray_tracing_maintenance1 = radv_enable_rt(device, false),
      .KHR_ray_tracing_pipeline = radv_enable_rt(device, true),
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_border_color_swizzle = device->rad_info.gfx_level >= GFX10,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = device->rad_info.gfx_level >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = radv_thread_trace_enabled(),
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_indexing = true,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = device->rad_info.has_userptr,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = device->rad_info.gfx_level >= GFX9,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = device->rad_info.gfx_level >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_multi_draw = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = device->rad_info.gfx_level >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->rad_info.gfx_level < GFX10,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = device->rad_info.gfx_level >= GFX7,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
#ifdef LLVM_AVAILABLE
      .EXT_shader_atomic_float2 = !device->use_llvm || LLVM_VERSION_MAJOR >= 14,
#else
      .EXT_shader_atomic_float2 = true,
#endif
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_transform_feedback = device->rad_info.gfx_level < GFX11,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !device->use_llvm,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = device->rad_info.gfx_level < GFX11,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_shader_integer_functions2 = true,
      .NV_compute_shader_derivatives = true,
      .NV_mesh_shader = device->use_ngg && device->rad_info.gfx_level >= GFX10_3 &&
                        device->instance->perftest_flags & RADV_PERFTEST_NV_MS && !device->use_llvm,
      /* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
       * using it.
       */
      .VALVE_descriptor_set_host_mapping =
         device->vk.instance->app_info.engine_name &&
         strcmp(device->vk.instance->app_info.engine_name, "vkd3d") == 0,
      .VALVE_mutable_descriptor_type = true,
   };
}

static bool
radv_is_conformant(const struct radv_physical_device *pdevice)
{
   return pdevice->rad_info.gfx_level >= GFX8;
}

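/* Build the Vulkan queue family index -> radv queue type mapping. A sketch of
 * the resulting table (assuming a compute queue exists and isn't disabled):
 * vk_queue_to_radv[0] = RADV_QUEUE_GENERAL, [1] = RADV_QUEUE_COMPUTE, and all
 * remaining slots hold the out-of-range marker RADV_MAX_QUEUE_FAMILIES + 1. */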
static void
radv_physical_device_init_queue_table(struct radv_physical_device *pdevice)
{
   int idx = 0;
   pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_GENERAL;
   idx++;

   for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++)
      pdevice->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1;

   if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_COMPUTE;
      idx++;
   }
   pdevice->num_queues = idx;
}

static VkResult
radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
                                struct radv_physical_device **device_out)
{
   VkResult result;
   int fd = -1;
   int master_fd = -1;

#ifdef _WIN32
   assert(drm_device == NULL);
#else
   if (drm_device) {
      const char *path = drm_device->nodes[DRM_NODE_RENDER];
      drmVersionPtr version;

      fd = open(path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Could not open device %s: %m", path);
      }

      version = drmGetVersion(fd);
      if (!version) {
         close(fd);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Could not get the kernel driver version for device %s: %m", path);
      }

      if (strcmp(version->name, "amdgpu")) {
         drmFreeVersion(version);
         close(fd);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Device '%s' is not using the AMDGPU kernel driver: %m", path);
      }
      drmFreeVersion(version);

      if (instance->debug_flags & RADV_DEBUG_STARTUP)
         fprintf(stderr, "radv: info: Found compatible device '%s'.\n", path);
   }
#endif

   struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
                                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &radv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);
   if (result != VK_SUCCESS) {
      goto fail_alloc;
   }

   device->instance = instance;

#ifdef _WIN32
   device->ws = radv_null_winsys_create();
#else
   if (drm_device) {
      bool reserve_vmid = radv_thread_trace_enabled();

      device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags,
                                             reserve_vmid);
   } else {
      device->ws = radv_null_winsys_create();
   }
#endif

   if (!device->ws) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
      goto fail_base;
   }

   device->vk.supported_sync_types = device->ws->get_sync_types(device->ws);

#ifndef _WIN32
   if (drm_device && instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         uint32_t accel_working = 0;
         struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
                                           .return_size = sizeof(accel_working),
                                           .query = AMDGPU_INFO_ACCEL_WORKING};

         if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request,
                             sizeof(struct drm_amdgpu_info)) < 0 ||
             !accel_working) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
#endif

   device->master_fd = master_fd;
   device->local_fd = fd;
   device->ws->query_info(device->ws, &device->rad_info);

   device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
#ifndef LLVM_AVAILABLE
   if (device->use_llvm) {
      fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
                      "enabled at build time.\n");
      abort();
   }
#endif

#ifdef ANDROID
   device->emulate_etc2 = !radv_device_supports_etc(device);
#else
   device->emulate_etc2 = !radv_device_supports_etc(device) &&
                          driQueryOptionb(&device->instance->dri_options, "radv_require_etc2");
#endif

   snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,
            radv_get_compiler_string(device));

   const char *marketing_name = device->ws->get_chip_name(device->ws);
   snprintf(device->marketing_name, sizeof(device->marketing_name), "%s (RADV %s%s)",
            marketing_name, device->rad_info.name, radv_get_compiler_string(device));

#ifdef ENABLE_SHADER_CACHE
   if (radv_device_get_cache_uuid(device, device->cache_uuid)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
      goto fail_wsi;
   }

   /* The GPU id is already embedded in the UUID, so we just pass "radv"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->disk_cache = disk_cache_create(device->name, buf, 0);
#endif

   if (!radv_is_conformant(device))
      vk_warn_non_conformant_implementation("radv");

   radv_get_driver_uuid(&device->driver_uuid);
   radv_get_device_uuid(&device->rad_info, &device->device_uuid);

   device->out_of_order_rast_allowed =
      device->rad_info.has_out_of_order_rast &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

   device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

   device->use_ngg = (device->rad_info.gfx_level >= GFX10 &&
                     device->rad_info.family != CHIP_NAVI14 &&
                     !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
                     device->rad_info.gfx_level >= GFX11;

   device->use_ngg_culling = device->use_ngg && device->rad_info.max_render_backends > 1 &&
                             (device->rad_info.gfx_level >= GFX10_3 ||
                              (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
                             !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);

   device->use_ngg_streamout = false;

   /* Determine the number of threads per wave for all stages. */
   device->cs_wave_size = 64;
   device->ps_wave_size = 64;
   device->ge_wave_size = 64;
   device->rt_wave_size = 64;

   if (device->rad_info.gfx_level >= GFX10) {
      if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
         device->cs_wave_size = 32;

      /* For pixel shaders, wave64 is recommended. */
      if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
         device->ps_wave_size = 32;

      if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
         device->ge_wave_size = 32;

      if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64))
         device->rt_wave_size = 32;
   }

   radv_physical_device_init_mem_types(device);

   radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);

   radv_get_nir_options(device);

#ifndef _WIN32
   if (drm_device) {
      struct stat primary_stat = {0}, render_stat = {0};

      device->available_nodes = drm_device->available_nodes;
      device->bus_info = *drm_device->businfo.pci;

      if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
          stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM primary node %s",
                            drm_device->nodes[DRM_NODE_PRIMARY]);
         goto fail_perfcounters;
      }
      device->primary_devid = primary_stat.st_rdev;

      if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
          stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM render node %s",
                            drm_device->nodes[DRM_NODE_RENDER]);
         goto fail_perfcounters;
      }
      device->render_devid = render_stat.st_rdev;
   }
#endif

   if ((device->instance->debug_flags & RADV_DEBUG_INFO))
      ac_print_gpu_info(&device->rad_info, stdout);

   radv_physical_device_init_queue_table(device);

   /* We don't check the error code, but later check if it is initialized. */
   ac_init_perfcounters(&device->rad_info, false, false, &device->ac_perfcounters);

   /* The WSI is structured as a layer on top of the driver, so this has
    * to be the last part of initialization (at least until we get other
    * semi-layers).
    */
   result = radv_init_wsi(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_perfcounters;
   }

   device->gs_table_depth =
      ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family);

   ac_get_hs_info(&device->rad_info, &device->hs);
   ac_get_task_info(&device->rad_info, &device->task_info);

   *device_out = device;

   return VK_SUCCESS;

fail_perfcounters:
   ac_destroy_perfcounters(&device->ac_perfcounters);
   disk_cache_destroy(device->disk_cache);
#ifdef ENABLE_SHADER_CACHE
fail_wsi:
#endif
   device->ws->destroy(device->ws);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   if (fd != -1)
      close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
   radv_finish_wsi(device);
   ac_destroy_perfcounters(&device->ac_perfcounters);
   device->ws->destroy(device->ws);
   disk_cache_destroy(device->disk_cache);
   if (device->local_fd != -1)
      close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

static const struct debug_control radv_debug_options[] = {
   {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
   {"nodcc", RADV_DEBUG_NO_DCC},
   {"shaders", RADV_DEBUG_DUMP_SHADERS},
   {"nocache", RADV_DEBUG_NO_CACHE},
   {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
   {"nohiz", RADV_DEBUG_NO_HIZ},
   {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
   {"allbos", RADV_DEBUG_ALL_BOS},
   {"noibs", RADV_DEBUG_NO_IBS},
   {"spirv", RADV_DEBUG_DUMP_SPIRV},
   {"vmfaults", RADV_DEBUG_VM_FAULTS},
   {"zerovram", RADV_DEBUG_ZERO_VRAM},
   {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
   {"preoptir", RADV_DEBUG_PREOPTIR},
   {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
   {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
   {"info", RADV_DEBUG_INFO},
   {"startup", RADV_DEBUG_STARTUP},
   {"checkir", RADV_DEBUG_CHECKIR},
   {"nobinning", RADV_DEBUG_NOBINNING},
   {"nongg", RADV_DEBUG_NO_NGG},
   {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
   {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
   {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
   {"llvm", RADV_DEBUG_LLVM},
   {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
   {"hang", RADV_DEBUG_HANG},
   {"img", RADV_DEBUG_IMG},
   {"noumr", RADV_DEBUG_NO_UMR},
   {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
   {"splitfma", RADV_DEBUG_SPLIT_FMA},
   {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
   {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
   {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
   {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
   {"nonggc", RADV_DEBUG_NO_NGGC},
   {"prologs", RADV_DEBUG_DUMP_PROLOGS},
   {"nodma", RADV_DEBUG_NO_DMA_BLIT},
   {NULL, 0}};

const char *
radv_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_debug_options) - 1);
   return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_PERFTEST_LOCAL_BOS},
                                                             {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
                                                             {"bolist", RADV_PERFTEST_BO_LIST},
                                                             {"cswave32", RADV_PERFTEST_CS_WAVE_32},
                                                             {"pswave32", RADV_PERFTEST_PS_WAVE_32},
                                                             {"gewave32", RADV_PERFTEST_GE_WAVE_32},
                                                             {"nosam", RADV_PERFTEST_NO_SAM},
                                                             {"sam", RADV_PERFTEST_SAM},
                                                             {"rt", RADV_PERFTEST_RT},
                                                             {"nggc", RADV_PERFTEST_NGGC},
                                                             {"emulate_rt", RADV_PERFTEST_EMULATE_RT},
                                                             {"nv_ms", RADV_PERFTEST_NV_MS},
                                                             {"rtwave64", RADV_PERFTEST_RT_WAVE_64},
                                                             {NULL, 0}};

const char *
radv_get_perftest_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
   return radv_perftest_options[id].string;
}

// clang-format off
static const driOptionDescription radv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
      DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
      DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
      DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
      DRI_CONF_RADV_ABSOLUTE_DEPTH_BIAS(false)
      DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_OVERRIDE_VRAM_SIZE()
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_RADV_ZERO_VRAM(false)
      DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)
      DRI_CONF_RADV_INVARIANT_GEOM(false)
      DRI_CONF_RADV_SPLIT_FMA(false)
      DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
      DRI_CONF_RADV_DISABLE_DCC(false)
      DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)
      DRI_CONF_RADV_REQUIRE_ETC2(false)
      DRI_CONF_RADV_DISABLE_ANISO_SINGLE_LEVEL(false)
      DRI_CONF_RADV_DISABLE_SINKING_LOAD_INPUT_FS(false)
   DRI_CONF_SECTION_END
};
// clang-format on

static void
radv_init_dri_options(struct radv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
                      ARRAY_SIZE(radv_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL, NULL,
                       instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   instance->enable_mrt_output_nan_fixup =
      driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");

   instance->disable_shrink_image_store =
      driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");

   instance->absolute_depth_bias =
      driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");

   instance->disable_tc_compat_htile_in_general =
      driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general");

   if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
      instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;

   if (driQueryOptionb(&instance->dri_options, "radv_lower_discard_to_demote"))
      instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;

   if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))
      instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;

   if (driQueryOptionb(&instance->dri_options, "radv_split_fma"))
      instance->debug_flags |= RADV_DEBUG_SPLIT_FMA;

   if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
      instance->debug_flags |= RADV_DEBUG_NO_DCC;

   instance->zero_vram =
      driQueryOptionb(&instance->dri_options, "radv_zero_vram");

   instance->report_apu_as_dgpu =
      driQueryOptionb(&instance->dri_options, "radv_report_apu_as_dgpu");

   instance->disable_aniso_single_level =
      driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level");

   instance->disable_sinking_load_input_fs =
      driQueryOptionb(&instance->dri_options, "radv_disable_sinking_load_input_fs");
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
{
   struct radv_instance *instance;
   VkResult result;

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &wsi_instance_entrypoints, false);
   struct vk_instance_extension_table extensions_supported = radv_instance_extensions_supported;

   result = vk_instance_init(&instance->vk, &extensions_supported, &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(instance, result);
   }

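   /* Both variables take comma-separated lists keyed by the tables above;
    * hypothetical example: RADV_DEBUG=nodcc,zerovram RADV_PERFTEST=cswave32. */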
   instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
   instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options);

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      fprintf(stderr, "radv: info: Created an instance.\n");

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   radv_init_dri_options(instance);

   *pInstance = radv_instance_to_handle(instance);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      radv_physical_device_destroy(pdevice);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   VkResult result = VK_SUCCESS;

   if (getenv("RADV_FORCE_FAMILY")) {
      /* When RADV_FORCE_FAMILY is set, the driver creates a null
       * device that makes it possible to test the compiler without
       * having an AMDGPU instance.
       */
      struct radv_physical_device *pdevice;

      result = radv_physical_device_try_create(instance, NULL, &pdevice);
      if (result != VK_SUCCESS)
         return result;

      list_addtail(&pdevice->link, &instance->physical_devices);
      return VK_SUCCESS;
   }

#ifndef _WIN32
   /* TODO: Check for more devices? */
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      fprintf(stderr, "radv: info: Found %d drm nodes.\n", max_devices);

   if (max_devices < 1)
      return vk_error(instance, VK_SUCCESS);

   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

         struct radv_physical_device *pdevice;
         result = radv_physical_device_try_create(instance, devices[i], &pdevice);
         /* Incompatible DRM device, skip. */
         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
            result = VK_SUCCESS;
            continue;
         }

         /* Error creating the physical device, report the error. */
         if (result != VK_SUCCESS)
            break;

         list_addtail(&pdevice->link, &instance->physical_devices);
      }
   }
   drmFreeDevices(devices, max_devices);
#endif

   /* If we successfully enumerated any devices, call it success */
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,
                              VkPhysicalDevice *pPhysicalDevices)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDevice, &out, i)
      {
         *i = radv_physical_device_to_handle(pdevice);
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,
                                   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
         p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   memset(pFeatures, 0, sizeof(*pFeatures));

   *pFeatures = (VkPhysicalDeviceFeatures){
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = radv_device_supports_etc(pdevice) || pdevice->emulate_etc2,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .sparseBinding = true,
      .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
      .variableMultisampleRate = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = true,
      .inheritedQueries = true,
   };
}

static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess = true;
   f->uniformAndStorageBuffer16BitAccess = true;
   f->storagePushConstant16 = true;
   f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit;
   f->multiview = true;
   f->multiviewGeometryShader = true;
   f->multiviewTessellationShader = true;
   f->variablePointersStorageBuffer = true;
   f->variablePointers = true;
   f->protectedMemory = false;
   f->samplerYcbcrConversion = true;
   f->shaderDrawParameters = true;
}

static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge = true;
   f->drawIndirectCount = true;
   f->storageBuffer8BitAccess = true;
   f->uniformAndStorageBuffer8BitAccess = true;
   f->storagePushConstant8 = true;
   f->shaderBufferInt64Atomics = true;
   f->shaderSharedInt64Atomics = true;
   f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
   f->shaderInt8 = true;

   f->descriptorIndexing = true;
   f->shaderInputAttachmentArrayDynamicIndexing = true;
   f->shaderUniformTexelBufferArrayDynamicIndexing = true;
   f->shaderStorageTexelBufferArrayDynamicIndexing = true;
   f->shaderUniformBufferArrayNonUniformIndexing = true;
   f->shaderSampledImageArrayNonUniformIndexing = true;
   f->shaderStorageBufferArrayNonUniformIndexing = true;
   f->shaderStorageImageArrayNonUniformIndexing = true;
   f->shaderInputAttachmentArrayNonUniformIndexing = true;
   f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   f->descriptorBindingUniformBufferUpdateAfterBind = true;
   f->descriptorBindingSampledImageUpdateAfterBind = true;
   f->descriptorBindingStorageImageUpdateAfterBind = true;
   f->descriptorBindingStorageBufferUpdateAfterBind = true;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   f->descriptorBindingUpdateUnusedWhilePending = true;
   f->descriptorBindingPartiallyBound = true;
   f->descriptorBindingVariableDescriptorCount = true;
   f->runtimeDescriptorArray = true;

   f->samplerFilterMinmax = true;
   f->scalarBlockLayout = pdevice->rad_info.gfx_level >= GFX7;
   f->imagelessFramebuffer = true;
   f->uniformBufferStandardLayout = true;
   f->shaderSubgroupExtendedTypes = true;
   f->separateDepthStencilLayouts = true;
   f->hostQueryReset = true;
   f->timelineSemaphore = true;
   f->bufferDeviceAddress = true;
   f->bufferDeviceAddressCaptureReplay = true;
   f->bufferDeviceAddressMultiDevice = false;
   f->vulkanMemoryModel = true;
   f->vulkanMemoryModelDeviceScope = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = false;
   f->shaderOutputViewportIndex = true;
   f->shaderOutputLayer = true;
   f->subgroupBroadcastDynamicId = true;
}

static void
radv_get_physical_device_features_1_3(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan13Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);

   f->robustImageAccess = true;
   f->inlineUniformBlock = true;
   f->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   f->pipelineCreationCacheControl = true;
   f->privateData = true;
   f->shaderDemoteToHelperInvocation = true;
   f->shaderTerminateInvocation = true;
   f->subgroupSizeControl = true;
   f->computeFullSubgroups = true;
   f->synchronization2 = true;
   f->textureCompressionASTC_HDR = false;
   f->shaderZeroInitializeWorkgroupMemory = true;
   f->dynamicRendering = true;
   f->shaderIntegerDotProduct = true;
   f->maintenance4 = true;
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceFeatures2 *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   radv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   radv_get_physical_device_features_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Features core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   };
   radv_get_physical_device_features_1_3(pdevice, &core_1_3);

#define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature

   vk_foreach_struct(ext, pFeatures->pNext)
   {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = pdevice->rad_info.gfx_level < GFX11;
         features->geometryStreams = !pdevice->use_ngg_streamout && pdevice->rad_info.gfx_level < GFX11;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features =
            (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, scalarBlockLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
         VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
            (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
         features->memoryPriority = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
            (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = false;
         features->computeDerivativeGroupLinear = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = pdevice->rad_info.gfx_level >= GFX8;
         break;
      }
1473       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
1474          VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
1475             (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
1476          features->pipelineExecutableInfo = true;
1477          break;
1478       }
1479       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
1480          VkPhysicalDeviceShaderClockFeaturesKHR *features =
1481             (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
1482          features->shaderSubgroupClock = true;
1483          features->shaderDeviceClock = pdevice->rad_info.gfx_level >= GFX8;
1484          break;
1485       }
1486       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
1487          VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
1488             (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
1489          features->texelBufferAlignment = true;
1490          break;
1491       }
1492       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
1493          VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
1494             (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
1495          features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
1496          break;
1497       }
1498       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
1499          VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
1500             (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
1501          features->rectangularLines = false;
1502          features->bresenhamLines = true;
1503          features->smoothLines = false;
1504          features->stippledRectangularLines = false;
1505          /* FIXME: Some stippled Bresenham CTS tests fail on Vega10
1506           * but pass on Raven.
1507           */
1508          features->stippledBresenhamLines = pdevice->rad_info.gfx_level != GFX9;
1509          features->stippledSmoothLines = false;
1510          break;
1511       }
1512       case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
1513          VkDeviceMemoryOverallocationCreateInfoAMD *features =
1514             (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
1515          features->overallocationBehavior = true;
1516          break;
1517       }
1518       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
1519          VkPhysicalDeviceRobustness2FeaturesEXT *features =
1520             (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
1521          features->robustBufferAccess2 = true;
1522          features->robustImageAccess2 = true;
1523          features->nullDescriptor = true;
1524          break;
1525       }
1526       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
1527          VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
1528             (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
1529          features->customBorderColors = true;
1530          features->customBorderColorWithoutFormat = true;
1531          break;
1532       }
1533       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
1534          VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
1535             (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
1536          features->extendedDynamicState = true;
1537          break;
1538       }
1539       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
1540          VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
1541             (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
1542          features->shaderBufferFloat32Atomics = true;
1543          features->shaderBufferFloat32AtomicAdd = false;
1544          features->shaderBufferFloat64Atomics = true;
1545          features->shaderBufferFloat64AtomicAdd = false;
1546          features->shaderSharedFloat32Atomics = true;
1547          features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.gfx_level >= GFX8;
1548          features->shaderSharedFloat64Atomics = true;
1549          features->shaderSharedFloat64AtomicAdd = false;
1550          features->shaderImageFloat32Atomics = true;
1551          features->shaderImageFloat32AtomicAdd = false;
1552          features->sparseImageFloat32Atomics = true;
1553          features->sparseImageFloat32AtomicAdd = false;
1554          break;
1555       }
1556       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
1557          VkPhysicalDevice4444FormatsFeaturesEXT *features =
1558             (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
1559          features->formatA4R4G4B4 = true;
1560          features->formatA4B4G4R4 = true;
1561          break;
1562       }
1563       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
1564          VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
1565             (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
1566          features->shaderImageInt64Atomics = true;
1567          features->sparseImageInt64Atomics = true;
1568          break;
1569       }
1570       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
1571          VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
1572             (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
1573          features->mutableDescriptorType = true;
1574          break;
1575       }
1576       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
1577          VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
1578             (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
1579          features->pipelineFragmentShadingRate = true;
1580          features->primitiveFragmentShadingRate = true;
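         /* Attachment VRS is implemented on top of HTILE, so it has to be
          * disabled together with HiZ. It is also not supported on GFX11 yet,
          * where VRS no longer uses HTILE. */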
1581          features->attachmentFragmentShadingRate =
1582             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_HIZ) &&
1583             pdevice->rad_info.gfx_level < GFX11; /* TODO: VRS no longer uses HTILE. */
1584          break;
1585       }
1586       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
1587          VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
1588             (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
1589          features->workgroupMemoryExplicitLayout = true;
1590          features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
1591          features->workgroupMemoryExplicitLayout8BitAccess = true;
1592          features->workgroupMemoryExplicitLayout16BitAccess = true;
1593          break;
1594       }
1595       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
1596          VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
1597             (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
1598          features->provokingVertexLast = true;
1599          features->transformFeedbackPreservesProvokingVertex = true;
1600          break;
1601       }
1602       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
1603          VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
1604             (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
1605          features->extendedDynamicState2 = true;
1606          features->extendedDynamicState2LogicOp = true;
1607          features->extendedDynamicState2PatchControlPoints = false;
1608          break;
1609       }
1610       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: {
1611          VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *features =
1612             (VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *)ext;
1613          features->globalPriorityQuery = true;
1614          break;
1615       }
1616       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
1617          VkPhysicalDeviceAccelerationStructureFeaturesKHR *features =
1618             (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)ext;
1619          features->accelerationStructure = true;
1620          features->accelerationStructureCaptureReplay = false;
1621          features->accelerationStructureIndirectBuild = false;
1622          features->accelerationStructureHostCommands = true;
1623          features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
1624          break;
1625       }
1626       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
1627          VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
1628             (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
1629          features->shaderSubgroupUniformControlFlow = true;
1630          break;
1631       }
1632       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
1633          VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
1634          features->multiDraw = true;
1635          break;
1636       }
1637       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
1638          VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
1639             (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
1640          features->colorWriteEnable = true;
1641          break;
1642       }
1643       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
1644          VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features =
1645             (VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *)ext;
1646          bool has_shader_buffer_float_minmax = radv_has_shader_buffer_float_minmax(pdevice);
1647          bool has_shader_image_float_minmax =
1648             pdevice->rad_info.gfx_level != GFX8 && pdevice->rad_info.gfx_level != GFX9;
1649          features->shaderBufferFloat16Atomics = false;
1650          features->shaderBufferFloat16AtomicAdd = false;
1651          features->shaderBufferFloat16AtomicMinMax = false;
1652          features->shaderBufferFloat32AtomicMinMax = has_shader_buffer_float_minmax;
1653          features->shaderBufferFloat64AtomicMinMax = has_shader_buffer_float_minmax;
1654          features->shaderSharedFloat16Atomics = false;
1655          features->shaderSharedFloat16AtomicAdd = false;
1656          features->shaderSharedFloat16AtomicMinMax = false;
1657          features->shaderSharedFloat32AtomicMinMax = true;
1658          features->shaderSharedFloat64AtomicMinMax = true;
1659          features->shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax;
1660          features->sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax;
1661          break;
1662       }
1663       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
1664          VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
1665             (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
1666          features->primitiveTopologyListRestart = true;
1667          features->primitiveTopologyPatchListRestart = false;
1668          break;
1669       }
1670       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
1671          VkPhysicalDeviceRayQueryFeaturesKHR *features =
1672             (VkPhysicalDeviceRayQueryFeaturesKHR *)ext;
1673          features->rayQuery = true;
1674          break;
1675       }
1676       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR: {
1677          VkPhysicalDeviceRayTracingPipelineFeaturesKHR *features =
1678             (VkPhysicalDeviceRayTracingPipelineFeaturesKHR *)ext;
1679          features->rayTracingPipeline = true;
1680          features->rayTracingPipelineShaderGroupHandleCaptureReplay = false;
1681          features->rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false;
1682          features->rayTracingPipelineTraceRaysIndirect = true;
1683          features->rayTraversalPrimitiveCulling = true;
1684          break;
1685       }
1686       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_MAINTENANCE_1_FEATURES_KHR: {
1687          VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR *features =
1688             (VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR *)ext;
1689          features->rayTracingMaintenance1 = true;
1690          features->rayTracingPipelineTraceRaysIndirect2 = radv_enable_rt(pdevice, true);
1691          break;
1692       }
1693       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES: {
1694          VkPhysicalDeviceMaintenance4Features *features =
1695             (VkPhysicalDeviceMaintenance4Features *)ext;
1696          features->maintenance4 = true;
1697          break;
1698       }
1699       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
1700          VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features =
1701             (VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *)ext;
1702          features->vertexInputDynamicState = true;
1703          break;
1704       }
1705       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: {
1706          VkPhysicalDeviceImageViewMinLodFeaturesEXT *features =
1707             (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext;
1708          features->minLod = true;
1709          break;
1710       }
1711       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES: {
1712          VkPhysicalDeviceSynchronization2Features *features =
1713             (VkPhysicalDeviceSynchronization2Features *)ext;
1714          features->synchronization2 = true;
1715          break;
1716       }
1717       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES: {
1718          VkPhysicalDeviceDynamicRenderingFeatures *features =
1719             (VkPhysicalDeviceDynamicRenderingFeatures *)ext;
1720          features->dynamicRendering = true;
1721          break;
1722       }
1723       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
1724          VkPhysicalDeviceMeshShaderFeaturesNV *features =
1725             (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
1726          features->meshShader = true;
1727          features->taskShader = false; /* TODO */
1728          break;
1729       }
1730       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: {
1731          VkPhysicalDeviceTextureCompressionASTCHDRFeatures *features =
1732             (VkPhysicalDeviceTextureCompressionASTCHDRFeatures *)ext;
1733          features->textureCompressionASTC_HDR = false;
1734          break;
1735       }
1736       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_SET_HOST_MAPPING_FEATURES_VALVE: {
1737          VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE *features =
1738             (VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE *)ext;
1739          features->descriptorSetHostMapping = true;
1740          break;
1741       }
1742       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
1743          VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
1744             (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
1745          features->depthClipControl = true;
1746          break;
1747       }
1748       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
1749          VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
1750             (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
1751          features->image2DViewOf3D = true;
1752          features->sampler2DViewOf3D = false;
1753          break;
1754       }
1755       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
1756          VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
1757             (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
1758          features->shaderIntegerFunctions2 = true;
1759          break;
1760       }
1761       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
1762          VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
1763             (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
1764          features->primitivesGeneratedQuery = true;
1765          features->primitivesGeneratedQueryWithRasterizerDiscard = true;
1766          features->primitivesGeneratedQueryWithNonZeroStreams = true;
1767          break;
1768       }
1769       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_NON_SEAMLESS_CUBE_MAP_FEATURES_EXT: {
1770          VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *features =
1771             (VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *)ext;
1772          features->nonSeamlessCubeMap = true;
1773          break;
1774       }
1775       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: {
1776          VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features =
1777             (VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *)ext;
1778          features->borderColorSwizzle = true;
1779          features->borderColorSwizzleFromImage = true;
1780          break;
1781       }
1782       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: {
1783          VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
1784             (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
1785          features->shaderModuleIdentifier = true;
1786          break;
1787       }
1788       default:
1789          break;
1790       }
1791    }
1792 }
1793
1794 static size_t
1795 radv_max_descriptor_set_size(void)
1796 {
1797    /* Make sure that the entire descriptor set is addressable with a signed
1798     * 32-bit int, i.e. the sum of all limits scaled by the descriptor size
1799     * must be at most 2 GiB. A combined image & sampler object counts as one
1800     * of each. This limit is for the pipeline layout, not for the set layout,
1801     * but there is no set limit, so we just use the pipeline limit. No app is
1802     * likely to hit this limit any time soon. */
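   /* Ignoring the dynamic-buffer and inline-uniform reservations, this works
    * out to roughly 2^31 / (32 + 32 + 32 + 64 + 64) bytes, i.e. about 9.5
    * million descriptors per limit. */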
1803    return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
1804            MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1805           (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1806            32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1807            32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
1808            64 /* storage image */);
1809 }
1810
1811 static uint32_t
1812 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
1813 {
1814    uint32_t uniform_offset_alignment =
1815       driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");
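   /* 0 (the drirc default, i.e. option unset) means no override; any other
    * value must be a power of two. */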
1816    if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1817       fprintf(stderr,
1818               "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
1819               "not a power of two\n",
1820               uniform_offset_alignment);
1821       uniform_offset_alignment = 0;
1822    }
1823
1824    /* Take at least the hardware limit. */
1825    return MAX2(uniform_offset_alignment, 4);
1826 }
1827
1828 VKAPI_ATTR void VKAPI_CALL
1829 radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
1830                                  VkPhysicalDeviceProperties *pProperties)
1831 {
1832    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
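   /* 0xf = 1x | 2x | 4x | 8x MSAA (VK_SAMPLE_COUNT_{1,2,4,8}_BIT). */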
1833    VkSampleCountFlags sample_counts = 0xf;
1834
1835    size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1836
1837    VkPhysicalDeviceLimits limits = {
1838       .maxImageDimension1D = (1 << 14),
1839       .maxImageDimension2D = (1 << 14),
1840       .maxImageDimension3D = (1 << 11),
1841       .maxImageDimensionCube = (1 << 14),
1842       .maxImageArrayLayers = (1 << 11),
1843       .maxTexelBufferElements = UINT32_MAX,
1844       .maxUniformBufferRange = UINT32_MAX,
1845       .maxStorageBufferRange = UINT32_MAX,
1846       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1847       .maxMemoryAllocationCount = UINT32_MAX,
1848       .maxSamplerAllocationCount = 64 * 1024,
1849       .bufferImageGranularity = 1,
1850       .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1851       .maxBoundDescriptorSets = MAX_SETS,
1852       .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1853       .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1854       .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1855       .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1856       .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1857       .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1858       .maxPerStageResources = max_descriptor_set_size,
1859       .maxDescriptorSetSamplers = max_descriptor_set_size,
1860       .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1861       .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1862       .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1863       .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1864       .maxDescriptorSetSampledImages = max_descriptor_set_size,
1865       .maxDescriptorSetStorageImages = max_descriptor_set_size,
1866       .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1867       .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1868       .maxVertexInputBindings = MAX_VBS,
1869       .maxVertexInputAttributeOffset = UINT32_MAX,
1870       .maxVertexInputBindingStride = 2048,
1871       .maxVertexOutputComponents = 128,
1872       .maxTessellationGenerationLevel = 64,
1873       .maxTessellationPatchSize = 32,
1874       .maxTessellationControlPerVertexInputComponents = 128,
1875       .maxTessellationControlPerVertexOutputComponents = 128,
1876       .maxTessellationControlPerPatchOutputComponents = 120,
1877       .maxTessellationControlTotalOutputComponents = 4096,
1878       .maxTessellationEvaluationInputComponents = 128,
1879       .maxTessellationEvaluationOutputComponents = 128,
1880       .maxGeometryShaderInvocations = 127,
1881       .maxGeometryInputComponents = 64,
1882       .maxGeometryOutputComponents = 128,
1883       .maxGeometryOutputVertices = 256,
1884       .maxGeometryTotalOutputComponents = 1024,
1885       .maxFragmentInputComponents = 128,
1886       .maxFragmentOutputAttachments = 8,
1887       .maxFragmentDualSrcAttachments = 1,
1888       .maxFragmentCombinedOutputResources = max_descriptor_set_size,
1889       .maxComputeSharedMemorySize = pdevice->rad_info.gfx_level >= GFX7 ? 65536 : 32768,
1890       .maxComputeWorkGroupCount = {65535, 65535, 65535},
1891       .maxComputeWorkGroupInvocations = 1024,
1892       .maxComputeWorkGroupSize = {1024, 1024, 1024},
1893       .subPixelPrecisionBits = 8,
1894       .subTexelPrecisionBits = 8,
1895       .mipmapPrecisionBits = 8,
1896       .maxDrawIndexedIndexValue = UINT32_MAX,
1897       .maxDrawIndirectCount = UINT32_MAX,
1898       .maxSamplerLodBias = 16,
1899       .maxSamplerAnisotropy = 16,
1900       .maxViewports = MAX_VIEWPORTS,
1901       .maxViewportDimensions = {(1 << 14), (1 << 14)},
1902       .viewportBoundsRange = {INT16_MIN, INT16_MAX},
1903       .viewportSubPixelBits = 8,
1904       .minMemoryMapAlignment = 4096, /* A page */
1905       .minTexelBufferOffsetAlignment = 4,
1906       .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
1907       .minStorageBufferOffsetAlignment = 4,
1908       .minTexelOffset = -32,
1909       .maxTexelOffset = 31,
1910       .minTexelGatherOffset = -32,
1911       .maxTexelGatherOffset = 31,
1912       .minInterpolationOffset = -2,
1913       .maxInterpolationOffset = 2,
1914       .subPixelInterpolationOffsetBits = 8,
1915       .maxFramebufferWidth = MAX_FRAMEBUFFER_WIDTH,
1916       .maxFramebufferHeight = MAX_FRAMEBUFFER_HEIGHT,
1917       .maxFramebufferLayers = (1 << 10),
1918       .framebufferColorSampleCounts = sample_counts,
1919       .framebufferDepthSampleCounts = sample_counts,
1920       .framebufferStencilSampleCounts = sample_counts,
1921       .framebufferNoAttachmentsSampleCounts = sample_counts,
1922       .maxColorAttachments = MAX_RTS,
1923       .sampledImageColorSampleCounts = sample_counts,
1924       .sampledImageIntegerSampleCounts = sample_counts,
1925       .sampledImageDepthSampleCounts = sample_counts,
1926       .sampledImageStencilSampleCounts = sample_counts,
1927       .storageImageSampleCounts = sample_counts,
1928       .maxSampleMaskWords = 1,
1929       .timestampComputeAndGraphics = true,
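      /* clock_crystal_freq is in kHz, so this yields the timestamp period in
       * nanoseconds per tick. */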
1930       .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
1931       .maxClipDistances = 8,
1932       .maxCullDistances = 8,
1933       .maxCombinedClipAndCullDistances = 8,
1934       .discreteQueuePriorities = 2,
1935       .pointSizeRange = {0.0, 8191.875},
1936       .lineWidthRange = {0.0, 8191.875},
1937       .pointSizeGranularity = (1.0 / 8.0),
1938       .lineWidthGranularity = (1.0 / 8.0),
1939       .strictLines = false, /* FINISHME */
1940       .standardSampleLocations = true,
1941       .optimalBufferCopyOffsetAlignment = 1,
1942       .optimalBufferCopyRowPitchAlignment = 1,
1943       .nonCoherentAtomSize = 64,
1944    };
1945
1946    VkPhysicalDeviceType device_type;
1947
1948    if (pdevice->rad_info.has_dedicated_vram || pdevice->instance->report_apu_as_dgpu) {
1949       device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
1950    } else {
1951       device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1952    }
1953
1954    *pProperties = (VkPhysicalDeviceProperties){
1955       .apiVersion = RADV_API_VERSION,
1956       .driverVersion = vk_get_driver_version(),
1957       .vendorID = ATI_VENDOR_ID,
1958       .deviceID = pdevice->rad_info.pci_id,
1959       .deviceType = device_type,
1960       .limits = limits,
1961       .sparseProperties =
1962          {
1963             .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
1964             .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
1965          },
1966    };
1967
1968    strcpy(pProperties->deviceName, pdevice->marketing_name);
1969    memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1970 }
1971
1972 static void
1973 radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
1974                                         VkPhysicalDeviceVulkan11Properties *p)
1975 {
1976    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
1977
1978    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1979    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1980    memset(p->deviceLUID, 0, VK_LUID_SIZE);
1981    /* The LUID is for Windows. */
1982    p->deviceLUIDValid = false;
1983    p->deviceNodeMask = 0;
1984
1985    p->subgroupSize = RADV_SUBGROUP_SIZE;
1986    p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
1987    p->subgroupSupportedOperations =
1988       VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
1989       VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
1990       VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
1991       VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1992    p->subgroupQuadOperationsInAllStages = true;
1993
1994    p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1995    p->maxMultiviewViewCount = MAX_VIEWS;
1996    p->maxMultiviewInstanceIndex = INT_MAX;
1997    p->protectedNoFault = false;
1998    p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1999    p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
2000 }
2001
2002 static void
2003 radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
2004                                         VkPhysicalDeviceVulkan12Properties *p)
2005 {
2006    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
2007
2008    p->driverID = VK_DRIVER_ID_MESA_RADV;
2009    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
2010    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
2011             radv_get_compiler_string(pdevice));
2012
2013    if (radv_is_conformant(pdevice)) {
2014       if (pdevice->rad_info.gfx_level >= GFX10_3) {
2015          p->conformanceVersion = (VkConformanceVersion){
2016             .major = 1,
2017             .minor = 3,
2018             .subminor = 0,
2019             .patch = 0,
2020          };
2021       } else {
2022          p->conformanceVersion = (VkConformanceVersion){
2023             .major = 1,
2024             .minor = 2,
2025             .subminor = 7,
2026             .patch = 1,
2027          };
2028       }
2029    } else {
2030       p->conformanceVersion = (VkConformanceVersion){
2031          .major = 0,
2032          .minor = 0,
2033          .subminor = 0,
2034          .patch = 0,
2035       };
2036    }
2037
2038    /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
2039     * controlled by the same config register.
2040     */
2041    if (pdevice->rad_info.has_packed_math_16bit) {
2042       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
2043       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
2044    } else {
2045       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
2046       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
2047    }
2048
2049    /* With LLVM, do not allow both preserving and flushing denorms because
2050     * different shaders in the same pipeline can have different settings and
2051     * this won't work for merged shaders. To make it work, this requires LLVM
2052     * support for changing the register. The same logic applies for the
2053     * rounding modes because they are configured with the same config
2054     * register.
2055     */
2056    p->shaderDenormFlushToZeroFloat32 = true;
2057    p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
2058    p->shaderRoundingModeRTEFloat32 = true;
2059    p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
2060    p->shaderSignedZeroInfNanPreserveFloat32 = true;
2061
2062    p->shaderDenormFlushToZeroFloat16 =
2063       pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
2064    p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
2065    p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
2066    p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
2067    p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
2068
2069    p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
2070    p->shaderDenormPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
2071    p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.gfx_level >= GFX8;
2072    p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
2073    p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
2074
2075    p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
2076    p->shaderUniformBufferArrayNonUniformIndexingNative = false;
2077    p->shaderSampledImageArrayNonUniformIndexingNative = false;
2078    p->shaderStorageBufferArrayNonUniformIndexingNative = false;
2079    p->shaderStorageImageArrayNonUniformIndexingNative = false;
2080    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
2081    p->robustBufferAccessUpdateAfterBind = true;
2082    p->quadDivergentImplicitLod = false;
2083
2084    size_t max_descriptor_set_size = radv_max_descriptor_set_size();
2091    p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
2092    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
2093    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
2094    p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
2095    p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
2096    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
2097    p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
2098    p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
2099    p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
2100    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
2101    p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
2102    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
2103    p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
2104    p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
2105    p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
2106
2107    /* We support all of the depth resolve modes. */
2108    p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
2109                                    VK_RESOLVE_MODE_AVERAGE_BIT | VK_RESOLVE_MODE_MIN_BIT |
2110                                    VK_RESOLVE_MODE_MAX_BIT;
2111
2112    /* Averaging doesn't make sense for stencil, so we don't support it. */
2113    p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
2114                                      VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
2115
2116    p->independentResolveNone = true;
2117    p->independentResolve = true;
2118
2119    /* GFX6-8 only support single channel min/max filter. */
2120    p->filterMinmaxImageComponentMapping = pdevice->rad_info.gfx_level >= GFX9;
2121    p->filterMinmaxSingleComponentFormats = true;
2122
2123    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
2124
2125    p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
2126 }
2127
2128 static void
2129 radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice,
2130                                         VkPhysicalDeviceVulkan13Properties *p)
2131 {
2132    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
2133
2134    p->minSubgroupSize = 64;
2135    p->maxSubgroupSize = 64;
2136    p->maxComputeWorkgroupSubgroups = UINT32_MAX;
2137    p->requiredSubgroupSizeStages = 0;
2138    if (pdevice->rad_info.gfx_level >= GFX10) {
2139       /* Only GFX10+ supports wave32. */
2140       p->minSubgroupSize = 32;
2141       p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
2142    }
2143
2144    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
2145    p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
2146    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
2147    p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
2148    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
2149    p->maxInlineUniformTotalSize = UINT16_MAX;
2150
2151    bool accel = pdevice->rad_info.has_accelerated_dot_product;
2152    p->integerDotProduct8BitUnsignedAccelerated = accel;
2153    p->integerDotProduct8BitSignedAccelerated = accel;
2154    p->integerDotProduct8BitMixedSignednessAccelerated = false;
2155    p->integerDotProduct4x8BitPackedUnsignedAccelerated = accel;
2156    p->integerDotProduct4x8BitPackedSignedAccelerated = accel;
2157    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
2158    p->integerDotProduct16BitUnsignedAccelerated = accel;
2159    p->integerDotProduct16BitSignedAccelerated = accel;
2160    p->integerDotProduct16BitMixedSignednessAccelerated = false;
2161    p->integerDotProduct32BitUnsignedAccelerated = false;
2162    p->integerDotProduct32BitSignedAccelerated = false;
2163    p->integerDotProduct32BitMixedSignednessAccelerated = false;
2164    p->integerDotProduct64BitUnsignedAccelerated = false;
2165    p->integerDotProduct64BitSignedAccelerated = false;
2166    p->integerDotProduct64BitMixedSignednessAccelerated = false;
2167    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel;
2168    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel;
2169    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
2170    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel;
2171    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel;
2172    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
2173    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel;
2174    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel;
2175    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
2176    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
2177    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
2178    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
2179    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
2180    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
2181    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
2182
2183    p->storageTexelBufferOffsetAlignmentBytes = 4;
2184    p->storageTexelBufferOffsetSingleTexelAlignment = true;
2185    p->uniformTexelBufferOffsetAlignmentBytes = 4;
2186    p->uniformTexelBufferOffsetSingleTexelAlignment = true;
2187
2188    p->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
2189 }
2190
2191 VKAPI_ATTR void VKAPI_CALL
2192 radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
2193                                   VkPhysicalDeviceProperties2 *pProperties)
2194 {
2195    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2196    radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
2197
2198    VkPhysicalDeviceVulkan11Properties core_1_1 = {
2199       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
2200    };
2201    radv_get_physical_device_properties_1_1(pdevice, &core_1_1);
2202
2203    VkPhysicalDeviceVulkan12Properties core_1_2 = {
2204       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
2205    };
2206    radv_get_physical_device_properties_1_2(pdevice, &core_1_2);
2207
2208    VkPhysicalDeviceVulkan13Properties core_1_3 = {
2209       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
2210    };
2211    radv_get_physical_device_properties_1_3(pdevice, &core_1_3);
2212
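   /* As with the features, property structs promoted to core 1.1/1.2/1.3 are
    * filled in by the common helpers; the switch below only covers
    * non-promoted extensions. */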
2213    vk_foreach_struct(ext, pProperties->pNext)
2214    {
2215       if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
2216          continue;
2217       if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
2218          continue;
2219       if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
2220          continue;
2221
2222       switch (ext->sType) {
2223       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
2224          VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
2225             (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
2226          properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
2227          break;
2228       }
2229       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
2230          VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
2231             (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
2232          properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
2233          break;
2234       }
2235       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
2236          VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
2237             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
2238          properties->minImportedHostPointerAlignment = 4096;
2239          break;
2240       }
2241       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
2242          VkPhysicalDeviceShaderCorePropertiesAMD *properties =
2243             (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
2244
2245          /* Shader engines. */
2246          properties->shaderEngineCount = pdevice->rad_info.max_se;
2247          properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
2248          properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
2249          properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
2250          properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
2251          properties->wavefrontSize = 64;
2252
2253          /* SGPR. */
2254          properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
2255          properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
2256          properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
2257          properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;
2258
2259          /* VGPR. */
2260          properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
2261          properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
2262          properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
2263          properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
2264          break;
2265       }
2266       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
2267          VkPhysicalDeviceShaderCoreProperties2AMD *properties =
2268             (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
2269
2270          properties->shaderCoreFeatures = 0;
2271          properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
2272          break;
2273       }
2274       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2275          VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
2276             (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2277          properties->maxVertexAttribDivisor = UINT32_MAX;
2278          break;
2279       }
2280       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
2281          VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
2282             (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
2283          properties->primitiveOverestimationSize = 0;
2284          properties->maxExtraPrimitiveOverestimationSize = 0;
2285          properties->extraPrimitiveOverestimationSizeGranularity = 0;
2286          properties->primitiveUnderestimation = false;
2287          properties->conservativePointAndLineRasterization = false;
2288          properties->degenerateTrianglesRasterized = true;
2289          properties->degenerateLinesRasterized = false;
2290          properties->fullyCoveredFragmentShaderInputVariable = false;
2291          properties->conservativeRasterizationPostDepthCoverage = false;
2292          break;
2293       }
2294 #ifndef _WIN32
2295       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
2296          VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
2297             (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
2298          properties->pciDomain = pdevice->bus_info.domain;
2299          properties->pciBus = pdevice->bus_info.bus;
2300          properties->pciDevice = pdevice->bus_info.dev;
2301          properties->pciFunction = pdevice->bus_info.func;
2302          break;
2303       }
2304 #endif
2305       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2306          VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
2307             (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2308          properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
2309          properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
2310          properties->maxTransformFeedbackBufferSize = UINT32_MAX;
2311          properties->maxTransformFeedbackStreamDataSize = 512;
2312          properties->maxTransformFeedbackBufferDataSize = 512;
2313          properties->maxTransformFeedbackBufferDataStride = 512;
2314          properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
2315          properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
2316          properties->transformFeedbackRasterizationStreamSelect = false;
2317          properties->transformFeedbackDraw = true;
2318          break;
2319       }
2320       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
2321          VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
2322             (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
2323          properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
2324                                                   VK_SAMPLE_COUNT_8_BIT;
2325          properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
2326          properties->sampleLocationCoordinateRange[0] = 0.0f;
2327          properties->sampleLocationCoordinateRange[1] = 0.9375f;
2328          properties->sampleLocationSubPixelBits = 4;
2329          properties->variableSampleLocations = false;
2330          break;
2331       }
2332       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2333          VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2334             (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2335          props->lineSubPixelPrecisionBits = 4;
2336          break;
2337       }
2338       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2339          VkPhysicalDeviceRobustness2PropertiesEXT *properties =
2340             (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
2341          properties->robustStorageBufferAccessSizeAlignment = 4;
2342          properties->robustUniformBufferAccessSizeAlignment = 4;
2343          break;
2344       }
2345       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
2346          VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
2347             (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
2348          props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
2349          break;
2350       }
2351       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
2352          VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
2353             (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
2354          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
2355          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
2356          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
2357          props->primitiveFragmentShadingRateWithMultipleViewports = true;
2358          props->layeredShadingRateAttachments = false; /* TODO */
2359          props->fragmentShadingRateNonTrivialCombinerOps = true;
2360          props->maxFragmentSize = (VkExtent2D){2, 2};
2361          props->maxFragmentSizeAspectRatio = 2;
2362          props->maxFragmentShadingRateCoverageSamples = 32;
2363          props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
2364          props->fragmentShadingRateWithShaderDepthStencilWrites = false;
2365          props->fragmentShadingRateWithSampleMask = true;
2366          props->fragmentShadingRateWithShaderSampleMask = false;
2367          props->fragmentShadingRateWithConservativeRasterization = true;
2368          props->fragmentShadingRateWithFragmentShaderInterlock = false;
2369          props->fragmentShadingRateWithCustomSampleLocations = false;
2370          props->fragmentShadingRateStrictMultiplyCombiner = true;
2371          break;
2372       }
2373       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
2374          VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
2375             (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
2376          props->provokingVertexModePerPipeline = true;
2377          props->transformFeedbackPreservesTriangleFanProvokingVertex = true;
2378          break;
2379       }
2380       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
2381          VkPhysicalDeviceAccelerationStructurePropertiesKHR *props =
2382             (VkPhysicalDeviceAccelerationStructurePropertiesKHR *)ext;
2383          props->maxGeometryCount = (1 << 24) - 1;
2384          props->maxInstanceCount = (1 << 24) - 1;
2385          props->maxPrimitiveCount = (1 << 29) - 1;
2386          props->maxPerStageDescriptorAccelerationStructures =
2387             pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
2388          props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures =
2389             pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
2390          props->maxDescriptorSetAccelerationStructures =
2391             pProperties->properties.limits.maxDescriptorSetStorageBuffers;
2392          props->maxDescriptorSetUpdateAfterBindAccelerationStructures =
2393             pProperties->properties.limits.maxDescriptorSetStorageBuffers;
2394          props->minAccelerationStructureScratchOffsetAlignment = 128;
2395          break;
2396       }
2397 #ifndef _WIN32
2398       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
2399          VkPhysicalDeviceDrmPropertiesEXT *props = (VkPhysicalDeviceDrmPropertiesEXT *)ext;
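         /* Report the major/minor numbers of the primary (cardN) and render
          * (renderDN) DRM nodes so loaders can match this physical device to
          * a DRM device. */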
2400          if (pdevice->available_nodes & (1 << DRM_NODE_PRIMARY)) {
2401             props->hasPrimary = true;
2402             props->primaryMajor = (int64_t)major(pdevice->primary_devid);
2403             props->primaryMinor = (int64_t)minor(pdevice->primary_devid);
2404          } else {
2405             props->hasPrimary = false;
2406          }
2407          if (pdevice->available_nodes & (1 << DRM_NODE_RENDER)) {
2408             props->hasRender = true;
2409             props->renderMajor = (int64_t)major(pdevice->render_devid);
2410             props->renderMinor = (int64_t)minor(pdevice->render_devid);
2411          } else {
2412             props->hasRender = false;
2413          }
2414          break;
2415       }
2416 #endif
2417       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2418          VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2419          props->maxMultiDrawCount = 2048;
2420          break;
2421       }
2422       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR: {
2423          VkPhysicalDeviceRayTracingPipelinePropertiesKHR *props =
2424             (VkPhysicalDeviceRayTracingPipelinePropertiesKHR *)ext;
2425          props->shaderGroupHandleSize = RADV_RT_HANDLE_SIZE;
2426          props->maxRayRecursionDepth = 31;    /* Minimum allowed for DXR. */
2427          props->maxShaderGroupStride = 16384; /* dummy */
2428          props->shaderGroupBaseAlignment = 16;
2429          props->shaderGroupHandleCaptureReplaySize = 16;
2430          props->maxRayDispatchInvocationCount = 1024 * 1024 * 64;
2431          props->shaderGroupHandleAlignment = 16;
2432          props->maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE;
2433          break;
2434       }
2435       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
2436          VkPhysicalDeviceMaintenance4Properties *properties =
2437             (VkPhysicalDeviceMaintenance4Properties *)ext;
2438          properties->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
2439          break;
2440       }
2441       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
2442          VkPhysicalDeviceMeshShaderPropertiesNV *properties =
2443             (VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
2444
2445          /* Task shader limitations:
2446           * Same as compute, because task shaders are compiled to compute shaders.
2447           */
2448          properties->maxDrawMeshTasksCount = 65535;
2449          properties->maxTaskTotalMemorySize = 65536;
2450          properties->maxTaskWorkGroupInvocations = 1024;
2451          properties->maxTaskWorkGroupSize[0] = 1024;
2452          properties->maxTaskWorkGroupSize[1] = 1024;
2453          properties->maxTaskWorkGroupSize[2] = 1024;
2454          properties->maxTaskOutputCount = 65535;
2455
2456          /* Mesh shader limitations:
2457           * Same as NGG, because MS are compiled to NGG.
2458           */
2459          properties->maxMeshMultiviewViewCount = MAX_VIEWS;
2460          properties->maxMeshOutputPrimitives = 256;
2461          properties->maxMeshOutputVertices = 256;
2462          properties->maxMeshTotalMemorySize = 31 * 1024; /* Reserve 1K for prim indices, etc. */
2463          properties->maxMeshWorkGroupInvocations = 256;
2464          properties->maxMeshWorkGroupSize[0] = 256;
2465          properties->maxMeshWorkGroupSize[1] = 256;
2466          properties->maxMeshWorkGroupSize[2] = 256;
2467          properties->meshOutputPerPrimitiveGranularity = 1;
2468          properties->meshOutputPerVertexGranularity = 1;
2469
2470          break;
2471       }
2472       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
2473          VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *properties =
2474             (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
2475          STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
2476                        sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
2477          memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
2478                 vk_shaderModuleIdentifierAlgorithmUUID,
2479                 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
2480          break;
2481       }
2482       default:
2483          break;
2484       }
2485    }
2486 }
2487
2488 static void
2489 radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
2490                                                  uint32_t *pCount,
2491                                                  VkQueueFamilyProperties **pQueueFamilyProperties)
2492 {
2493    int num_queue_families = 1;
2494    int idx;
2495    if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
2496        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
2497       num_queue_families++;
2498
2499    if (pQueueFamilyProperties == NULL) {
2500       *pCount = num_queue_families;
2501       return;
2502    }
2503
2504    if (!*pCount)
2505       return;
2506
2507    idx = 0;
2508    if (*pCount >= 1) {
2509       *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2510          .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
2511                        VK_QUEUE_SPARSE_BINDING_BIT,
2512          .queueCount = 1,
2513          .timestampValidBits = 64,
2514          .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2515       };
2516       idx++;
2517    }
2518
2519    if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
2520        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
2521       if (*pCount > idx) {
2522          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2523             .queueFlags =
2524                VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
2525             .queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues,
2526             .timestampValidBits = 64,
2527             .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2528          };
2529          idx++;
2530       }
2531    }
2532    *pCount = idx;
2533 }
2534
2535 static const VkQueueGlobalPriorityEXT radv_global_queue_priorities[] = {
2536    VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT,
2537    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT,
2538    VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
2539    VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT,
2540 };
2541
2542 VKAPI_ATTR void VKAPI_CALL
2543 radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
2544                                              VkQueueFamilyProperties2 *pQueueFamilyProperties)
2545 {
2546    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2547    if (!pQueueFamilyProperties) {
2548       radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
2549       return;
2550    }
2551    VkQueueFamilyProperties *properties[] = {
2552       &pQueueFamilyProperties[0].queueFamilyProperties,
2553       &pQueueFamilyProperties[1].queueFamilyProperties,
2554       &pQueueFamilyProperties[2].queueFamilyProperties,
2555    };
2556    radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
2557    assert(*pCount <= 3);
2558
2559    for (uint32_t i = 0; i < *pCount; i++) {
2560       vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext)
2561       {
2562          switch (ext->sType) {
2563          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: {
2564             VkQueueFamilyGlobalPriorityPropertiesEXT *prop =
2565                (VkQueueFamilyGlobalPriorityPropertiesEXT *)ext;
2566             STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_EXT);
2567             prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
2568             memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
2569             break;
2570          }
2571          default:
2572             break;
2573          }
2574       }
2575    }
2576 }
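/* A minimal app-side usage sketch (hypothetical code, not part of the
 * driver) of the standard Vulkan two-call idiom implemented above: query
 * the queue family count first, then fill a properly typed array.
 */
static inline void
example_query_queue_families(VkPhysicalDevice pdev)
{
   uint32_t count = 0;
   radv_GetPhysicalDeviceQueueFamilyProperties2(pdev, &count, NULL);

   /* RADV exposes at most 3 queue families (see the assert above). */
   VkQueueFamilyProperties2 props[3];
   count = MIN2(count, 3);
   for (uint32_t i = 0; i < count; i++) {
      props[i] = (VkQueueFamilyProperties2){
         .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
      };
   }
   radv_GetPhysicalDeviceQueueFamilyProperties2(pdev, &count, props);
}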
2577
2578 static void
2579 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
2580                                   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2581 {
2582    RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2583    VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
2584
2585    /* For all memory heaps, the computation of the budget is as follows:
2586     *   heap_budget = heap_size - global_heap_usage + app_heap_usage
2587     *
2588     * The Vulkan spec 1.1.97 says that the budget should include any
2589     * currently allocated device memory.
2590     *
2591     * Note that the application heap usages are not really accurate (e.g.
2592     * in the presence of shared buffers).
2593     */
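   /* Worked example (hypothetical numbers): with heap_size = 8 GiB,
    * global_heap_usage = 5 GiB and app_heap_usage = 2 GiB, the budget is
    * 8 - 5 + 2 = 5 GiB, i.e. the 3 GiB still free on the heap plus the
    * 2 GiB this application already has allocated.
    */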
2594    if (!device->rad_info.has_dedicated_vram) {
2595       /* On APUs, the driver exposes fake heaps to the application because the carveout is
2596        * usually too small for games, so the budgets need to be redistributed accordingly.
2597        */
2598
2599       assert(device->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
2600       assert(device->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
2601       assert(device->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
2602       uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;
2603
2604       /* Get the visible VRAM/GTT heap sizes and internal usages. */
2605       uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;
2606       uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
2607
2608       uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
2609                                          device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2610       uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2611
2612       /* Compute the total heap size, internal and system usage. */
2613       uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
2614       uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
2615       uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
2616                                     device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2617
2618       uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
2619
2620       /* Compute the total free space that can be allocated for this process across all heaps. */
2621       uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
2622
2623       /* Compute the remaining visible VRAM size for this process. */
2624       uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);
2625
2626       /* Distribute the total free space (2/3 as VRAM and 1/3 as GTT) to match the heap sizes,
2627        * and align down to the page size to be conservative.
2628        */
2629       vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space),
2630                                           device->rad_info.gart_page_size);
2631       uint64_t gtt_free_space = total_free_space - vram_vis_free_space;
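      /* Worked example (hypothetical numbers, ignoring page alignment): with
       * total_free_space = 6 GiB and vram_vis_free_space = 5 GiB, the 2/3
       * cap yields MIN2(4 GiB, 5 GiB) = 4 GiB of visible VRAM free space,
       * leaving 2 GiB for GTT.
       */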
2632
2633       memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
2634       memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
2635       memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
2636       memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
2637    } else {
2638       unsigned mask = device->heaps;
2639       unsigned heap = 0;
2640       while (mask) {
2641          uint64_t internal_usage = 0, system_usage = 0;
2642          unsigned type = 1u << u_bit_scan(&mask);
2643
2644          switch (type) {
2645          case RADV_HEAP_VRAM:
2646             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2647             system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
2648             break;
2649          case RADV_HEAP_VRAM_VIS:
2650             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
2651             if (!(device->heaps & RADV_HEAP_VRAM))
2652                internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2653             system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
2654             break;
2655          case RADV_HEAP_GTT:
2656             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2657             system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2658             break;
2659          }
2660
2661          uint64_t total_usage = MAX2(internal_usage, system_usage);
2662
2663          uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
2664                                MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
2665          memoryBudget->heapBudget[heap] = free_space + internal_usage;
2666          memoryBudget->heapUsage[heap] = internal_usage;
2667          ++heap;
2668       }
2669
2670       assert(heap == memory_properties->memoryHeapCount);
2671    }
2672
2673    /* The heapBudget and heapUsage values must be zero for array elements
2674     * greater than or equal to
2675     * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
2676     */
2677    for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
2678       memoryBudget->heapBudget[i] = 0;
2679       memoryBudget->heapUsage[i] = 0;
2680    }
2681 }
2682
2683 VKAPI_ATTR void VKAPI_CALL
2684 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2685                                         VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2686 {
2687    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2688
2689    pMemoryProperties->memoryProperties = pdevice->memory_properties;
2690
2691    VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2692       vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2693    if (memory_budget)
2694       radv_get_memory_budget_properties(physicalDevice, memory_budget);
2695 }
2696
2697 VKAPI_ATTR VkResult VKAPI_CALL
2698 radv_GetMemoryHostPointerPropertiesEXT(
2699    VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
2700    VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
2701 {
2702    RADV_FROM_HANDLE(radv_device, device, _device);
2703
2704    switch (handleType) {
2705    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
2706       const struct radv_physical_device *physical_device = device->physical_device;
2707       uint32_t memoryTypeBits = 0;
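      /* Host pointer imports are only supported here for cacheable GTT
       * (system memory without write-combining), so report the first
       * matching memory type.
       */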
2708       for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
2709          if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
2710              !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
2711             memoryTypeBits = (1 << i);
2712             break;
2713          }
2714       }
2715       pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
2716       return VK_SUCCESS;
2717    }
2718    default:
2719       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2720    }
2721 }
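/* A minimal app-side sketch (hypothetical code) of importing a host
 * allocation with VK_EXT_external_memory_host: query the compatible memory
 * types for the pointer, then chain VkImportMemoryHostPointerInfoEXT into
 * the allocation. Both the pointer and the size are assumed to be aligned
 * to minImportedHostPointerAlignment.
 */
static inline VkResult
example_import_host_pointer(VkDevice dev, void *ptr, VkDeviceSize size,
                            VkDeviceMemory *out_mem)
{
   VkMemoryHostPointerPropertiesEXT props = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
   };
   VkResult result = radv_GetMemoryHostPointerPropertiesEXT(
      dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT, ptr, &props);
   if (result != VK_SUCCESS)
      return result;
   if (!props.memoryTypeBits)
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;

   /* Pick the lowest compatible memory type index. */
   uint32_t type_index = 0;
   while (!(props.memoryTypeBits & (1u << type_index)))
      type_index++;

   const VkImportMemoryHostPointerInfoEXT import_info = {
      .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
      .pHostPointer = ptr,
   };
   const VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &import_info,
      .allocationSize = size,
      .memoryTypeIndex = type_index,
   };
   return radv_AllocateMemory(dev, &alloc_info, NULL, out_mem);
}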
2722
2723 static enum radeon_ctx_priority
2724 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
2725 {
2726    /* Default to MEDIUM when a specific global priority isn't requested */
2727    if (!pObj)
2728       return RADEON_CTX_PRIORITY_MEDIUM;
2729
2730    switch (pObj->globalPriority) {
2731    case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2732       return RADEON_CTX_PRIORITY_REALTIME;
2733    case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2734       return RADEON_CTX_PRIORITY_HIGH;
2735    case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2736       return RADEON_CTX_PRIORITY_MEDIUM;
2737    case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2738       return RADEON_CTX_PRIORITY_LOW;
2739    default:
2740       unreachable("Illegal global priority value");
2741       return RADEON_CTX_PRIORITY_INVALID;
2742    }
2743 }
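/* An app-side sketch (hypothetical code) of how a global priority reaches
 * the function above: chain VkDeviceQueueGlobalPriorityCreateInfoEXT into
 * the queue create info at device creation time.
 */
static inline VkDeviceQueueCreateInfo
example_high_priority_queue_info(const float *queue_priority)
{
   static const VkDeviceQueueGlobalPriorityCreateInfoEXT global_priority = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
      .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
   };

   return (VkDeviceQueueCreateInfo){
      .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
      .pNext = &global_priority,
      .queueFamilyIndex = 0, /* the graphics family on RADV */
      .queueCount = 1,
      .pQueuePriorities = queue_priority,
   };
}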
2744
2745 int
2746 radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
2747                 const VkDeviceQueueCreateInfo *create_info,
2748                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
2749 {
2750    queue->device = device;
2751    queue->priority = radv_get_queue_global_priority(global_priority);
2752    queue->hw_ctx = device->hw_ctx[queue->priority];
2753    queue->state.qf = vk_queue_to_radv(device->physical_device, create_info->queueFamilyIndex);
2754
2755    VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
2756    if (result != VK_SUCCESS)
2757       return result;
2758
2759    queue->vk.driver_submit = radv_queue_submit;
2760
2761    return VK_SUCCESS;
2762 }
2763
2764 static void
2765 radv_queue_state_finish(struct radv_queue_state *queue, struct radeon_winsys *ws)
2766 {
2767    if (queue->initial_full_flush_preamble_cs)
2768       ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2769    if (queue->initial_preamble_cs)
2770       ws->cs_destroy(queue->initial_preamble_cs);
2771    if (queue->continue_preamble_cs)
2772       ws->cs_destroy(queue->continue_preamble_cs);
2773    if (queue->descriptor_bo)
2774       ws->buffer_destroy(ws, queue->descriptor_bo);
2775    if (queue->scratch_bo)
2776       ws->buffer_destroy(ws, queue->scratch_bo);
2777    if (queue->esgs_ring_bo)
2778       ws->buffer_destroy(ws, queue->esgs_ring_bo);
2779    if (queue->gsvs_ring_bo)
2780       ws->buffer_destroy(ws, queue->gsvs_ring_bo);
2781    if (queue->tess_rings_bo)
2782       ws->buffer_destroy(ws, queue->tess_rings_bo);
2783    if (queue->task_rings_bo)
2784       ws->buffer_destroy(ws, queue->task_rings_bo);
2785    if (queue->gds_bo)
2786       ws->buffer_destroy(ws, queue->gds_bo);
2787    if (queue->gds_oa_bo)
2788       ws->buffer_destroy(ws, queue->gds_oa_bo);
2789    if (queue->compute_scratch_bo)
2790       ws->buffer_destroy(ws, queue->compute_scratch_bo);
2791 }
2792
2793 static void
2794 radv_queue_finish(struct radv_queue *queue)
2795 {
2796    radv_queue_state_finish(&queue->state, queue->device->ws);
2797    vk_queue_finish(&queue->vk);
2798 }
2799
2800 static VkResult
2801 radv_device_init_border_color(struct radv_device *device)
2802 {
2803    VkResult result;
2804
2805    result = device->ws->buffer_create(
2806       device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
2807       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
2808       RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
2809
2810    if (result != VK_SUCCESS)
2811       return vk_error(device, result);
2812
2813    result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
2814    if (result != VK_SUCCESS)
2815       return vk_error(device, result);
2816
2817    device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
2818    if (!device->border_color_data.colors_gpu_ptr)
2819       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2820    mtx_init(&device->border_color_data.mutex, mtx_plain);
2821
2822    return VK_SUCCESS;
2823 }
2824
2825 static void
2826 radv_device_finish_border_color(struct radv_device *device)
2827 {
2828    if (device->border_color_data.bo) {
2829       device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
2830       device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
2831
2832       mtx_destroy(&device->border_color_data.mutex);
2833    }
2834 }
2835
2836 static VkResult
2837 radv_device_init_vs_prologs(struct radv_device *device)
2838 {
2839    u_rwlock_init(&device->vs_prologs_lock);
2840    device->vs_prologs = _mesa_hash_table_create(NULL, &radv_hash_vs_prolog, &radv_cmp_vs_prolog);
2841    if (!device->vs_prologs)
2842       return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2843
2844    /* don't pre-compile prologs if we want to print them */
2845    if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
2846       return VK_SUCCESS;
2847
2848    struct radv_vs_input_state state;
2849    state.nontrivial_divisors = 0;
2850    memset(state.offsets, 0, sizeof(state.offsets));
2851    state.alpha_adjust_lo = 0;
2852    state.alpha_adjust_hi = 0;
2853    memset(state.formats, 0, sizeof(state.formats));
2854
2855    struct radv_vs_prolog_key key;
2856    key.state = &state;
2857    key.misaligned_mask = 0;
2858    key.as_ls = false;
2859    key.is_ngg = device->physical_device->use_ngg;
2860    key.next_stage = MESA_SHADER_VERTEX;
2861    key.wave32 = device->physical_device->ge_wave_size == 32;
2862
2863    for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
2864       state.attribute_mask = BITFIELD_MASK(i);
2865       state.instance_rate_inputs = 0;
2866
2867       key.num_attributes = i;
2868
2869       device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
2870       if (!device->simple_vs_prologs[i - 1])
2871          return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2872    }
2873
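   /* Pre-compile a prolog for every consecutive range of instance-rate
    * attributes. For example (following the loops below), num_attributes = 2
    * yields three variants, in order: instance rate on attribute 0 only, on
    * attribute 1 only, then on both.
    */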
2874    unsigned idx = 0;
2875    for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) {
2876       state.attribute_mask = BITFIELD_MASK(num_attributes);
2877
2878       for (unsigned i = 0; i < num_attributes; i++)
2879          state.divisors[i] = 1;
2880
2881       for (unsigned count = 1; count <= num_attributes; count++) {
2882          for (unsigned start = 0; start <= (num_attributes - count); start++) {
2883             state.instance_rate_inputs = u_bit_consecutive(start, count);
2884
2885             key.num_attributes = num_attributes;
2886
2887             struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key);
2888             if (!prolog)
2889                return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2890
2891             assert(idx ==
2892                    radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs));
2893             device->instance_rate_vs_prologs[idx++] = prolog;
2894          }
2895       }
2896    }
2897    assert(idx == ARRAY_SIZE(device->instance_rate_vs_prologs));
2898
2899    return VK_SUCCESS;
2900 }
2901
2902 static void
2903 radv_device_finish_vs_prologs(struct radv_device *device)
2904 {
2905    if (device->vs_prologs) {
2906       hash_table_foreach(device->vs_prologs, entry)
2907       {
2908          free((void *)entry->key);
2909          radv_shader_part_destroy(device, entry->data);
2910       }
2911       _mesa_hash_table_destroy(device->vs_prologs, NULL);
2912    }
2913
2914    for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++)
2915       radv_shader_part_destroy(device, device->simple_vs_prologs[i]);
2916
2917    for (unsigned i = 0; i < ARRAY_SIZE(device->instance_rate_vs_prologs); i++)
2918       radv_shader_part_destroy(device, device->instance_rate_vs_prologs[i]);
2919 }
2920
2921 VkResult
2922 radv_device_init_vrs_state(struct radv_device *device)
2923 {
2924    /* FIXME: 4k depth buffers should be large enough for now but we might want to adjust this
2925     * dynamically at some point.
2926     */
2927    uint32_t width = 4096, height = 4096;
2928    VkDeviceMemory mem;
2929    VkBuffer buffer;
2930    VkResult result;
2931    VkImage image;
2932
2933    VkImageCreateInfo image_create_info = {
2934       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2935       .imageType = VK_IMAGE_TYPE_2D,
2936       .format = VK_FORMAT_D16_UNORM,
2937       .extent = {width, height, 1},
2938       .mipLevels = 1,
2939       .arrayLayers = 1,
2940       .samples = VK_SAMPLE_COUNT_1_BIT,
2941       .tiling = VK_IMAGE_TILING_OPTIMAL,
2942       .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
2943       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2944       .queueFamilyIndexCount = 0,
2945       .pQueueFamilyIndices = NULL,
2946       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2947    };
2948
2949    result = radv_CreateImage(radv_device_to_handle(device), &image_create_info,
2950                              &device->meta_state.alloc, &image);
2951    if (result != VK_SUCCESS)
2952       return result;
2953
2954    VkBufferCreateInfo buffer_create_info = {
2955       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2956       .size = radv_image_from_handle(image)->planes[0].surface.meta_size,
2957       .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
2958       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2959    };
2960
2961    result = radv_CreateBuffer(radv_device_to_handle(device), &buffer_create_info,
2962                               &device->meta_state.alloc, &buffer);
2963    if (result != VK_SUCCESS)
2964       goto fail_create;
2965
2966    VkBufferMemoryRequirementsInfo2 info = {
2967       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
2968       .buffer = buffer,
2969    };
2970    VkMemoryRequirements2 mem_req = {
2971       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2972    };
2973    radv_GetBufferMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req);
2974
2975    VkMemoryAllocateInfo alloc_info = {
2976       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2977       .allocationSize = mem_req.memoryRequirements.size,
2978    };
2979
2980    result = radv_AllocateMemory(radv_device_to_handle(device), &alloc_info,
2981                                 &device->meta_state.alloc, &mem);
2982    if (result != VK_SUCCESS)
2983       goto fail_alloc;
2984
2985    VkBindBufferMemoryInfo bind_info = {
2986       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
2987       .buffer = buffer,
2988       .memory = mem,
2989       .memoryOffset = 0
2990    };
2991
2992    result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info);
2993    if (result != VK_SUCCESS)
2994       goto fail_bind;
2995
2996    device->vrs.image = radv_image_from_handle(image);
2997    device->vrs.buffer = radv_buffer_from_handle(buffer);
2998    device->vrs.mem = radv_device_memory_from_handle(mem);
2999
3000    return VK_SUCCESS;
3001
3002 fail_bind:
3003    radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);
3004 fail_alloc:
3005    radv_DestroyBuffer(radv_device_to_handle(device), buffer, &device->meta_state.alloc);
3006 fail_create:
3007    radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);
3008
3009    return result;
3010 }
3011
3012 static void
3013 radv_device_finish_vrs_image(struct radv_device *device)
3014 {
3015    if (!device->vrs.image)
3016       return;
3017
3018    radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
3019                    &device->meta_state.alloc);
3020    radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer),
3021                      &device->meta_state.alloc);
3022    radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image),
3023                      &device->meta_state.alloc);
3024 }
3025
3026 static enum radv_force_vrs
3027 radv_parse_vrs_rates(const char *str)
3028 {
3029    if (!strcmp(str, "2x2")) {
3030       return RADV_FORCE_VRS_2x2;
3031    } else if (!strcmp(str, "2x1")) {
3032       return RADV_FORCE_VRS_2x1;
3033    } else if (!strcmp(str, "1x2")) {
3034       return RADV_FORCE_VRS_1x2;
3035    } else if (!strcmp(str, "1x1")) {
3036       return RADV_FORCE_VRS_1x1;
3037    }
3038
3039    fprintf(stderr, "radv: Invalid VRS rates specified (valid values are 2x2, 2x1, 1x2 and 1x1)\n");
3040    return RADV_FORCE_VRS_1x1;
3041 }
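/* Usage example: RADV_FORCE_VRS=2x2 forces a 2x2 VRS rate on GFX10.3+ (see
 * radv_CreateDevice below); invalid strings fall back to 1x1.
 */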
3042
3043 static const char *
3044 radv_get_force_vrs_config_file(void)
3045 {
3046    return getenv("RADV_FORCE_VRS_CONFIG_FILE");
3047 }
3048
3049 static enum radv_force_vrs
3050 radv_parse_force_vrs_config_file(const char *config_file)
3051 {
3052    enum radv_force_vrs force_vrs = RADV_FORCE_VRS_1x1;
3053    char buf[4];
3054    FILE *f;
3055
3056    f = fopen(config_file, "r");
3057    if (!f) {
3058       fprintf(stderr, "radv: Can't open file: '%s'.\n", config_file);
3059       return force_vrs;
3060    }
3061
3062    if (fread(buf, sizeof(buf), 1, f) == 1) {
3063       buf[3] = '\0';
3064       force_vrs = radv_parse_vrs_rates(buf);
3065    }
3066
3067    fclose(f);
3068    return force_vrs;
3069 }
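/* Usage example (hypothetical path): the rate can also be driven by a file
 * that is re-read at runtime by the inotify thread below:
 *
 *    echo 2x2 > /tmp/radv_vrs.cfg
 *    RADV_FORCE_VRS_CONFIG_FILE=/tmp/radv_vrs.cfg ./app
 *
 * Note that the fread() above requires sizeof(buf) = 4 bytes, so the file
 * must contain a rate followed by a newline (e.g. "2x2\n").
 */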
3070
3071 #ifdef __linux__
3072
3073 #define BUF_LEN ((10 * (sizeof(struct inotify_event) + NAME_MAX + 1)))
3074
3075 static int
3076 radv_notifier_thread_run(void *data)
3077 {
3078    struct radv_device *device = data;
3079    struct radv_notifier *notifier = &device->notifier;
3080    char buf[BUF_LEN];
3081
3082    while (!notifier->quit) {
3083       const char *file = radv_get_force_vrs_config_file();
3084       struct timespec tm = { .tv_nsec = 100000000 }; /* 100ms */
3085       int length, i = 0;
3086
3087       length = read(notifier->fd, buf, BUF_LEN);
3088       while (i < length) {
3089          struct inotify_event *event = (struct inotify_event *)&buf[i];
3090
3091          i += sizeof(struct inotify_event) + event->len;
3092          if (event->mask & IN_MODIFY || event->mask & IN_DELETE_SELF) {
3093             /* Sleep 100ms for editors that use a temporary file and delete the original. */
3094             thrd_sleep(&tm, NULL);
3095             device->force_vrs = radv_parse_force_vrs_config_file(file);
3096
3097             fprintf(stderr, "radv: Updated the per-vertex VRS rate to '%d'.\n", device->force_vrs);
3098
3099             if (event->mask & IN_DELETE_SELF) {
3100                inotify_rm_watch(notifier->fd, notifier->watch);
3101                notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF);
3102             }
3103          }
3104       }
3105
3106       thrd_sleep(&tm, NULL);
3107    }
3108
3109    return 0;
3110 }
3111
3112 #endif
3113
3114 static int
3115 radv_device_init_notifier(struct radv_device *device)
3116 {
3117 #ifndef __linux__
3118    return true;
3119 #else
3120    struct radv_notifier *notifier = &device->notifier;
3121    const char *file = radv_get_force_vrs_config_file();
3122    int ret;
3123
3124    notifier->fd = inotify_init1(IN_NONBLOCK);
3125    if (notifier->fd < 0)
3126       return false;
3127
3128    notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF);
3129    if (notifier->watch < 0)
3130       goto fail_watch;
3131
3132    ret = thrd_create(&notifier->thread, radv_notifier_thread_run, device);
3133    if (ret)
3134       goto fail_thread;
3135
3136    return true;
3137
3138 fail_thread:
3139    inotify_rm_watch(notifier->fd, notifier->watch);
3140 fail_watch:
3141    close(notifier->fd);
3142
3143    return false;
3144 #endif
3145 }
3146
3147 static void
3148 radv_device_finish_notifier(struct radv_device *device)
3149 {
3150 #ifdef __linux__
3151    struct radv_notifier *notifier = &device->notifier;
3152
3153    if (!notifier->thread)
3154       return;
3155
3156    notifier->quit = true;
3157    thrd_join(notifier->thread, NULL);
3158    inotify_rm_watch(notifier->fd, notifier->watch);
3159    close(notifier->fd);
3160 #endif
3161 }
3162
3163 static void
3164 radv_device_finish_perf_counter_lock_cs(struct radv_device *device)
3165 {
3166    if (!device->perf_counter_lock_cs)
3167       return;
3168
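   /* The array is sized 2 * PERF_CTR_MAX_PASSES (see the matching calloc in
    * radv_CreateDevice); since it is zero-initialized, entries that were
    * never built are NULL, hence the per-entry check below.
    */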
3169    for (unsigned i = 0; i < 2 * PERF_CTR_MAX_PASSES; ++i) {
3170       if (device->perf_counter_lock_cs[i])
3171          device->ws->cs_destroy(device->perf_counter_lock_cs[i]);
3172    }
3173
3174    free(device->perf_counter_lock_cs);
3175 }
3176
3177 VKAPI_ATTR VkResult VKAPI_CALL
3178 radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
3179                   const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
3180 {
3181    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
3182    VkResult result;
3183    struct radv_device *device;
3184
3185    bool keep_shader_info = false;
3186    bool robust_buffer_access = false;
3187    bool robust_buffer_access2 = false;
3188    bool overallocation_disallowed = false;
3189    bool custom_border_colors = false;
3190    bool attachment_vrs_enabled = false;
3191    bool image_float32_atomics = false;
3192    bool vs_prologs = false;
3193    bool global_bo_list = false;
3194    bool image_2d_view_of_3d = false;
3195    bool primitives_generated_query = false;
3196    bool use_perf_counters = false;
3197
3198    /* Check enabled features */
3199    if (pCreateInfo->pEnabledFeatures) {
3200       if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
3201          robust_buffer_access = true;
3202    }
3203
3204    vk_foreach_struct_const(ext, pCreateInfo->pNext)
3205    {
3206       switch (ext->sType) {
3207       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
3208          const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
3209          if (features->features.robustBufferAccess)
3210             robust_buffer_access = true;
3211          break;
3212       }
3213       case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
3214          const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
3215          if (overallocation->overallocationBehavior ==
3216              VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
3217             overallocation_disallowed = true;
3218          break;
3219       }
3220       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
3221          const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =
3222             (const void *)ext;
3223          custom_border_colors = border_color_features->customBorderColors;
3224          break;
3225       }
3226       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
3227          const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
3228          attachment_vrs_enabled = vrs->attachmentFragmentShadingRate;
3229          break;
3230       }
3231       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
3232          const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
3233          if (features->robustBufferAccess2)
3234             robust_buffer_access2 = true;
3235          break;
3236       }
3237       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
3238          const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (const void *)ext;
3239          if (features->shaderImageFloat32Atomics ||
3240              features->sparseImageFloat32Atomics)
3241             image_float32_atomics = true;
3242          break;
3243       }
3244       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
3245          const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (const void *)ext;
3246          if (features->shaderImageFloat32AtomicMinMax ||
3247              features->sparseImageFloat32AtomicMinMax)
3248             image_float32_atomics = true;
3249          break;
3250       }
3251       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
3252          const VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features = (const void *)ext;
3253          if (features->vertexInputDynamicState)
3254             vs_prologs = true;
3255          break;
3256       }
3257       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
3258          const VkPhysicalDeviceVulkan12Features *features = (const void *)ext;
3259          if (features->bufferDeviceAddress || features->descriptorIndexing)
3260             global_bo_list = true;
3261          break;
3262       }
3263       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
3264          const VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features = (const void *)ext;
3265          if (features->image2DViewOf3D)
3266             image_2d_view_of_3d = true;
3267          break;
3268       }
3269       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
3270          const VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features = (const void *)ext;
3271          if (features->primitivesGeneratedQuery ||
3272              features->primitivesGeneratedQueryWithRasterizerDiscard ||
3273              features->primitivesGeneratedQueryWithNonZeroStreams)
3274             primitives_generated_query = true;
3275          break;
3276       }
3277       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
3278          const VkPhysicalDevicePerformanceQueryFeaturesKHR *features = (const void *)ext;
3279          if (features->performanceCounterQueryPools)
3280             use_perf_counters = true;
3281          break;
3282       }
3283       default:
3284          break;
3285       }
3286    }
3287
3288    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
3289                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3290    if (!device)
3291       return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3292
3293    struct vk_device_dispatch_table dispatch_table;
3294
3295    if (physical_device->instance->vk.app_info.app_name &&
3296        !strcmp(physical_device->instance->vk.app_info.app_name, "metroexodus")) {
3297       /* Metro Exodus (Linux native) calls vkGetSemaphoreCounterValue() with a NULL semaphore and it
3298        * crashes sometimes. Work around this game bug by enabling an internal layer. Remove this
3299        * when the game is fixed.
3300        */
3301       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &metro_exodus_device_entrypoints, true);
3302       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
3303    } else if (radv_thread_trace_enabled()) {
3304       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);
3305       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
3306    } else {
3307       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true);
3308    }
3309    vk_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_device_entrypoints, false);
3310
3311    result =
3312       vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator);
3313    if (result != VK_SUCCESS) {
3314       vk_free(&device->vk.alloc, device);
3315       return result;
3316    }
3317
3318    device->instance = physical_device->instance;
3319    device->physical_device = physical_device;
3320    simple_mtx_init(&device->trace_mtx, mtx_plain);
3321    simple_mtx_init(&device->pstate_mtx, mtx_plain);
3322
3323    device->ws = physical_device->ws;
3324    vk_device_set_drm_fd(&device->vk, device->ws->get_fd(device->ws));
3325
3326    /* With update after bind we can't attach BOs to the command buffer
3327     * from the descriptor set anymore, so we have to use a global BO list.
3328     */
3329    device->use_global_bo_list = global_bo_list ||
3330                                 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
3331                                 device->vk.enabled_extensions.EXT_descriptor_indexing ||
3332                                 device->vk.enabled_extensions.EXT_buffer_device_address ||
3333                                 device->vk.enabled_extensions.KHR_buffer_device_address ||
3334                                 device->vk.enabled_extensions.KHR_ray_tracing_pipeline ||
3335                                 device->vk.enabled_extensions.KHR_acceleration_structure ||
3336                                 device->vk.enabled_extensions.VALVE_descriptor_set_host_mapping;
3337
3338    device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
3339    device->robust_buffer_access2 = robust_buffer_access2;
3340
3341    device->attachment_vrs_enabled = attachment_vrs_enabled;
3342
3343    device->image_float32_atomics = image_float32_atomics;
3344
3345    device->image_2d_view_of_3d = image_2d_view_of_3d;
3346
3347    device->primitives_generated_query = primitives_generated_query;
3348
3349    radv_init_shader_arenas(device);
3350
3351    device->overallocation_disallowed = overallocation_disallowed;
3352    mtx_init(&device->overallocation_mutex, mtx_plain);
3353
3354    /* Create one context per queue priority. */
3355    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3356       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3357       const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3358          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3359       enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
3360
3361       if (device->hw_ctx[priority])
3362          continue;
3363
3364       result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
3365       if (result != VK_SUCCESS)
3366          goto fail;
3367    }
3368
3369    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3370       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3371       uint32_t qfi = queue_create->queueFamilyIndex;
3372       const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3373          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3374
3375       device->queues[qfi] =
3376          vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
3377                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3378       if (!device->queues[qfi]) {
3379          result = VK_ERROR_OUT_OF_HOST_MEMORY;
3380          goto fail;
3381       }
3382
3383       memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
3384
3385       device->queue_count[qfi] = queue_create->queueCount;
3386
3387       for (unsigned q = 0; q < queue_create->queueCount; q++) {
3388          result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority);
3389          if (result != VK_SUCCESS)
3390             goto fail;
3391       }
3392    }
3393    device->private_sdma_queue = VK_NULL_HANDLE;
3394
3395    device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 &&
3396                          !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
3397
3398    /* The maximum number of scratch waves. Scratch space isn't divided
3399     * evenly between CUs. The number is only a function of the number of CUs.
3400     * We can decrease the constant to decrease the scratch buffer size.
3401     *
3402     * device->scratch_waves must be >= the maximum possible size of
3403     * 1 threadgroup, so that the hw doesn't hang from being unable
3404     * to start any.
3405     *
3406     * The recommended value is 4 per CU at most. Higher numbers don't
3407     * bring much benefit, but they still occupy chip resources (think
3408     * async compute). I've seen ~2% performance difference between 4 and 32.
3409     */
3410    uint32_t max_threads_per_block = 2048;
3411    device->scratch_waves =
3412       MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);
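   /* Worked example for the expression above (hypothetical GPU): 40 CUs
    * gives MAX2(32 * 40, 2048 / 64) = MAX2(1280, 32) = 1280 scratch waves.
    */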
3413
3414    device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
3415
3416    if (device->physical_device->rad_info.gfx_level >= GFX7) {
3417       /* If the KMD allows it (there is a KMD hw register for it),
3418        * allow launching waves out-of-order.
3419        */
3420       device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
3421    }
3422
3423    if (device->instance->debug_flags & RADV_DEBUG_HANG) {
3424       /* Enable GPU hangs detection and dump logs if a GPU hang is
3425        * detected.
3426        */
3427       keep_shader_info = true;
3428
3429       if (!radv_init_trace(device))
3430          goto fail;
3431
3432       fprintf(stderr,
3433               "*****************************************************************************\n");
3434       fprintf(stderr,
3435               "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
3436       fprintf(stderr,
3437               "*****************************************************************************\n");
3438
3439       /* Wait for idle after every draw/dispatch to identify the
3440        * first bad call.
3441        */
3442       device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
3443
3444       radv_dump_enabled_options(device, stderr);
3445    }
3446
3447    if (radv_thread_trace_enabled()) {
3448       if (device->physical_device->rad_info.gfx_level < GFX8 ||
3449           device->physical_device->rad_info.gfx_level > GFX10_3) {
3450          fprintf(stderr, "GPU hardware not supported: refer to "
3451                          "the RGP documentation for the list of "
3452                          "supported GPUs!\n");
3453          abort();
3454       }
3455
3456       if (!radv_thread_trace_init(device))
3457          goto fail;
3458
3459       fprintf(stderr, "radv: Thread trace support is enabled (initial buffer size: %u MiB, "
3460                       "instruction timing: %s, cache counters: %s).\n",
3461               device->thread_trace.buffer_size / (1024 * 1024),
3462               radv_is_instruction_timing_enabled() ? "enabled" : "disabled",
3463               radv_spm_trace_enabled() ? "enabled" : "disabled");
3464
3465       if (radv_spm_trace_enabled()) {
3466          if (device->physical_device->rad_info.gfx_level >= GFX10) {
3467             if (!radv_spm_init(device))
3468                goto fail;
3469          } else {
3470             fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n",
3471                     device->physical_device->name);
3472          }
3473       }
3474    }
3475
3476    if (getenv("RADV_TRAP_HANDLER")) {
3477       /* TODO: Add support for more hardware. */
3478       assert(device->physical_device->rad_info.gfx_level == GFX8);
3479
3480       fprintf(stderr, "**********************************************************************\n");
3481       fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
3482       fprintf(stderr, "**********************************************************************\n");
3483
3484       /* To get the disassembly of the faulty shaders, we have to
3485        * keep some shader info around.
3486        */
3487       keep_shader_info = true;
3488
3489       if (!radv_trap_handler_init(device))
3490          goto fail;
3491    }
3492
3493    if (device->physical_device->rad_info.gfx_level >= GFX10_3) {
3494       if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) {
3495          const char *file = radv_get_force_vrs_config_file();
3496
3497          device->force_vrs = radv_parse_force_vrs_config_file(file);
3498
3499          if (radv_device_init_notifier(device)) {
3500             device->force_vrs_enabled = true;
3501          } else {
3502             fprintf(stderr, "radv: Failed to initialize the notifier for RADV_FORCE_VRS_CONFIG_FILE!\n");
3503          }
3504       } else if (getenv("RADV_FORCE_VRS")) {
3505          const char *vrs_rates = getenv("RADV_FORCE_VRS");
3506
3507          device->force_vrs = radv_parse_vrs_rates(vrs_rates);
3508          device->force_vrs_enabled = device->force_vrs != RADV_FORCE_VRS_1x1;
3509       }
3510    }
3511
3512    /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
3513    device->load_grid_size_from_user_sgpr = device->physical_device->rad_info.gfx_level >= GFX10_3;
3514
3515    device->keep_shader_info = keep_shader_info;
3516    result = radv_device_init_meta(device);
3517    if (result != VK_SUCCESS)
3518       goto fail;
3519
3520    radv_device_init_msaa(device);
3521
3522    /* If the border color extension is enabled, let's create the buffer we need. */
3523    if (custom_border_colors) {
3524       result = radv_device_init_border_color(device);
3525       if (result != VK_SUCCESS)
3526          goto fail;
3527    }
3528
3529    if (vs_prologs) {
3530       result = radv_device_init_vs_prologs(device);
3531       if (result != VK_SUCCESS)
3532          goto fail;
3533    }
3534
3535    if (device->physical_device->rad_info.gfx_level >= GFX7)
3536       cik_create_gfx_config(device);
3537
3538    VkPipelineCacheCreateInfo ci;
3539    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
3540    ci.pNext = NULL;
3541    ci.flags = 0;
3542    ci.pInitialData = NULL;
3543    ci.initialDataSize = 0;
3544    VkPipelineCache pc;
3545    result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc);
3546    if (result != VK_SUCCESS)
3547       goto fail_meta;
3548
3549    device->mem_cache = radv_pipeline_cache_from_handle(pc);
3550
3551    device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
3552    if (device->force_aniso >= 0) {
3553       fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
3554               1 << util_logbase2(device->force_aniso));
3555    }
3556
3557    if (use_perf_counters) {
3558       size_t bo_size = PERF_CTR_BO_PASS_OFFSET + sizeof(uint64_t) * PERF_CTR_MAX_PASSES;
3559       result =
3560          device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT,
3561                                    RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
3562                                    RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo);
3563       if (result != VK_SUCCESS)
3564          goto fail_cache;
3565
3566       device->perf_counter_lock_cs =
3567          calloc(sizeof(struct radeon_winsys_cs *), 2 * PERF_CTR_MAX_PASSES);
3568       if (!device->perf_counter_lock_cs) {
3569          result = VK_ERROR_OUT_OF_HOST_MEMORY;
3570          goto fail_cache;
3571       }
3572
3573       if (!device->physical_device->ac_perfcounters.blocks) {
3574          result = VK_ERROR_INITIALIZATION_FAILED;
3575          goto fail_cache;
3576       }
3577    }
3578
3579    *pDevice = radv_device_to_handle(device);
3580    return VK_SUCCESS;
3581
3582 fail_cache:
3583    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3584 fail_meta:
3585    radv_device_finish_meta(device);
3586 fail:
3587    radv_thread_trace_finish(device);
3588
3589    radv_spm_finish(device);
3590
3591    radv_trap_handler_finish(device);
3592    radv_finish_trace(device);
3593
3594    radv_device_finish_perf_counter_lock_cs(device);
3595    if (device->perf_counter_bo)
3596       device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
3597    if (device->gfx_init)
3598       device->ws->buffer_destroy(device->ws, device->gfx_init);
3599
3600    radv_device_finish_notifier(device);
3601    radv_device_finish_vs_prologs(device);
3602    radv_device_finish_border_color(device);
3603
3604    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3605       for (unsigned q = 0; q < device->queue_count[i]; q++)
3606          radv_queue_finish(&device->queues[i][q]);
3607       if (device->queue_count[i])
3608          vk_free(&device->vk.alloc, device->queues[i]);
3609    }
3610
3611    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
3612       if (device->hw_ctx[i])
3613          device->ws->ctx_destroy(device->hw_ctx[i]);
3614    }
3615
3616    simple_mtx_destroy(&device->pstate_mtx);
3617    simple_mtx_destroy(&device->trace_mtx);
3618    mtx_destroy(&device->overallocation_mutex);
3619
3620    vk_device_finish(&device->vk);
3621    vk_free(&device->vk.alloc, device);
3622    return result;
3623 }
3624
3625 VKAPI_ATTR void VKAPI_CALL
3626 radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
3627 {
3628    RADV_FROM_HANDLE(radv_device, device, _device);
3629
3630    if (!device)
3631       return;
3632
3633    radv_device_finish_perf_counter_lock_cs(device);
3634    if (device->perf_counter_bo)
3635       device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
3636
3637    if (device->gfx_init)
3638       device->ws->buffer_destroy(device->ws, device->gfx_init);
3639
3640    radv_device_finish_notifier(device);
3641    radv_device_finish_vs_prologs(device);
3642    radv_device_finish_border_color(device);
3643    radv_device_finish_vrs_image(device);
3644
3645    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3646       for (unsigned q = 0; q < device->queue_count[i]; q++)
3647          radv_queue_finish(&device->queues[i][q]);
3648       if (device->queue_count[i])
3649          vk_free(&device->vk.alloc, device->queues[i]);
3650    }
3651    if (device->private_sdma_queue != VK_NULL_HANDLE) {
3652       radv_queue_finish(device->private_sdma_queue);
3653       vk_free(&device->vk.alloc, device->private_sdma_queue);
3654    }
3655
3656    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
3657       if (device->hw_ctx[i])
3658          device->ws->ctx_destroy(device->hw_ctx[i]);
3659    }
3660
3661    mtx_destroy(&device->overallocation_mutex);
3662    simple_mtx_destroy(&device->pstate_mtx);
3663    simple_mtx_destroy(&device->trace_mtx);
3664
3665    radv_device_finish_meta(device);
3666
3667    VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
3668    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3669
3670    radv_trap_handler_finish(device);
3671    radv_finish_trace(device);
3672
3673    radv_destroy_shader_arenas(device);
3674
3675    radv_thread_trace_finish(device);
3676
3677    radv_spm_finish(device);
3678
3679    vk_device_finish(&device->vk);
3680    vk_free(&device->vk.alloc, device);
3681 }
3682
3683 VKAPI_ATTR VkResult VKAPI_CALL
3684 radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties)
3685 {
3686    if (pProperties == NULL) {
3687       *pPropertyCount = 0;
3688       return VK_SUCCESS;
3689    }
3690
3691    /* None supported at this time */
3692    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3693 }
3694
3695 VKAPI_ATTR VkResult VKAPI_CALL
3696 radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
3697                                     VkLayerProperties *pProperties)
3698 {
3699    if (pProperties == NULL) {
3700       *pPropertyCount = 0;
3701       return VK_SUCCESS;
3702    }
3703
3704    /* None supported at this time */
3705    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3706 }
3707
3708 static void
3709 radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sample_positions,
3710                        uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,
3711                        uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,
3712                        struct radeon_winsys_bo *tess_rings_bo,
3713                        struct radeon_winsys_bo *task_rings_bo,
3714                        struct radeon_winsys_bo *mesh_scratch_ring_bo)
3715 {
3716    uint32_t *desc = &map[4];
3717
3718    if (esgs_ring_bo) {
3719       uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
3720
3721       /* stride 0, num records = size, add tid, swizzle, elsize 4,
3722          index stride 64 */
3723       desc[0] = esgs_va;
3724       desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3725       desc[2] = esgs_ring_size;
3726       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3727                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3728                 S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
3729
3730       if (device->physical_device->rad_info.gfx_level >= GFX11)
3731          desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
3732       else
3733          desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
3734
3735       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3736          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3737                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3738       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3739          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3740                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3741       } else {
3742          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3743                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
3744       }
3745
3746       /* GS entry for ES->GS ring */
3747       /* stride 0, num records = size, elsize 0,
3748          index stride 0 */
3749       desc[4] = esgs_va;
3750       desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3751       desc[6] = esgs_ring_size;
3752       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3753                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3754
3755       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3756          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3757                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3758       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3759          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3760                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3761       } else {
3762          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3763                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3764       }
3765    }
3766
3767    desc += 8;
3768
3769    if (gsvs_ring_bo) {
3770       uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
3771
3772       /* VS entry for GS->VS ring */
3773       /* stride 0, num records = size, elsize 0,
3774          index stride 0 */
3775       desc[0] = gsvs_va;
3776       desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3777       desc[2] = gsvs_ring_size;
3778       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3779                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3780
3781       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3782          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3783                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3784       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3785          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3786                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3787       } else {
3788          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3789                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3790       }
3791
3792       /* stride gsvs_itemsize, num records 64
3793          elsize 4, index stride 16 */
3794       /* shader will patch stride and desc[2] */
3795       desc[4] = gsvs_va;
3796       desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3797       desc[6] = 0;
3798       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3799                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3800                 S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
3801
3802       if (device->physical_device->rad_info.gfx_level >= GFX11)
3803          desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
3804       else
3805          desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
3806
3807       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3808          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3809                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3810       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3811          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3812                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3813       } else {
3814          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3815                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
3816       }
3817    }
3818
3819    desc += 8;
3820
3821    if (tess_rings_bo) {
3822       uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
3823       uint64_t tess_offchip_va = tess_va + device->physical_device->hs.tess_offchip_ring_offset;
3824
3825       desc[0] = tess_va;
3826       desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
3827       desc[2] = device->physical_device->hs.tess_factor_ring_size;
3828       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3829                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3830
3831       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3832          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3833                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
3834       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3835          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3836                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3837       } else {
3838          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3839                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3840       }
3841
3842       desc[4] = tess_offchip_va;
3843       desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
3844       desc[6] = device->physical_device->hs.tess_offchip_ring_size;
3845       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3846                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3847
3848       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3849          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3850                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
3851       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3852          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3853                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3854       } else {
3855          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3856                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3857       }
3858    }
3859
3860    desc += 8;
3861
3862    if (task_rings_bo) {
3863       uint64_t task_va = radv_buffer_get_va(task_rings_bo);
3864       uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
3865       uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset;
3866
3867       desc[0] = task_draw_ring_va;
3868       desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32);
3869       desc[2] = device->physical_device->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES;
3870       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3871                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3872
3873       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3874          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
3875                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3876       } else {
3877          assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
3878          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
3879                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3880       }
3881
3882       desc[4] = task_payload_ring_va;
3883       desc[5] = S_008F04_BASE_ADDRESS_HI(task_payload_ring_va >> 32);
3884       desc[6] = device->physical_device->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
3885       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3886                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3887
3888       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3889          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
3890                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3891       } else {
3892          assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
3893          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
3894                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3895       }
3896    }
3897
3898    desc += 8;
3899
3900    if (mesh_scratch_ring_bo) {
3901       uint64_t va = radv_buffer_get_va(mesh_scratch_ring_bo);
3902
3903       desc[0] = va;
3904       desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
3905       desc[2] = RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES;
3906       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3907                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3908
3909       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3910          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
3911                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3912       } else {
3913          assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
3914          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
3915                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3916       }
3917    }
3918
3919    desc += 4;
3920
3921    if (add_sample_positions) {
3922       /* add sample positions after all rings */
3923       memcpy(desc, device->sample_locations_1x, 8);
3924       desc += 2;
3925       memcpy(desc, device->sample_locations_2x, 16);
3926       desc += 4;
3927       memcpy(desc, device->sample_locations_4x, 32);
3928       desc += 8;
3929       memcpy(desc, device->sample_locations_8x, 64);
3930    }
3931 }
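
/* Illustration only (not driver code): every ring entry written above is a
 * 4-dword buffer resource descriptor. A minimal sketch of the common shape,
 * assuming a hypothetical fill_ring_desc() helper:
 *
 *    static void fill_ring_desc(uint32_t *desc, uint64_t va, uint32_t num_records)
 *    {
 *       desc[0] = (uint32_t)va;                       // base address, bits [31:0]
 *       desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); // bits [47:32] + stride/swizzle fields
 *       desc[2] = num_records;                        // size of the ring
 *       desc[3] = 0;                                  // dst_sel/format fields, per gfx level
 *    }
 *
 * Note that INDEX_STRIDE is an encoded field rather than a byte count:
 * 0 = 8, 1 = 16, 2 = 32 and 3 = 64 bytes. That is why "index stride 64" in
 * the comments above becomes S_008F0C_INDEX_STRIDE(3) and "index stride 16"
 * becomes S_008F0C_INDEX_STRIDE(1).
 */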
3932
3933 static void
3934 radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs,
3935                         struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,
3936                         struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
3937 {
3938    if (!esgs_ring_bo && !gsvs_ring_bo)
3939       return;
3940
3941    if (esgs_ring_bo)
3942       radv_cs_add_buffer(device->ws, cs, esgs_ring_bo);
3943
3944    if (gsvs_ring_bo)
3945       radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo);
3946
3947    if (device->physical_device->rad_info.gfx_level >= GFX7) {
3948       radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
3949       radeon_emit(cs, esgs_ring_size >> 8);
3950       radeon_emit(cs, gsvs_ring_size >> 8);
3951    } else {
3952       radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
3953       radeon_emit(cs, esgs_ring_size >> 8);
3954       radeon_emit(cs, gsvs_ring_size >> 8);
3955    }
3956 }
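
/* Illustration only: VGT_ESGS_RING_SIZE and VGT_GSVS_RING_SIZE are programmed
 * in 256-byte units, hence the >> 8 above. For example, a 1 MiB ESGS ring is
 * written as 1048576 >> 8 = 4096 (0x1000).
 */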
3957
3958 static void
3959 radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs,
3960                            struct radeon_winsys_bo *tess_rings_bo)
3961 {
3962    uint64_t tf_va;
3963    uint32_t tf_ring_size;
3964    if (!tess_rings_bo)
3965       return;
3966
3967    tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4;
3968    tf_va = radv_buffer_get_va(tess_rings_bo);
3969
3970    radv_cs_add_buffer(device->ws, cs, tess_rings_bo);
3971
3972    if (device->physical_device->rad_info.gfx_level >= GFX7) {
3973       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3974          /* TF_RING_SIZE is per SE on GFX11. */
3975          tf_ring_size /= device->physical_device->rad_info.max_se;
3976       }
3977
3978       radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size));
3979       radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
3980
3981       if (device->physical_device->rad_info.gfx_level >= GFX10) {
3982          radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI,
3983                                 S_030984_BASE_HI(tf_va >> 40));
3984       } else if (device->physical_device->rad_info.gfx_level == GFX9) {
3985          radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
3986       }
3987
3988       radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
3989    } else {
3990       radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
3991       radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
3992       radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
3993    }
3994 }
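
/* Illustration only: the tess factor ring address is 256-byte aligned, so the
 * registers split it as follows:
 *
 *    uint64_t va = ...;                        // 256-byte aligned GPU VA
 *    uint32_t base    = (uint32_t)(va >> 8);   // bits [39:8] -> VGT_TF_MEMORY_BASE
 *    uint32_t base_hi = (uint32_t)(va >> 40);  // remaining high bits -> VGT_TF_MEMORY_BASE_HI
 */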
3995
3996 static VkResult
3997 radv_initialise_task_control_buffer(struct radv_device *device,
3998                                     struct radeon_winsys_bo *task_rings_bo)
3999 {
4000    uint32_t *ptr = (uint32_t *)device->ws->buffer_map(task_rings_bo);
4001    if (!ptr)
4002       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
4003
4004    const uint32_t num_entries = device->physical_device->task_info.num_entries;
4005    const uint64_t task_va = radv_buffer_get_va(task_rings_bo);
4006    const uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
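   /* The draw ring must start on a 256-byte boundary; the assert below checks
    * that bits [7:0] of its address are zero. */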
4007    assert((task_draw_ring_va & 0xFFFFFF00) == (task_draw_ring_va & 0xFFFFFFFF));
4008
4009    /* 64-bit write_ptr */
4010    ptr[0] = num_entries;
4011    ptr[1] = 0;
4012    /* 64-bit read_ptr */
4013    ptr[2] = num_entries;
4014    ptr[3] = 0;
4015    /* 64-bit dealloc_ptr */
4016    ptr[4] = num_entries;
4017    ptr[5] = 0;
4018    /* num_entries */
4019    ptr[6] = num_entries;
4020    /* 64-bit draw ring address */
4021    ptr[7] = task_draw_ring_va;
4022    ptr[8] = task_draw_ring_va >> 32;
4023
4024    device->ws->buffer_unmap(task_rings_bo);
4025    return VK_SUCCESS;
4026 }
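
/* Illustration only: the dwords written above form the following layout
 * (described as pseudo-fields; an actual C struct would insert padding before
 * the final 64-bit field, which is why the driver writes raw dwords instead):
 *
 *    dw0-1: write_ptr    = num_entries
 *    dw2-3: read_ptr     = num_entries
 *    dw4-5: dealloc_ptr  = num_entries
 *    dw6:   num_entries
 *    dw7-8: draw_ring_va (64-bit, not naturally aligned)
 *
 * Starting the three ring pointers at num_entries rather than 0 presumably
 * keeps the lap arithmetic consistent with the zero-initialized ready bits of
 * the draw ring (see the zero-init requirement where the BO is allocated).
 */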
4027
4028 static void
4029 radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs,
4030                      struct radeon_winsys_bo *task_rings_bo, bool compute)
4031 {
4032    if (!task_rings_bo)
4033       return;
4034
4035    const uint64_t task_ctrlbuf_va = radv_buffer_get_va(task_rings_bo);
4036    assert(radv_is_aligned(task_ctrlbuf_va, 256));
4037    radv_cs_add_buffer(device->ws, cs, task_rings_bo);
4038
4039    /* Tell the GPU where the task control buffer is. */
4040    radeon_emit(cs, PKT3(PKT3_DISPATCH_TASK_STATE_INIT, 1, 0) | PKT3_SHADER_TYPE_S(!!compute));
4041    /* bits [31:8]: control buffer address lo, bits [7:0]: reserved (set to zero) */
4042    radeon_emit(cs, task_ctrlbuf_va & 0xFFFFFF00);
4043    /* bits [31:0]: control buffer address hi */
4044    radeon_emit(cs, task_ctrlbuf_va >> 32);
4045 }
4046
4047 static void
4048 radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
4049                            uint32_t size_per_wave, uint32_t waves,
4050                            struct radeon_winsys_bo *scratch_bo)
4051 {
4052    struct radeon_info *info = &device->physical_device->rad_info;
4053
4054    if (!scratch_bo)
4055       return;
4056
4057    radv_cs_add_buffer(device->ws, cs, scratch_bo);
4058
4059    if (info->gfx_level >= GFX11) {
4060       uint64_t va = radv_buffer_get_va(scratch_bo);
4061
4062       /* WAVES is per SE for SPI_TMPRING_SIZE. */
4063       waves /= info->num_se;
4064
4065       radeon_set_context_reg_seq(cs, R_0286E8_SPI_TMPRING_SIZE, 3);
4066       radeon_emit(cs, S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 256)));
4067       radeon_emit(cs, va >> 8);  /* SPI_GFX_SCRATCH_BASE_LO */
4068       radeon_emit(cs, va >> 40); /* SPI_GFX_SCRATCH_BASE_HI */
4069    } else {
4070       radeon_set_context_reg(
4071          cs, R_0286E8_SPI_TMPRING_SIZE,
4072          S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
4073    }
4074 }
4075
4076 static void
4077 radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
4078                           uint32_t size_per_wave, uint32_t waves,
4079                           struct radeon_winsys_bo *compute_scratch_bo)
4080 {
4081    struct radeon_info *info = &device->physical_device->rad_info;
4082    uint64_t scratch_va;
4083    uint32_t rsrc1;
4084
4085    if (!compute_scratch_bo)
4086       return;
4087
4088    scratch_va = radv_buffer_get_va(compute_scratch_bo);
4089    rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
4090
4091    if (device->physical_device->rad_info.gfx_level >= GFX11)
4092       rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
4093    else
4094       rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
4095
4096    radv_cs_add_buffer(device->ws, cs, compute_scratch_bo);
4097
4098    if (info->gfx_level >= GFX11) {
4099       radeon_set_sh_reg_seq(cs, R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, 4);
4100       radeon_emit(cs, scratch_va >> 8);
4101       radeon_emit(cs, scratch_va >> 40);
4102    } else {
4103       radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
4104    }
4105
4106    radeon_emit(cs, scratch_va);
4107    radeon_emit(cs, rsrc1);
4108
4109    radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
4110                      S_00B860_WAVES(waves) |
4111                      S_00B860_WAVESIZE(round_up_u32(size_per_wave, info->gfx_level >= GFX11 ? 256 : 1024)));
4112 }
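
/* Illustration only: round_up_u32() is used here as a ceiling division, so
 * WAVESIZE holds the per-wave scratch size in 1024-byte units (256-byte units
 * on GFX11), rounded up. Assuming round_up_u32(v, d) == (v + d - 1) / d:
 *
 *    5000 bytes per wave, GFX10: WAVESIZE = (5000 + 1023) / 1024 = 5
 *    5000 bytes per wave, GFX11: WAVESIZE = (5000 +  255) /  256 = 20
 */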
4113
4114 static void
4115 radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
4116                                   struct radeon_winsys_bo *descriptor_bo)
4117 {
4118    if (!descriptor_bo)
4119       return;
4120
4121    uint64_t va = radv_buffer_get_va(descriptor_bo);
4122    radv_cs_add_buffer(device->ws, cs, descriptor_bo);
4123
4124    /* Compute shader user data 0-1 have the scratch pointer (unlike GFX shaders),
4125     * so emit the descriptor pointer to user data 2-3 instead (task_ring_offsets arg).
4126     */
4127    radv_emit_shader_pointer(device, cs, R_00B908_COMPUTE_USER_DATA_2, va, true);
4128 }
4129
4130 static void
4131 radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
4132                                    struct radeon_winsys_bo *descriptor_bo)
4133 {
4134    uint64_t va;
4135
4136    if (!descriptor_bo)
4137       return;
4138
4139    va = radv_buffer_get_va(descriptor_bo);
4140
4141    radv_cs_add_buffer(device->ws, cs, descriptor_bo);
4142
4143    if (device->physical_device->rad_info.gfx_level >= GFX11) {
4144       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
4145                          R_00B420_SPI_SHADER_PGM_LO_HS,
4146                          R_00B220_SPI_SHADER_PGM_LO_GS};
4147
4148       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
4149          radv_emit_shader_pointer(device, cs, regs[i], va, true);
4150       }
4151    } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
4152       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
4153                          R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
4154                          R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
4155
4156       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
4157          radv_emit_shader_pointer(device, cs, regs[i], va, true);
4158       }
4159    } else if (device->physical_device->rad_info.gfx_level == GFX9) {
4160       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
4161                          R_00B330_SPI_SHADER_USER_DATA_ES_0,
4162                          R_00B430_SPI_SHADER_USER_DATA_LS_0};
4163
4164       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
4165          radv_emit_shader_pointer(device, cs, regs[i], va, true);
4166       }
4167    } else {
4168       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
4169                          R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0,
4170                          R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0};
4171
4172       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
4173          radv_emit_shader_pointer(device, cs, regs[i], va, true);
4174       }
4175    }
4176 }
4177
4178 static void
4179 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_device *device)
4180 {
4181    if (device->gfx_init) {
4182       uint64_t va = radv_buffer_get_va(device->gfx_init);
4183
4184       radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
4185       radeon_emit(cs, va);
4186       radeon_emit(cs, va >> 32);
4187       radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
4188
4189       radv_cs_add_buffer(device->ws, cs, device->gfx_init);
4190    } else {
4191       si_emit_graphics(device, cs);
4192    }
4193 }
4194
4195 static void
4196 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_device *device)
4197 {
4198    si_emit_compute(device, cs);
4199 }
4200
4201 static VkResult
4202 radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *device,
4203                         const struct radv_queue_ring_info *needs)
4204 {
4205    struct radeon_winsys *ws = device->ws;
4206    struct radeon_winsys_bo *scratch_bo = queue->scratch_bo;
4207    struct radeon_winsys_bo *descriptor_bo = queue->descriptor_bo;
4208    struct radeon_winsys_bo *compute_scratch_bo = queue->compute_scratch_bo;
4209    struct radeon_winsys_bo *esgs_ring_bo = queue->esgs_ring_bo;
4210    struct radeon_winsys_bo *gsvs_ring_bo = queue->gsvs_ring_bo;
4211    struct radeon_winsys_bo *tess_rings_bo = queue->tess_rings_bo;
4212    struct radeon_winsys_bo *task_rings_bo = queue->task_rings_bo;
4213    struct radeon_winsys_bo *mesh_scratch_ring_bo = queue->mesh_scratch_ring_bo;
4214    struct radeon_winsys_bo *gds_bo = queue->gds_bo;
4215    struct radeon_winsys_bo *gds_oa_bo = queue->gds_oa_bo;
4216    struct radeon_cmdbuf *dest_cs[3] = {0};
4217    const uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
4218    VkResult result = VK_SUCCESS;
4219
4220    const bool add_sample_positions = !queue->ring_info.sample_positions && needs->sample_positions;
4221    const uint32_t scratch_size = needs->scratch_size_per_wave * needs->scratch_waves;
4222    const uint32_t queue_scratch_size =
4223       queue->ring_info.scratch_size_per_wave * queue->ring_info.scratch_waves;
4224
4225    if (scratch_size > queue_scratch_size) {
4226       result = ws->buffer_create(ws, scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
4227                                  RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);
4228       if (result != VK_SUCCESS)
4229          goto fail;
4230    }
4231
4232    const uint32_t compute_scratch_size =
4233       needs->compute_scratch_size_per_wave * needs->compute_scratch_waves;
4234    const uint32_t compute_queue_scratch_size =
4235       queue->ring_info.compute_scratch_size_per_wave * queue->ring_info.compute_scratch_waves;
4236    if (compute_scratch_size > compute_queue_scratch_size) {
4237       result = ws->buffer_create(ws, compute_scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
4238                                  RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo);
4239       if (result != VK_SUCCESS)
4240          goto fail;
4241    }
4242
4243    if (needs->esgs_ring_size > queue->ring_info.esgs_ring_size) {
4244       result = ws->buffer_create(ws, needs->esgs_ring_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
4245                                  RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo);
4246       if (result != VK_SUCCESS)
4247          goto fail;
4248    }
4249
4250    if (needs->gsvs_ring_size > queue->ring_info.gsvs_ring_size) {
4251       result = ws->buffer_create(ws, needs->gsvs_ring_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
4252                                  RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo);
4253       if (result != VK_SUCCESS)
4254          goto fail;
4255    }
4256
4257    if (!queue->ring_info.tess_rings && needs->tess_rings) {
4258       result = ws->buffer_create(
4259          ws, device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size, 256,
4260          RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
4261       if (result != VK_SUCCESS)
4262          goto fail;
4263    }
4264
4265    if (!queue->ring_info.task_rings && needs->task_rings) {
4266       assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
4267
4268       /* We write the control buffer from the CPU, so need to grant CPU access to the BO.
4269        * The draw ring needs to be zero-initialized otherwise the ready bits will be incorrect.
4270        */
4271       uint32_t task_rings_bo_flags =
4272          RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM;
4273
4274       result = ws->buffer_create(ws, device->physical_device->task_info.bo_size_bytes, 256,
4275                                  RADEON_DOMAIN_VRAM, task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH,
4276                                  0, &task_rings_bo);
4277       if (result != VK_SUCCESS)
4278          goto fail;
4279
4280       result = radv_initialise_task_control_buffer(device, task_rings_bo);
4281       if (result != VK_SUCCESS)
4282          goto fail;
4283    }
4284
4285    if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) {
4286       assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
4287       result =
4288          ws->buffer_create(ws, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256,
4289                            RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &mesh_scratch_ring_bo);
4290
4291       if (result != VK_SUCCESS)
4292          goto fail;
4293    }
4294
4295    if (!queue->ring_info.gds && needs->gds) {
4296       assert(device->physical_device->rad_info.gfx_level >= GFX10);
4297
4298       /* 4 streamout GDS counters.
4299        * We need 256B (64 dw) of GDS, otherwise streamout hangs.
4300        */
4301       result = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, ring_bo_flags,
4302                                  RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
4303       if (result != VK_SUCCESS)
4304          goto fail;
4305    }
4306
4307    if (!queue->ring_info.gds_oa && needs->gds_oa) {
4308       assert(device->physical_device->rad_info.gfx_level >= GFX10);
4309
4310       result = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
4311                                  RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
4312       if (result != VK_SUCCESS)
4313          goto fail;
4314    }
4315
4316    /* Re-initialize the descriptor BO when any ring BOs changed.
4317     *
4318     * Additionally, make sure to create the descriptor BO for the compute queue
4319     * when it uses the task shader rings. The task rings BO is shared between the
4320     * GFX and compute queues and already initialized here.
4321     */
4322    if ((queue->qf == RADV_QUEUE_COMPUTE && !descriptor_bo && task_rings_bo) ||
4323        scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
4324        gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
4325        task_rings_bo != queue->task_rings_bo || mesh_scratch_ring_bo != queue->mesh_scratch_ring_bo ||
4326        add_sample_positions) {
4327       uint32_t size = 0;
4328       if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || add_sample_positions) {
4329          size = 160; /* 2 dword + 2 padding + 4 dword * 9 */
4330          if (add_sample_positions)
4331             size += 128; /* 64+32+16+8 = 120 bytes */
4332       } else if (scratch_bo) {
4333          size = 8; /* 2 dword */
4334       }
4335
4336       result = ws->buffer_create(
4337          ws, size, 4096, RADEON_DOMAIN_VRAM,
4338          RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
4339          RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
4340       if (result != VK_SUCCESS)
4341          goto fail;
4342    }
4343
4344    if (descriptor_bo != queue->descriptor_bo) {
4345       uint32_t *map = (uint32_t *)ws->buffer_map(descriptor_bo);
4346       if (!map) {
              result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
4347          goto fail;
           }
4348
4349       if (scratch_bo) {
4350          uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
4351          uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
4352
4353          if (device->physical_device->rad_info.gfx_level >= GFX11)
4354             rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
4355          else
4356             rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
4357
4358          map[0] = scratch_va;
4359          map[1] = rsrc1;
4360       }
4361
4362       if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || add_sample_positions)
4363          radv_fill_shader_rings(device, map, add_sample_positions, needs->esgs_ring_size,
4364                                 esgs_ring_bo, needs->gsvs_ring_size, gsvs_ring_bo, tess_rings_bo,
4365                                 task_rings_bo, mesh_scratch_ring_bo);
4366
4367       ws->buffer_unmap(descriptor_bo);
4368    }
4369
4370    for (int i = 0; i < 3; ++i) {
4371       /* Don't create continue preamble when it's not necessary. */
4372       if (i == 2) {
4373          /* We only need the continue preamble when we can't use indirect buffers. */
4374          if (!(device->instance->debug_flags & RADV_DEBUG_NO_IBS) &&
4375              device->physical_device->rad_info.gfx_level >= GFX7)
4376             continue;
4377          /* Continue preamble is unnecessary when no shader rings are used. */
4378          if (!needs->scratch_size_per_wave && !needs->compute_scratch_size_per_wave &&
4379              !needs->esgs_ring_size && !needs->gsvs_ring_size && !needs->tess_rings &&
4380              !needs->task_rings && !needs->mesh_scratch_ring && !needs->gds && !needs->gds_oa && !needs->sample_positions)
4381             continue;
4382       }
4383
4384       enum rgp_flush_bits sqtt_flush_bits = 0;
4385       struct radeon_cmdbuf *cs = NULL;
4386       cs = ws->cs_create(ws, radv_queue_family_to_ring(device->physical_device, queue->qf));
4387       if (!cs) {
4388          result = VK_ERROR_OUT_OF_HOST_MEMORY;
4389          goto fail;
4390       }
4391
4392       dest_cs[i] = cs;
4393
4394       if (scratch_bo)
4395          radv_cs_add_buffer(ws, cs, scratch_bo);
4396
4397       /* Emit initial configuration. */
4398       switch (queue->qf) {
4399       case RADV_QUEUE_GENERAL:
4400          radv_init_graphics_state(cs, device);
4401
4402          if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo) {
4403             radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
4404             radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
4405
4406             radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
4407             radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
4408          }
4409
4410          radv_emit_gs_ring_sizes(device, cs, esgs_ring_bo, needs->esgs_ring_size, gsvs_ring_bo,
4411                                  needs->gsvs_ring_size);
4412          radv_emit_tess_factor_ring(device, cs, tess_rings_bo);
4413          radv_emit_task_rings(device, cs, task_rings_bo, false);
4414          radv_emit_graphics_shader_pointers(device, cs, descriptor_bo);
4415          radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave,
4416                                    needs->compute_scratch_waves, compute_scratch_bo);
4417          radv_emit_graphics_scratch(device, cs, needs->scratch_size_per_wave, needs->scratch_waves,
4418                                     scratch_bo);
4419          break;
4420       case RADV_QUEUE_COMPUTE:
4421          radv_init_compute_state(cs, device);
4422
4423          if (task_rings_bo) {
4424             radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
4425             radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
4426          }
4427
4428          radv_emit_task_rings(device, cs, task_rings_bo, true);
4429          radv_emit_compute_shader_pointers(device, cs, descriptor_bo);
4430          radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave,
4431                                    needs->compute_scratch_waves, compute_scratch_bo);
4432          break;
4433       default:
4434          break;
4435       }
4436
4437       if (gds_bo)
4438          radv_cs_add_buffer(ws, cs, gds_bo);
4439       if (gds_oa_bo)
4440          radv_cs_add_buffer(ws, cs, gds_oa_bo);
4441
4442       if (i < 2) {
4443          /* The two initial preambles have a cache flush at the beginning. */
4444          const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
4445          const bool is_mec = queue->qf == RADV_QUEUE_COMPUTE && gfx_level >= GFX7;
4446          enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
4447                                                RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
4448                                                RADV_CMD_FLAG_START_PIPELINE_STATS;
4449
4450          if (i == 0) {
4451             /* The full flush preamble should also wait for previous shader work to finish. */
4452             flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
4453             if (queue->qf == RADV_QUEUE_GENERAL)
4454                flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
4455          }
4456
4457          si_cs_emit_cache_flush(cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, 0);
4458       }
4459
4460       result = ws->cs_finalize(cs);
4461       if (result != VK_SUCCESS)
4462          goto fail;
4463    }
4464
4465    if (queue->initial_full_flush_preamble_cs)
4466       ws->cs_destroy(queue->initial_full_flush_preamble_cs);
4467
4468    if (queue->initial_preamble_cs)
4469       ws->cs_destroy(queue->initial_preamble_cs);
4470
4471    if (queue->continue_preamble_cs)
4472       ws->cs_destroy(queue->continue_preamble_cs);
4473
4474    queue->initial_full_flush_preamble_cs = dest_cs[0];
4475    queue->initial_preamble_cs = dest_cs[1];
4476    queue->continue_preamble_cs = dest_cs[2];
4477
4478    if (scratch_bo != queue->scratch_bo) {
4479       if (queue->scratch_bo)
4480          ws->buffer_destroy(ws, queue->scratch_bo);
4481       queue->scratch_bo = scratch_bo;
4482    }
4483
4484    if (compute_scratch_bo != queue->compute_scratch_bo) {
4485       if (queue->compute_scratch_bo)
4486          ws->buffer_destroy(ws, queue->compute_scratch_bo);
4487       queue->compute_scratch_bo = compute_scratch_bo;
4488    }
4489
4490    if (esgs_ring_bo != queue->esgs_ring_bo) {
4491       if (queue->esgs_ring_bo)
4492          ws->buffer_destroy(ws, queue->esgs_ring_bo);
4493       queue->esgs_ring_bo = esgs_ring_bo;
4494    }
4495
4496    if (gsvs_ring_bo != queue->gsvs_ring_bo) {
4497       if (queue->gsvs_ring_bo)
4498          ws->buffer_destroy(ws, queue->gsvs_ring_bo);
4499       queue->gsvs_ring_bo = gsvs_ring_bo;
4500    }
4501
4502    if (descriptor_bo != queue->descriptor_bo) {
4503       if (queue->descriptor_bo)
4504          ws->buffer_destroy(ws, queue->descriptor_bo);
4505       queue->descriptor_bo = descriptor_bo;
4506    }
4507
4508    queue->tess_rings_bo = tess_rings_bo;
4509    queue->task_rings_bo = task_rings_bo;
4510    queue->mesh_scratch_ring_bo = mesh_scratch_ring_bo;
4511    queue->gds_bo = gds_bo;
4512    queue->gds_oa_bo = gds_oa_bo;
4513    queue->ring_info = *needs;
4514    return VK_SUCCESS;
4515 fail:
4516    for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
4517       if (dest_cs[i])
4518          ws->cs_destroy(dest_cs[i]);
4519    if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
4520       ws->buffer_destroy(ws, descriptor_bo);
4521    if (scratch_bo && scratch_bo != queue->scratch_bo)
4522       ws->buffer_destroy(ws, scratch_bo);
4523    if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
4524       ws->buffer_destroy(ws, compute_scratch_bo);
4525    if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
4526       ws->buffer_destroy(ws, esgs_ring_bo);
4527    if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
4528       ws->buffer_destroy(ws, gsvs_ring_bo);
4529    if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
4530       ws->buffer_destroy(ws, tess_rings_bo);
4531    if (task_rings_bo && task_rings_bo != queue->task_rings_bo)
4532       ws->buffer_destroy(ws, task_rings_bo);
4533    if (gds_bo && gds_bo != queue->gds_bo)
4534       ws->buffer_destroy(ws, gds_bo);
4535    if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
4536       ws->buffer_destroy(ws, gds_oa_bo);
4537
4538    return vk_error(queue, result);
4539 }
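
/* Illustration only: the three command streams built above are used at
 * different positions within a submission:
 *
 *    dest_cs[0]  initial_full_flush_preamble_cs  first chunk after a wait:
 *                                                cache flush + wait for idle
 *    dest_cs[1]  initial_preamble_cs             other first chunks: cache flush only
 *    dest_cs[2]  continue_preamble_cs            continuation chunks: ring setup, no flush
 */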
4540
4541 static struct radeon_cmdbuf *
4542 radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool unlock)
4543 {
4544    struct radeon_cmdbuf **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 1 : 0)];
4545    struct radeon_cmdbuf *cs;
4546
4547    if (*cs_ref)
4548       return *cs_ref;
4549
4550    cs = device->ws->cs_create(device->ws, AMD_IP_GFX);
4551    if (!cs)
4552       return NULL;
4553
4554    ASSERTED unsigned cdw = radeon_check_space(device->ws, cs, 21);
4555
4556    if (!unlock) {
4557       uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET;
4558       radeon_emit(cs, PKT3(PKT3_ATOMIC_MEM, 7, 0));
4559       radeon_emit(cs, ATOMIC_OP(TC_OP_ATOMIC_CMPSWAP_32) | ATOMIC_COMMAND(ATOMIC_COMMAND_LOOP));
4560       radeon_emit(cs, mutex_va);       /* addr lo */
4561       radeon_emit(cs, mutex_va >> 32); /* addr hi */
4562       radeon_emit(cs, 1);              /* data lo */
4563       radeon_emit(cs, 0);              /* data hi */
4564       radeon_emit(cs, 0);              /* compare data lo */
4565       radeon_emit(cs, 0);              /* compare data hi */
4566       radeon_emit(cs, 10);             /* loop interval */
4567    }
4568
4569    uint64_t va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET;
4570    uint64_t unset_va = va + (unlock ? 8 * pass : 0);
4571    uint64_t set_va = va + (unlock ? 0 : 8 * pass);
4572
4573    radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
4574    radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
4575                       COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
4576    radeon_emit(cs, 0); /* immediate */
4577    radeon_emit(cs, 0);
4578    radeon_emit(cs, unset_va);
4579    radeon_emit(cs, unset_va >> 32);
4580
4581    radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
4582    radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
4583                       COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
4584    radeon_emit(cs, 1); /* immediate */
4585    radeon_emit(cs, 0);
4586    radeon_emit(cs, set_va);
4587    radeon_emit(cs, set_va >> 32);
4588
4589    if (unlock) {
4590       uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET;
4591
4592       radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
4593       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
4594                          COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
4595       radeon_emit(cs, 0); /* immediate */
4596       radeon_emit(cs, 0);
4597       radeon_emit(cs, mutex_va);
4598       radeon_emit(cs, mutex_va >> 32);
4599    }
4600
4601    assert(cs->cdw <= cdw);
4602
4603    VkResult result = device->ws->cs_finalize(cs);
4604    if (result != VK_SUCCESS) {
4605       device->ws->cs_destroy(cs);
4606       return NULL;
4607    }
4608
4609    /* All the casts are to avoid MSVC errors around pointer truncation in a non-taken
4610     * alternative.
4611     */
4612    if (p_atomic_cmpxchg((uintptr_t*)cs_ref, 0, (uintptr_t)cs) != 0) {
4613       device->ws->cs_destroy(cs);
4614    }
4615
4616    return *cs_ref;
4617 }
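
/* Illustration only: the ATOMIC_MEM packet above implements a spinlock in GPU
 * memory. A CPU-side equivalent of the lock CS, assuming C11 atomics:
 *
 *    #include <stdatomic.h>
 *
 *    static void lock_perf_counters(atomic_uint *mutex)
 *    {
 *       unsigned expected = 0;
 *       // ATOMIC_CMPSWAP_32 with ATOMIC_COMMAND_LOOP: keep retrying (at the
 *       // programmed loop interval of 10) until 0 is successfully swapped to 1.
 *       while (!atomic_compare_exchange_weak(mutex, &expected, 1))
 *          expected = 0;
 *    }
 *
 * The unlock CS releases it with a plain store of 0 (the final COPY_DATA),
 * after updating the per-pass flags.
 */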
4618
4619 static VkResult
4620 radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind)
4621 {
4622    RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
4623    VkResult result;
4624
4625    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4626       struct radv_device_memory *mem = NULL;
4627
4628       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4629          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4630
4631       result = device->ws->buffer_virtual_bind(device->ws, buffer->bo,
4632                                                bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
4633                                                mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
4634       if (result != VK_SUCCESS)
4635          return result;
4636    }
4637
4638    return VK_SUCCESS;
4639 }
4640
4641 static VkResult
4642 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
4643                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
4644 {
4645    RADV_FROM_HANDLE(radv_image, image, bind->image);
4646    VkResult result;
4647
4648    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4649       struct radv_device_memory *mem = NULL;
4650
4651       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4652          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4653
4654       result = device->ws->buffer_virtual_bind(device->ws, image->bo,
4655                                                bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
4656                                                mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
4657       if (result != VK_SUCCESS)
4658          return result;
4659    }
4660
4661    return VK_SUCCESS;
4662 }
4663
4664 static VkResult
4665 radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
4666 {
4667    RADV_FROM_HANDLE(radv_image, image, bind->image);
4668    struct radeon_surf *surface = &image->planes[0].surface;
4669    uint32_t bs = vk_format_get_blocksize(image->vk.format);
4670    VkResult result;
4671
4672    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4673       struct radv_device_memory *mem = NULL;
4674       uint32_t offset, pitch;
4675       uint32_t mem_offset = bind->pBinds[i].memoryOffset;
4676       const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
4677       const uint32_t level = bind->pBinds[i].subresource.mipLevel;
4678
4679       VkExtent3D bind_extent = bind->pBinds[i].extent;
4680       bind_extent.width =
4681          DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk.format));
4682       bind_extent.height =
4683          DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk.format));
4684
4685       VkOffset3D bind_offset = bind->pBinds[i].offset;
4686       bind_offset.x /= vk_format_get_blockwidth(image->vk.format);
4687       bind_offset.y /= vk_format_get_blockheight(image->vk.format);
4688
4689       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4690          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4691
4692       if (device->physical_device->rad_info.gfx_level >= GFX9) {
4693          offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
4694          pitch = surface->u.gfx9.prt_level_pitch[level];
4695       } else {
4696          offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
4697                   surface->u.legacy.level[level].slice_size_dw * 4 * layer;
4698          pitch = surface->u.legacy.level[level].nblk_x;
4699       }
4700
4701       offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs);
4702
4703       uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
4704
4705       bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch;
4706
4707       if (whole_subres) {
4708          uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
4709
4710          uint32_t size = aligned_extent_width * aligned_extent_height * bs;
4711          result = device->ws->buffer_virtual_bind(device->ws, image->bo, offset, size,
4712                                                   mem ? mem->bo : NULL, mem_offset);
4713          if (result != VK_SUCCESS)
4714             return result;
4715       } else {
4716          uint32_t img_increment = pitch * bs;
4717          uint32_t mem_increment = aligned_extent_width * bs;
4718          uint32_t size = mem_increment * surface->prt_tile_height;
4719          for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
4720             result = device->ws->buffer_virtual_bind(
4721                device->ws, image->bo, offset + img_increment * y, size, mem ? mem->bo : NULL,
4722                mem_offset + mem_increment * y);
4723             if (result != VK_SUCCESS)
4724                return result;
4725          }
4726       }
4727    }
4728
4729    return VK_SUCCESS;
4730 }
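
/* Illustration only: for a bind region that does not span whole rows, the loop
 * above maps one row of tiles per iteration. With y advancing in steps of
 * prt_tile_height blocks, each iteration binds:
 *
 *    image offset  = offset     + pitch * bs * y                 // full image pitch
 *    memory offset = mem_offset + aligned_extent_width * bs * y  // tightly packed
 *    size          = aligned_extent_width * bs * prt_tile_height // one tile row
 */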
4731
4732 static VkResult
4733 radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device,
4734                       struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count,
4735                       bool *uses_perf_counters)
4736 {
4737    if (queue->qf == RADV_QUEUE_TRANSFER)
4738       return VK_SUCCESS;
4739
4740    /* Figure out the needs of the current submission.
4741     * Start by copying the queue's current info.
4742     * This is done because we only allow two possible behaviours for these buffers:
4743     * - Grow when the newly needed amount is larger than what we had
4744     * - Allocate the max size and reuse it, but don't free it until the queue is destroyed
4745     */
4746    struct radv_queue_ring_info needs = queue->ring_info;
4747    *uses_perf_counters = false;
4748    for (uint32_t j = 0; j < cmd_buffer_count; j++) {
4749       struct radv_cmd_buffer *cmd_buffer = container_of(cmd_buffers[j], struct radv_cmd_buffer, vk);
4750
4751       needs.scratch_size_per_wave =
4752          MAX2(needs.scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
4753       needs.scratch_waves = MAX2(needs.scratch_waves, cmd_buffer->scratch_waves_wanted);
4754       needs.compute_scratch_size_per_wave = MAX2(needs.compute_scratch_size_per_wave,
4755                                                  cmd_buffer->compute_scratch_size_per_wave_needed);
4756       needs.compute_scratch_waves =
4757          MAX2(needs.compute_scratch_waves, cmd_buffer->compute_scratch_waves_wanted);
4758       needs.esgs_ring_size = MAX2(needs.esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
4759       needs.gsvs_ring_size = MAX2(needs.gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
4760       needs.tess_rings |= cmd_buffer->tess_rings_needed;
4761       needs.task_rings |= cmd_buffer->task_rings_needed;
4762       needs.mesh_scratch_ring |= cmd_buffer->mesh_scratch_ring_needed;
4763       needs.gds |= cmd_buffer->gds_needed;
4764       needs.gds_oa |= cmd_buffer->gds_oa_needed;
4765       needs.sample_positions |= cmd_buffer->sample_positions_needed;
4766       *uses_perf_counters |= cmd_buffer->state.uses_perf_counters;
4767    }
4768
4769    /* Sanitize scratch size information. */
4770    needs.scratch_waves = needs.scratch_size_per_wave
4771                             ? MIN2(needs.scratch_waves, UINT32_MAX / needs.scratch_size_per_wave)
4772                             : 0;
4773    needs.compute_scratch_waves =
4774       needs.compute_scratch_size_per_wave
4775          ? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave)
4776          : 0;
4777
4778    /* Return early if we already match these needs.
4779     * Note that it's not possible for any of the needed values to be less
4780     * than what the queue already had, because we only ever increase the allocated size.
4781     */
4782    if (queue->initial_full_flush_preamble_cs &&
4783        queue->ring_info.scratch_size_per_wave == needs.scratch_size_per_wave &&
4784        queue->ring_info.scratch_waves == needs.scratch_waves &&
4785        queue->ring_info.compute_scratch_size_per_wave == needs.compute_scratch_size_per_wave &&
4786        queue->ring_info.compute_scratch_waves == needs.compute_scratch_waves &&
4787        queue->ring_info.esgs_ring_size == needs.esgs_ring_size &&
4788        queue->ring_info.gsvs_ring_size == needs.gsvs_ring_size &&
4789        queue->ring_info.tess_rings == needs.tess_rings &&
4790        queue->ring_info.task_rings == needs.task_rings &&
4791        queue->ring_info.mesh_scratch_ring == needs.mesh_scratch_ring &&
4792        queue->ring_info.gds == needs.gds &&
4793        queue->ring_info.gds_oa == needs.gds_oa &&
4794        queue->ring_info.sample_positions == needs.sample_positions)
4795       return VK_SUCCESS;
4796
4797    return radv_update_preamble_cs(queue, device, &needs);
4798 }
4799
4800 struct radv_deferred_queue_submission {
4801    struct radv_queue *queue;
4802    VkCommandBuffer *cmd_buffers;
4803    uint32_t cmd_buffer_count;
4804
4805    /* Sparse bindings that happen on a queue. */
4806    VkSparseBufferMemoryBindInfo *buffer_binds;
4807    uint32_t buffer_bind_count;
4808    VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4809    uint32_t image_opaque_bind_count;
4810    VkSparseImageMemoryBindInfo *image_binds;
4811    uint32_t image_bind_count;
4812
4813    bool flush_caches;
4814    VkPipelineStageFlags2 wait_dst_stage_mask;
4815    struct radv_semaphore_part **wait_semaphores;
4816    uint32_t wait_semaphore_count;
4817    struct radv_semaphore_part **signal_semaphores;
4818    uint32_t signal_semaphore_count;
4819    VkFence fence;
4820
4821    uint64_t *wait_values;
4822    uint64_t *signal_values;
4823
4824    struct radv_semaphore_part *temporary_semaphore_parts;
4825    uint32_t temporary_semaphore_part_count;
4826
4827    struct list_head queue_pending_list;
4828    uint32_t submission_wait_count;
4829
4830    struct list_head processing_list;
4831 };
4832
4833 static VkResult
4834 radv_queue_submit_bind_sparse_memory(struct radv_device *device, struct vk_queue_submit *submission)
4835 {
4836    for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
4837       VkResult result = radv_sparse_buffer_bind_memory(device, submission->buffer_binds + i);
4838       if (result != VK_SUCCESS)
4839          return result;
4840    }
4841
4842    for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
4843       VkResult result =
4844          radv_sparse_image_opaque_bind_memory(device, submission->image_opaque_binds + i);
4845       if (result != VK_SUCCESS)
4846          return result;
4847    }
4848
4849    for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
4850       VkResult result = radv_sparse_image_bind_memory(device, submission->image_binds + i);
4851       if (result != VK_SUCCESS)
4852          return result;
4853    }
4854
4855    return VK_SUCCESS;
4856 }
4857
4858 static VkResult
4859 radv_queue_submit_empty(struct radv_queue *queue, struct vk_queue_submit *submission)
4860 {
4861    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4862    struct radv_winsys_submit_info submit = {
4863       .ip_type = radv_queue_ring(queue),
4864       .queue_index = queue->vk.index_in_family,
4865    };
4866
4867    return queue->device->ws->cs_submit(ctx, 1, &submit, submission->wait_count, submission->waits,
4868                                        submission->signal_count, submission->signals, false);
4869 }
4870
4871 static VkResult
4872 radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submission)
4873 {
4874    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4875    uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
4876    bool can_patch = true;
4877    uint32_t advance;
4878    VkResult result;
4879    bool uses_perf_counters = false;
4880    unsigned cmd_buffer_count = submission->command_buffer_count;
4881
4882    result = radv_update_preambles(&queue->state, queue->device, submission->command_buffers,
4883                                   submission->command_buffer_count, &uses_perf_counters);
4884    if (result != VK_SUCCESS)
4885       return result;
4886
4887    if (queue->device->trace_bo)
4888       simple_mtx_lock(&queue->device->trace_mtx);
4889
4890    if (uses_perf_counters)
4891       cmd_buffer_count += 2;
4892
4893    struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) * cmd_buffer_count);
4894    if (!cs_array) {
           result = VK_ERROR_OUT_OF_HOST_MEMORY;
4895       goto fail;
        }
4896
4897    for (uint32_t j = 0; j < submission->command_buffer_count; j++) {
4898       struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j];
4899       assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4900
4901       cs_array[j + (uses_perf_counters ? 1 : 0)] = cmd_buffer->cs;
4902       if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
4903          can_patch = false;
4904
4905       cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
4906    }
4907
4908    if (uses_perf_counters) {
4909       cs_array[0] =
4910          radv_create_perf_counter_lock_cs(queue->device, submission->perf_pass_index, false);
4911       cs_array[cmd_buffer_count - 1] =
4912          radv_create_perf_counter_lock_cs(queue->device, submission->perf_pass_index, true);
4913       can_patch = false;
4914       if (!cs_array[0] || !cs_array[cmd_buffer_count - 1]) {
4915          result = VK_ERROR_OUT_OF_HOST_MEMORY;
4916          goto fail;
4917       }
4918    }
4919
4920    /* For fences on the same queue/VM, amdgpu doesn't wait until all processing is
4921     * finished before starting the next cmdbuffer, so we need to do the wait here. */
4922    bool need_wait = submission->wait_count > 0;
4923
4924    struct radv_winsys_submit_info submit = {
4925       .ip_type = radv_queue_ring(queue),
4926       .queue_index = queue->vk.index_in_family,
4927       .cs_array = cs_array,
4928       .cs_count = 0,
4929       .initial_preamble_cs =
4930          need_wait ? queue->state.initial_full_flush_preamble_cs : queue->state.initial_preamble_cs,
4931       .continue_preamble_cs = queue->state.continue_preamble_cs,
4932    };
4933
4934    for (uint32_t j = 0; j < cmd_buffer_count; j += advance) {
4935       advance = MIN2(max_cs_submission, cmd_buffer_count - j);
4936       bool last_submit = j + advance == cmd_buffer_count;
4937
4938       if (queue->device->trace_bo)
4939          *queue->device->trace_id_ptr = 0;
4940
4941       submit.cs_count = advance;
4942
4943       result = queue->device->ws->cs_submit(
4944          ctx, 1, &submit, j == 0 ? submission->wait_count : 0, submission->waits,
4945          last_submit ? submission->signal_count : 0, submission->signals, can_patch);
4946
4947       if (result != VK_SUCCESS)
4948          goto fail;
4949
4950       if (queue->device->trace_bo) {
4951          radv_check_gpu_hangs(queue, cs_array[j]);
4952       }
4953
4954       if (queue->device->tma_bo) {
4955          radv_check_trap_handler(queue);
4956       }
4957
4958       submit.cs_array += advance;
4959       submit.initial_preamble_cs = queue->state.initial_preamble_cs;
4960    }
4961
4962 fail:
4963    free(cs_array);
4964    if (queue->device->trace_bo)
4965       simple_mtx_unlock(&queue->device->trace_mtx);
4966
4967    return result;
4968 }
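
/* Illustration only: submissions are split into chunks of at most
 * RADV_MAX_IBS_PER_SUBMIT command buffers (or 1 when a trace BO forces
 * single-stepping). For example, 400 command buffers with a limit of 192
 * yield chunks of 192 + 192 + 16; waits are attached only to the first chunk,
 * signals only to the last, and chunks after the first switch to the initial
 * preamble that flushes caches without waiting for earlier shader work.
 */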
4969
4970 static VkResult
4971 radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
4972 {
4973    struct radv_queue *queue = (struct radv_queue *)vqueue;
4974    VkResult result;
4975
4976    result = radv_queue_submit_bind_sparse_memory(queue->device, submission);
4977    if (result != VK_SUCCESS)
4978       goto fail;
4979
4980    if (!submission->command_buffer_count && !submission->wait_count && !submission->signal_count)
4981       return VK_SUCCESS;
4982
4983    if (!submission->command_buffer_count) {
4984       result = radv_queue_submit_empty(queue, submission);
4985    } else {
4986       result = radv_queue_submit_normal(queue, submission);
4987    }
4988
4989 fail:
4990    if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
4991       /* When something bad happened during the submission, such as
4992        * an out of memory issue, it might be hard to recover from
4993        * this inconsistent state. To avoid this sort of problem, we
4994        * assume that we are in a really bad situation and return
4995        * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
4996        * to submit the same job again to this device.
4997        */
4998       result = vk_device_set_lost(&queue->device->vk, "vkQueueSubmit() failed");
4999    }
5000    return result;
5001 }
5002
5003 bool
5004 radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
5005 {
5006    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
5007    struct radv_winsys_submit_info submit = {
5008       .ip_type = radv_queue_ring(queue),
5009       .queue_index = queue->vk.index_in_family,
5010       .cs_array = &cs,
5011       .cs_count = 1,
5012    };
5013
5014    VkResult result = queue->device->ws->cs_submit(ctx, 1, &submit, 0, NULL, 0, NULL, false);
5015    if (result != VK_SUCCESS)
5016       return false;
5017
5018    return true;
5019 }
5020
5021 VKAPI_ATTR VkResult VKAPI_CALL
5022 radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount,
5023                                           VkExtensionProperties *pProperties)
5024 {
5025    if (pLayerName)
5026       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
5027
5028    return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,
5029                                                      pPropertyCount, pProperties);
5030 }
5031
5032 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
5033 radv_GetInstanceProcAddr(VkInstance _instance, const char *pName)
5034 {
5035    RADV_FROM_HANDLE(radv_instance, instance, _instance);
5036
5037    /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
5038     * when we have to return valid function pointers, NULL, or it's left
5039     * undefined.  See the table for exact details.
5040     */
5041    if (pName == NULL)
5042       return NULL;
5043
5044 #define LOOKUP_RADV_ENTRYPOINT(entrypoint)                                                         \
5045    if (strcmp(pName, "vk" #entrypoint) == 0)                                                       \
5046    return (PFN_vkVoidFunction)radv_##entrypoint
5047
5048    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
5049    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
5050    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
5051    LOOKUP_RADV_ENTRYPOINT(CreateInstance);
5052
5053    /* GetInstanceProcAddr() can also be called with a NULL instance.
5054     * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
5055     */
5056    LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
5057
5058 #undef LOOKUP_RADV_ENTRYPOINT
5059
5060    if (instance == NULL)
5061       return NULL;
5062
5063    return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName);
5064 }
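/* Illustrative sketch, not part of the driver: the pre-instance lookups the
 * entrypoint table above must service even with instance == NULL, per the
 * vkGetInstanceProcAddr table in the spec.
 */
#if 0
static void
example_preinstance_lookups(void)
{
   PFN_vkCreateInstance create_instance =
      (PFN_vkCreateInstance)vkGetInstanceProcAddr(VK_NULL_HANDLE, "vkCreateInstance");
   PFN_vkEnumerateInstanceVersion enumerate_version =
      (PFN_vkEnumerateInstanceVersion)vkGetInstanceProcAddr(VK_NULL_HANDLE,
                                                            "vkEnumerateInstanceVersion");
   /* Both must be non-NULL here, while this driver returns NULL for
    * anything else looked up with a NULL instance.
    */
   (void)create_instance;
   (void)enumerate_version;
}
#endif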
5065
5066 /* Windows will use a dll definition file to avoid build errors. */
5067 #ifdef _WIN32
5068 #undef PUBLIC
5069 #define PUBLIC
5070 #endif
5071
5072 /* The loader wants us to expose a second GetInstanceProcAddr function
5073  * to work around certain LD_PRELOAD issues seen in apps.
5074  */
5075 PUBLIC
5076 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
5077 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
5078 {
5079    return radv_GetInstanceProcAddr(instance, pName);
5080 }
5081
5082 PUBLIC
5083 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
5084 vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
5085 {
5086    RADV_FROM_HANDLE(radv_instance, instance, _instance);
5087    return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
5088 }
5089
5090 bool
5091 radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
5092 {
5093    /* Only set BO metadata for the first plane */
5094    if (memory->image && memory->image->offset == 0) {
5095       struct radeon_bo_metadata metadata;
5096       radv_init_metadata(device, memory->image, &metadata);
5097       device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
5098    }
5099
5100    return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
5101 }
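/* Illustrative sketch, not part of the driver: the external-memory export
 * path that ends up in radv_get_memory_fd(). The allocation must have been
 * created with a matching VkExportMemoryAllocateInfo::handleTypes bit.
 */
#if 0
static int
example_export_memory_fd(VkDevice device, VkDeviceMemory memory)
{
   const VkMemoryGetFdInfoKHR get_fd_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
      .memory = memory,
      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
   };
   int fd = -1;
   if (vkGetMemoryFdKHR(device, &get_fd_info, &fd) != VK_SUCCESS)
      return -1;
   return fd; /* The caller owns the fd and must close() it. */
}
#endif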
5102
5103 void
5104 radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
5105                         struct radeon_winsys_bo *bo)
5106 {
5107    memset(mem, 0, sizeof(*mem));
5108    vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
5109
5110    mem->bo = bo;
5111 }
5112
5113 void
5114 radv_device_memory_finish(struct radv_device_memory *mem)
5115 {
5116    vk_object_base_finish(&mem->base);
5117 }
5118
5119 void
5120 radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5121                  struct radv_device_memory *mem)
5122 {
5123    if (mem == NULL)
5124       return;
5125
5126 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5127    if (mem->android_hardware_buffer)
5128       AHardwareBuffer_release(mem->android_hardware_buffer);
5129 #endif
5130
5131    if (mem->bo) {
5132       if (device->overallocation_disallowed) {
5133          mtx_lock(&device->overallocation_mutex);
5134          device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
5135          mtx_unlock(&device->overallocation_mutex);
5136       }
5137
5138       if (device->use_global_bo_list)
5139          device->ws->buffer_make_resident(device->ws, mem->bo, false);
5140       device->ws->buffer_destroy(device->ws, mem->bo);
5141       mem->bo = NULL;
5142    }
5143
5144    radv_device_memory_finish(mem);
5145    vk_free2(&device->vk.alloc, pAllocator, mem);
5146 }
5147
5148 static VkResult
5149 radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
5150                   const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
5151 {
5152    struct radv_device_memory *mem;
5153    VkResult result;
5154    enum radeon_bo_domain domain;
5155    uint32_t flags = 0;
5156
5157    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
5158
5159    const VkImportMemoryFdInfoKHR *import_info =
5160       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
5161    const VkMemoryDedicatedAllocateInfo *dedicate_info =
5162       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
5163    const VkExportMemoryAllocateInfo *export_info =
5164       vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
5165    const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
5166       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
5167    const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
5168       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
5169
5170    const struct wsi_memory_allocate_info *wsi_info =
5171       vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
5172
5173    if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
5174        !(export_info && (export_info->handleTypes &
5175                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
5176       /* Apparently, allocating zero bytes is allowed; return a null handle. */
5177       *pMem = VK_NULL_HANDLE;
5178       return VK_SUCCESS;
5179    }
5180
5181    mem =
5182       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5183    if (mem == NULL)
5184       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5185
5186    radv_device_memory_init(mem, device, NULL);
5187
5188    if (wsi_info) {
5189       if (wsi_info->implicit_sync)
5190          flags |= RADEON_FLAG_IMPLICIT_SYNC;
5191
5192       /* With PRIME, the linear buffer is allocated in the default heap,
5193        * which is VRAM. When the display is connected to the iGPU while
5194        * rendering happens on the dGPU, the DDX function
5195        * amdgpu_present_check_flip() then fails, forcing a blit instead of a
5196        * flip. Setting RADEON_FLAG_GTT_WC lets the kernel allocate GTT memory
5197        * that can be scanned out directly on supported hardware. Keying this
5198        * off wsi_info ensures the flag is only set for driver-allocated memory.
5199        */
5200       flags |= RADEON_FLAG_GTT_WC;
5201    }
5202
5203    if (dedicate_info) {
5204       mem->image = radv_image_from_handle(dedicate_info->image);
5205       mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
5206    } else {
5207       mem->image = NULL;
5208       mem->buffer = NULL;
5209    }
5210
5211    if (wsi_info && wsi_info->implicit_sync && mem->buffer) {
5212       /* Mark the linear prime buffer (aka the destination of the prime
5213        * blit) as uncached.
5214        */
5215       flags |= RADEON_FLAG_VA_UNCACHED;
5216    }
5217
5218    float priority_float = 0.5;
5219    const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
5220       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
5221    if (priority_ext)
5222       priority_float = priority_ext->priority;
5223
5224    uint64_t replay_address = 0;
5225    const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
5226       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
5227    if (replay_info && replay_info->opaqueCaptureAddress)
5228       replay_address = replay_info->opaqueCaptureAddress;
5229
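   /* Worked example of the mapping below: the default priority_float of 0.5
    * lands at about RADV_BO_PRIORITY_APPLICATION_MAX / 2, and the MIN2 clamp
    * only bites at priority_float = 1.0, where it keeps the result one below
    * RADV_BO_PRIORITY_APPLICATION_MAX so it stays in the application range.
    */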
5230    unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
5231                             (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
5232
5233    mem->user_ptr = NULL;
5234
5235 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5236    mem->android_hardware_buffer = NULL;
5237 #endif
5238
5239    if (ahb_import_info) {
5240       result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
5241       if (result != VK_SUCCESS)
5242          goto fail;
5243    } else if (export_info && (export_info->handleTypes &
5244                               VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
5245       result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
5246       if (result != VK_SUCCESS)
5247          goto fail;
5248    } else if (import_info) {
5249       assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
5250              import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
5251       result = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, &mem->bo, NULL);
5252       if (result != VK_SUCCESS) {
5253          goto fail;
5254       } else {
5255          close(import_info->fd);
5256       }
5257
5258       if (mem->image && mem->image->plane_count == 1 &&
5259           !vk_format_is_depth_or_stencil(mem->image->vk.format) && mem->image->info.samples == 1 &&
5260           mem->image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
5261          struct radeon_bo_metadata metadata;
5262          device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
5263
5264          struct radv_image_create_info create_info = {.no_metadata_planes = true,
5265                                                       .bo_metadata = &metadata};
5266
5267          /* This gives a basic ability to import radeonsi images
5268           * that don't have DCC. This is not guaranteed by any
5269           * spec and can be removed after we support modifiers. */
5270          result = radv_image_create_layout(device, create_info, NULL, mem->image);
5271          if (result != VK_SUCCESS) {
5272             device->ws->buffer_destroy(device->ws, mem->bo);
5273             goto fail;
5274          }
5275       }
5276    } else if (host_ptr_info) {
5277       assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
5278       result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
5279                                            pAllocateInfo->allocationSize, priority, &mem->bo);
5280       if (result != VK_SUCCESS) {
5281          goto fail;
5282       } else {
5283          mem->user_ptr = host_ptr_info->pHostPointer;
5284       }
5285    } else {
5286       uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
5287       uint32_t heap_index;
5288
5289       heap_index =
5290          device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]
5291             .heapIndex;
5292       domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
5293       flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
5294
5295       if (!import_info && (!export_info || !export_info->handleTypes)) {
5296          flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
5297          if (device->use_global_bo_list) {
5298             flags |= RADEON_FLAG_PREFER_LOCAL_BO;
5299          }
5300       }
5301
5302       const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
5303       if (flags_info && flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
5304          flags |= RADEON_FLAG_REPLAYABLE;
5305
5306       if (device->instance->zero_vram)
5307          flags |= RADEON_FLAG_ZERO_VRAM;
5308
5309       if (device->overallocation_disallowed) {
5310          uint64_t total_size =
5311             device->physical_device->memory_properties.memoryHeaps[heap_index].size;
5312
5313          mtx_lock(&device->overallocation_mutex);
5314          if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
5315             mtx_unlock(&device->overallocation_mutex);
5316             result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
5317             goto fail;
5318          }
5319          device->allocated_memory_size[heap_index] += alloc_size;
5320          mtx_unlock(&device->overallocation_mutex);
5321       }
5322
5323       result = device->ws->buffer_create(device->ws, alloc_size,
5324                                          device->physical_device->rad_info.max_alignment, domain,
5325                                          flags, priority, replay_address, &mem->bo);
5326
5327       if (result != VK_SUCCESS) {
5328          if (device->overallocation_disallowed) {
5329             mtx_lock(&device->overallocation_mutex);
5330             device->allocated_memory_size[heap_index] -= alloc_size;
5331             mtx_unlock(&device->overallocation_mutex);
5332          }
5333          goto fail;
5334       }
5335
5336       mem->heap_index = heap_index;
5337       mem->alloc_size = alloc_size;
5338    }
5339
5340    if (!wsi_info) {
5341       if (device->use_global_bo_list) {
5342          result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
5343          if (result != VK_SUCCESS)
5344             goto fail;
5345       }
5346    }
5347
5348    *pMem = radv_device_memory_to_handle(mem);
5349
5350    return VK_SUCCESS;
5351
5352 fail:
5353    radv_free_memory(device, pAllocator, mem);
5354
5355    return result;
5356 }
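/* Illustrative sketch, not part of the driver: an allocation exercising the
 * pNext chains parsed by radv_alloc_memory() above -- a memory priority plus
 * device-address capture/replay. The size and memoryTypeIndex are
 * placeholders; a real caller picks them from the memory requirements.
 */
#if 0
static VkResult
example_allocate_with_priority(VkDevice device, VkDeviceMemory *mem)
{
   const VkMemoryPriorityAllocateInfoEXT priority_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT,
      .priority = 1.0f, /* mapped to a BO priority by radv_alloc_memory() */
   };
   const VkMemoryAllocateFlagsInfo flags_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO,
      .pNext = &priority_info,
      .flags = VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT |
               VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT,
   };
   const VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .pNext = &flags_info,
      .allocationSize = 64 * 1024,
      .memoryTypeIndex = 0,
   };
   return vkAllocateMemory(device, &alloc_info, NULL, mem);
}
#endif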
5357
5358 VKAPI_ATTR VkResult VKAPI_CALL
5359 radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo,
5360                     const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
5361 {
5362    RADV_FROM_HANDLE(radv_device, device, _device);
5363    return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
5364 }
5365
5366 VKAPI_ATTR void VKAPI_CALL
5367 radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator)
5368 {
5369    RADV_FROM_HANDLE(radv_device, device, _device);
5370    RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
5371
5372    radv_free_memory(device, pAllocator, mem);
5373 }
5374
5375 VKAPI_ATTR VkResult VKAPI_CALL
5376 radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,
5377                VkMemoryMapFlags flags, void **ppData)
5378 {
5379    RADV_FROM_HANDLE(radv_device, device, _device);
5380    RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5381
5382    if (mem->user_ptr)
5383       *ppData = mem->user_ptr;
5384    else
5385       *ppData = device->ws->buffer_map(mem->bo);
5386
5387    if (*ppData) {
5388       *ppData = (uint8_t *)*ppData + offset;
5389       return VK_SUCCESS;
5390    }
5391
5392    return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
5393 }
5394
5395 VKAPI_ATTR void VKAPI_CALL
5396 radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
5397 {
5398    RADV_FROM_HANDLE(radv_device, device, _device);
5399    RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5400
5401    if (mem->user_ptr == NULL)
5402       device->ws->buffer_unmap(mem->bo);
5403 }
5404
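/* Every host-visible memory type RADV advertises is also host-coherent, so
 * flushing and invalidating mapped ranges are no-ops.
 */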
5405 VKAPI_ATTR VkResult VKAPI_CALL
5406 radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
5407                              const VkMappedMemoryRange *pMemoryRanges)
5408 {
5409    return VK_SUCCESS;
5410 }
5411
5412 VKAPI_ATTR VkResult VKAPI_CALL
5413 radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
5414                                   const VkMappedMemoryRange *pMemoryRanges)
5415 {
5416    return VK_SUCCESS;
5417 }
5418
5419 static void
5420 radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size,
5421                                     VkBufferCreateFlags flags, VkBufferCreateFlags usage,
5422                                     VkMemoryRequirements2 *pMemoryRequirements)
5423 {
5424    pMemoryRequirements->memoryRequirements.memoryTypeBits =
5425       (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5426
5427    if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
5428       pMemoryRequirements->memoryRequirements.alignment = 4096;
5429    else
5430       pMemoryRequirements->memoryRequirements.alignment = 16;
5431
5432    /* Top-level acceleration structures need the bottom 6 bits to store
5433     * the root IDs of instances. The hardware also needs BVH nodes to
5434     * be 64-byte aligned.
5435     */
5436    if (usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR)
5437       pMemoryRequirements->memoryRequirements.alignment =
5438          MAX2(pMemoryRequirements->memoryRequirements.alignment, 64);
5439
5440    pMemoryRequirements->memoryRequirements.size =
5441       align64(size, pMemoryRequirements->memoryRequirements.alignment);
5442
5443    vk_foreach_struct(ext, pMemoryRequirements->pNext)
5444    {
5445       switch (ext->sType) {
5446       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5447          VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
5448          req->requiresDedicatedAllocation = false;
5449          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5450          break;
5451       }
5452       default:
5453          break;
5454       }
5455    }
5456 }
5457
5458 VKAPI_ATTR void VKAPI_CALL
5459 radv_GetBufferMemoryRequirements2(VkDevice _device, const VkBufferMemoryRequirementsInfo2 *pInfo,
5460                                   VkMemoryRequirements2 *pMemoryRequirements)
5461 {
5462    RADV_FROM_HANDLE(radv_device, device, _device);
5463    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
5464
5465    radv_get_buffer_memory_requirements(device, buffer->vk.size, buffer->vk.create_flags,
5466                                        buffer->vk.usage, pMemoryRequirements);
5467 }
5468
5469 VKAPI_ATTR void VKAPI_CALL
5470 radv_GetDeviceBufferMemoryRequirements(VkDevice _device,
5471                                        const VkDeviceBufferMemoryRequirements *pInfo,
5472                                        VkMemoryRequirements2 *pMemoryRequirements)
5473 {
5474    RADV_FROM_HANDLE(radv_device, device, _device);
5475
5476    radv_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pInfo->pCreateInfo->flags,
5477                                        pInfo->pCreateInfo->usage, pMemoryRequirements);
5478 }
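/* Illustrative sketch, not part of the driver: querying the requirements of
 * an acceleration-structure buffer without creating it, which triggers the
 * 64-byte alignment raise above. The size is a placeholder.
 */
#if 0
static VkDeviceSize
example_as_buffer_alignment(VkDevice device)
{
   const VkBufferCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .size = 4096,
      .usage = VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR,
   };
   const VkDeviceBufferMemoryRequirements info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_BUFFER_MEMORY_REQUIREMENTS,
      .pCreateInfo = &create_info,
   };
   VkMemoryRequirements2 reqs = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
   };
   vkGetDeviceBufferMemoryRequirements(device, &info, &reqs);
   return reqs.memoryRequirements.alignment; /* at least 64 here */
}
#endif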
5479
5480 VKAPI_ATTR void VKAPI_CALL
5481 radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequirementsInfo2 *pInfo,
5482                                  VkMemoryRequirements2 *pMemoryRequirements)
5483 {
5484    RADV_FROM_HANDLE(radv_device, device, _device);
5485    RADV_FROM_HANDLE(radv_image, image, pInfo->image);
5486
5487    pMemoryRequirements->memoryRequirements.memoryTypeBits =
5488       (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5489
5490    pMemoryRequirements->memoryRequirements.size = image->size;
5491    pMemoryRequirements->memoryRequirements.alignment = image->alignment;
5492
5493    vk_foreach_struct(ext, pMemoryRequirements->pNext)
5494    {
5495       switch (ext->sType) {
5496       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5497          VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
5498          req->requiresDedicatedAllocation =
5499             image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR;
5500          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5501          break;
5502       }
5503       default:
5504          break;
5505       }
5506    }
5507 }
5508
5509 VKAPI_ATTR void VKAPI_CALL
5510 radv_GetDeviceImageMemoryRequirements(VkDevice device,
5511                                       const VkDeviceImageMemoryRequirements *pInfo,
5512                                       VkMemoryRequirements2 *pMemoryRequirements)
5513 {
5514    UNUSED VkResult result;
5515    VkImage image;
5516
5517    /* Determining the image size/alignment requires creating a surface,
5518     * which is complicated to do without creating an image.
5519     * TODO: Avoid creating an image.
5520     */
5521    result = radv_CreateImage(device, pInfo->pCreateInfo, NULL, &image);
5522    assert(result == VK_SUCCESS);
5523
5524    VkImageMemoryRequirementsInfo2 info2 = {
5525       .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
5526       .image = image,
5527    };
5528
5529    radv_GetImageMemoryRequirements2(device, &info2, pMemoryRequirements);
5530
5531    radv_DestroyImage(device, image, NULL);
5532 }
5533
5534 VKAPI_ATTR void VKAPI_CALL
5535 radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,
5536                                VkDeviceSize *pCommittedMemoryInBytes)
5537 {
5538    *pCommittedMemoryInBytes = 0;
5539 }
5540
5541 VKAPI_ATTR VkResult VKAPI_CALL
5542 radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,
5543                        const VkBindBufferMemoryInfo *pBindInfos)
5544 {
5545    RADV_FROM_HANDLE(radv_device, device, _device);
5546
5547    for (uint32_t i = 0; i < bindInfoCount; ++i) {
5548       RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5549       RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
5550
5551       if (mem->alloc_size) {
5552          VkBufferMemoryRequirementsInfo2 info = {
5553             .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
5554             .buffer = pBindInfos[i].buffer,
5555          };
5556          VkMemoryRequirements2 reqs = {
5557             .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
5558          };
5559
5560          radv_GetBufferMemoryRequirements2(_device, &info, &reqs);
5561
5562          if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
5563             return vk_errorf(device, VK_ERROR_UNKNOWN,
5564                              "Device memory object too small for the buffer.\n");
5565          }
5566       }
5567
5568       buffer->bo = mem->bo;
5569       buffer->offset = pBindInfos[i].memoryOffset;
5570    }
5571    return VK_SUCCESS;
5572 }
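/* Illustrative sketch, not part of the driver: a bind that the size check
 * above validates against the backing allocation.
 */
#if 0
static VkResult
example_bind_buffer(VkDevice device, VkBuffer buffer, VkDeviceMemory memory,
                    VkDeviceSize offset)
{
   const VkBindBufferMemoryInfo bind = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = buffer,
      .memory = memory,
      .memoryOffset = offset, /* offset + required size must fit the allocation */
   };
   return vkBindBufferMemory2(device, 1, &bind);
}
#endif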
5573
5574 VKAPI_ATTR VkResult VKAPI_CALL
5575 radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount,
5576                       const VkBindImageMemoryInfo *pBindInfos)
5577 {
5578    RADV_FROM_HANDLE(radv_device, device, _device);
5579
5580    for (uint32_t i = 0; i < bindInfoCount; ++i) {
5581       RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5582       RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
5583
5584       if (mem->alloc_size) {
5585          VkImageMemoryRequirementsInfo2 info = {
5586             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
5587             .image = pBindInfos[i].image,
5588          };
5589          VkMemoryRequirements2 reqs = {
5590             .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
5591          };
5592
5593          radv_GetImageMemoryRequirements2(_device, &info, &reqs);
5594
5595          if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
5596             return vk_errorf(device, VK_ERROR_UNKNOWN,
5597                              "Device memory object too small for the image.\n");
5598          }
5599       }
5600
5601       image->bo = mem->bo;
5602       image->offset = pBindInfos[i].memoryOffset;
5603    }
5604    return VK_SUCCESS;
5605 }
5606
5607 static void
5608 radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5609                    struct radv_event *event)
5610 {
5611    if (event->bo)
5612       device->ws->buffer_destroy(device->ws, event->bo);
5613
5614    vk_object_base_finish(&event->base);
5615    vk_free2(&device->vk.alloc, pAllocator, event);
5616 }
5617
5618 VKAPI_ATTR VkResult VKAPI_CALL
5619 radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
5620                  const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
5621 {
5622    RADV_FROM_HANDLE(radv_device, device, _device);
5623    enum radeon_bo_domain bo_domain;
5624    enum radeon_bo_flag bo_flags;
5625    struct radv_event *event;
5626    VkResult result;
5627
5628    event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
5629                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5630    if (!event)
5631       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5632
5633    vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
5634
5635    if (pCreateInfo->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT) {
5636       bo_domain = RADEON_DOMAIN_VRAM;
5637       bo_flags = RADEON_FLAG_NO_CPU_ACCESS;
5638    } else {
5639       bo_domain = RADEON_DOMAIN_GTT;
5640       bo_flags = RADEON_FLAG_CPU_ACCESS;
5641    }
5642
5643    result = device->ws->buffer_create(
5644       device->ws, 8, 8, bo_domain,
5645       RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_NO_INTERPROCESS_SHARING | bo_flags,
5646       RADV_BO_PRIORITY_FENCE, 0, &event->bo);
5647    if (result != VK_SUCCESS) {
5648       radv_destroy_event(device, pAllocator, event);
5649       return vk_error(device, result);
5650    }
5651
5652    if (!(pCreateInfo->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT)) {
5653       event->map = (uint64_t *)device->ws->buffer_map(event->bo);
5654       if (!event->map) {
5655          radv_destroy_event(device, pAllocator, event);
5656          return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
5657       }
5658    }
5659
5660    *pEvent = radv_event_to_handle(event);
5661
5662    return VK_SUCCESS;
5663 }
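/* Illustrative sketch, not part of the driver: a device-only event takes the
 * VRAM/no-CPU-access path above and skips the CPU mapping entirely.
 */
#if 0
static VkResult
example_create_device_only_event(VkDevice device, VkEvent *event)
{
   const VkEventCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
      .flags = VK_EVENT_CREATE_DEVICE_ONLY_BIT,
   };
   return vkCreateEvent(device, &create_info, NULL, event);
}
#endif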
5664
5665 VKAPI_ATTR void VKAPI_CALL
5666 radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator)
5667 {
5668    RADV_FROM_HANDLE(radv_device, device, _device);
5669    RADV_FROM_HANDLE(radv_event, event, _event);
5670
5671    if (!event)
5672       return;
5673
5674    radv_destroy_event(device, pAllocator, event);
5675 }
5676
5677 VKAPI_ATTR VkResult VKAPI_CALL
5678 radv_GetEventStatus(VkDevice _device, VkEvent _event)
5679 {
5680    RADV_FROM_HANDLE(radv_device, device, _device);
5681    RADV_FROM_HANDLE(radv_event, event, _event);
5682
5683    if (vk_device_is_lost(&device->vk))
5684       return VK_ERROR_DEVICE_LOST;
5685
5686    if (*event->map == 1)
5687       return VK_EVENT_SET;
5688    return VK_EVENT_RESET;
5689 }
5690
5691 VKAPI_ATTR VkResult VKAPI_CALL
5692 radv_SetEvent(VkDevice _device, VkEvent _event)
5693 {
5694    RADV_FROM_HANDLE(radv_event, event, _event);
5695    *event->map = 1;
5696
5697    return VK_SUCCESS;
5698 }
5699
5700 VKAPI_ATTR VkResult VKAPI_CALL
5701 radv_ResetEvent(VkDevice _device, VkEvent _event)
5702 {
5703    RADV_FROM_HANDLE(radv_event, event, _event);
5704    *event->map = 0;
5705
5706    return VK_SUCCESS;
5707 }
5708
5709 void
5710 radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
5711                  struct radeon_winsys_bo *bo, uint64_t size,
5712                  uint64_t offset)
5713 {
5714    VkBufferCreateInfo createInfo = {
5715       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
5716       .size = size,
5717    };
5718
5719    vk_buffer_init(&device->vk, &buffer->vk, &createInfo);
5720
5721    buffer->bo = bo;
5722    buffer->offset = offset;
5723 }
5724
5725 void
5726 radv_buffer_finish(struct radv_buffer *buffer)
5727 {
5728    vk_buffer_finish(&buffer->vk);
5729 }
5730
5731 static void
5732 radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5733                     struct radv_buffer *buffer)
5734 {
5735    if ((buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
5736       device->ws->buffer_destroy(device->ws, buffer->bo);
5737
5738    radv_buffer_finish(buffer);
5739    vk_free2(&device->vk.alloc, pAllocator, buffer);
5740 }
5741
5742 VKAPI_ATTR VkResult VKAPI_CALL
5743 radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
5744                   const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
5745 {
5746    RADV_FROM_HANDLE(radv_device, device, _device);
5747    struct radv_buffer *buffer;
5748
5749    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
5750
5751    buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
5752                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5753    if (buffer == NULL)
5754       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5755
5756    vk_buffer_init(&device->vk, &buffer->vk, pCreateInfo);
5757    buffer->bo = NULL;
5758    buffer->offset = 0;
5759
5760    if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
5761       enum radeon_bo_flag flags = RADEON_FLAG_VIRTUAL;
5762       if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
5763          flags |= RADEON_FLAG_REPLAYABLE;
5764
5765       uint64_t replay_address = 0;
5766       const VkBufferOpaqueCaptureAddressCreateInfo *replay_info =
5767          vk_find_struct_const(pCreateInfo->pNext, BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
5768       if (replay_info && replay_info->opaqueCaptureAddress)
5769          replay_address = replay_info->opaqueCaptureAddress;
5770
5771       VkResult result =
5772          device->ws->buffer_create(device->ws, align64(buffer->vk.size, 4096), 4096, 0, flags,
5773                                    RADV_BO_PRIORITY_VIRTUAL, replay_address, &buffer->bo);
5774       if (result != VK_SUCCESS) {
5775          radv_destroy_buffer(device, pAllocator, buffer);
5776          return vk_error(device, result);
5777       }
5778    }
5779
5780    *pBuffer = radv_buffer_to_handle(buffer);
5781
5782    return VK_SUCCESS;
5783 }
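/* Illustrative sketch, not part of the driver: a sparse buffer, the only
 * path above that creates a (virtual) BO at vkCreateBuffer() time. The size
 * and usage are placeholders.
 */
#if 0
static VkResult
example_create_sparse_buffer(VkDevice device, VkBuffer *buffer)
{
   const VkBufferCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
      .flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT,
      .size = 16 * 1024 * 1024,
      .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
   };
   return vkCreateBuffer(device, &create_info, NULL, buffer);
}
#endif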
5784
5785 VKAPI_ATTR void VKAPI_CALL
5786 radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)
5787 {
5788    RADV_FROM_HANDLE(radv_device, device, _device);
5789    RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
5790
5791    if (!buffer)
5792       return;
5793
5794    radv_destroy_buffer(device, pAllocator, buffer);
5795 }
5796
5797 VKAPI_ATTR VkDeviceAddress VKAPI_CALL
5798 radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
5799 {
5800    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
5801    return radv_buffer_get_va(buffer->bo) + buffer->offset;
5802 }
5803
5804 VKAPI_ATTR uint64_t VKAPI_CALL
5805 radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
5806 {
5807    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
5808    return buffer->bo ? radv_buffer_get_va(buffer->bo) + buffer->offset : 0;
5809 }
5810
5811 VKAPI_ATTR uint64_t VKAPI_CALL
5812 radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
5813                                          const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
5814 {
5815    RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
5816    return radv_buffer_get_va(mem->bo);
5817 }
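/* Illustrative sketch, not part of the driver: the capture side of
 * capture/replay served by the two functions above. The recorded addresses
 * are later fed back through VkBufferOpaqueCaptureAddressCreateInfo and
 * VkMemoryOpaqueCaptureAddressAllocateInfo at replay time.
 */
#if 0
static void
example_capture_addresses(VkDevice device, VkBuffer buffer, VkDeviceMemory memory,
                          uint64_t *buffer_addr, uint64_t *memory_addr)
{
   const VkBufferDeviceAddressInfo buffer_info = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO,
      .buffer = buffer,
   };
   const VkDeviceMemoryOpaqueCaptureAddressInfo memory_info = {
      .sType = VK_STRUCTURE_TYPE_DEVICE_MEMORY_OPAQUE_CAPTURE_ADDRESS_INFO,
      .memory = memory,
   };
   *buffer_addr = vkGetBufferOpaqueCaptureAddress(device, &buffer_info);
   *memory_addr = vkGetDeviceMemoryOpaqueCaptureAddress(device, &memory_info);
}
#endif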
5818
5819 static inline unsigned
5820 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
5821 {
5822    if (stencil)
5823       return plane->surface.u.legacy.zs.stencil_tiling_index[level];
5824    else
5825       return plane->surface.u.legacy.tiling_index[level];
5826 }
5827
5828 static uint32_t
5829 radv_surface_max_layer_count(struct radv_image_view *iview)
5830 {
5831    return iview->vk.view_type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
5832                                                        : (iview->vk.base_array_layer + iview->vk.layer_count);
5833 }
5834
5835 static unsigned
5836 get_dcc_max_uncompressed_block_size(const struct radv_device *device,
5837                                     const struct radv_image_view *iview)
5838 {
5839    if (device->physical_device->rad_info.gfx_level < GFX10 && iview->image->info.samples > 1) {
5840       if (iview->image->planes[0].surface.bpe == 1)
5841          return V_028C78_MAX_BLOCK_SIZE_64B;
5842       else if (iview->image->planes[0].surface.bpe == 2)
5843          return V_028C78_MAX_BLOCK_SIZE_128B;
5844    }
5845
5846    return V_028C78_MAX_BLOCK_SIZE_256B;
5847 }
5848
5849 static unsigned
5850 get_dcc_min_compressed_block_size(const struct radv_device *device)
5851 {
5852    if (!device->physical_device->rad_info.has_dedicated_vram) {
5853       /* amdvlk: [min-compressed-block-size] should be set to 32 for
5854        * dGPU and 64 for APU because all of our APUs to date use
5855        * DIMMs which have a request granularity size of 64B while all
5856        * other chips have a 32B request size.
5857        */
5858       return V_028C78_MIN_BLOCK_SIZE_64B;
5859    }
5860
5861    return V_028C78_MIN_BLOCK_SIZE_32B;
5862 }
5863
5864 static uint32_t
5865 radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
5866 {
5867    unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
5868    unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
5869    unsigned max_compressed_block_size;
5870    unsigned independent_128b_blocks;
5871    unsigned independent_64b_blocks;
5872
5873    if (!radv_dcc_enabled(iview->image, iview->vk.base_mip_level))
5874       return 0;
5875
5876    /* For GFX9+, ac_surface computes these values for us (except
5877     * min_compressed and max_uncompressed). */
5878    if (device->physical_device->rad_info.gfx_level >= GFX9) {
5879       max_compressed_block_size =
5880          iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
5881       independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
5882       independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
5883    } else {
5884       independent_128b_blocks = 0;
5885
5886       if (iview->image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
5887                                     VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
5888          /* If this DCC image is potentially going to be used in texture
5889           * fetches, we need some special settings.
5890           */
5891          independent_64b_blocks = 1;
5892          max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
5893       } else {
5894          /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
5895           * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
5896           * big as possible for the best compression.
5897           */
5898          independent_64b_blocks = 0;
5899          max_compressed_block_size = max_uncompressed_block_size;
5900       }
5901    }
5902
5903    uint32_t result = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
5904                      S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
5905                      S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
5906                      S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
5907
5908    if (device->physical_device->rad_info.gfx_level >= GFX11) {
5909       result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) |
5910                 S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
5911                 S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level));
5912    } else {
5913       result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(independent_128b_blocks);
5914    }
5915
5916    return result;
5917 }
5918
5919 void
5920 radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
5921                               struct radv_image_view *iview)
5922 {
5923    const struct util_format_description *desc;
5924    unsigned ntype, format, swap, endian;
5925    unsigned blend_clamp = 0, blend_bypass = 0;
5926    uint64_t va;
5927    const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
5928    const struct radeon_surf *surf = &plane->surface;
5929
5930    desc = vk_format_description(iview->vk.format);
5931
5932    memset(cb, 0, sizeof(*cb));
5933
5934    /* Intensity is implemented as Red, so treat it that way. */
5935    if (device->physical_device->rad_info.gfx_level >= GFX11)
5936       cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1);
5937    else
5938       cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);
5939
5940    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
5941
5942    cb->cb_color_base = va >> 8;
5943
5944    if (device->physical_device->rad_info.gfx_level >= GFX9) {
5945       if (device->physical_device->rad_info.gfx_level >= GFX11) {
5946          cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
5947                                  S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
5948       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
5949          cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
5950                                  S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
5951                                  S_028EE0_CMASK_PIPE_ALIGNED(1) |
5952                                  S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
5953       } else {
5954          struct gfx9_surf_meta_flags meta = {
5955             .rb_aligned = 1,
5956             .pipe_aligned = 1,
5957          };
5958
5959          if (surf->meta_offset)
5960             meta = surf->u.gfx9.color.dcc;
5961
5962          cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
5963                                 S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
5964                                 S_028C74_RB_ALIGNED(meta.rb_aligned) |
5965                                 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
5966          cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
5967       }
5968
5969       cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
5970       cb->cb_color_base |= surf->tile_swizzle;
5971    } else {
5972       const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->vk.base_mip_level];
5973       unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
5974
5975       cb->cb_color_base += level_info->offset_256B;
5976       if (level_info->mode == RADEON_SURF_MODE_2D)
5977          cb->cb_color_base |= surf->tile_swizzle;
5978
5979       pitch_tile_max = level_info->nblk_x / 8 - 1;
5980       slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
5981       tile_mode_index = si_tile_mode_index(plane, iview->vk.base_mip_level, false);
5982
5983       cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
5984       cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
5985       cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;
5986
5987       cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
5988
5989       if (radv_image_has_fmask(iview->image)) {
5990          if (device->physical_device->rad_info.gfx_level >= GFX7)
5991             cb->cb_color_pitch |=
5992                S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
5993          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
5994          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
5995       } else {
5996          /* This must be set for fast clear to work without FMASK. */
5997          if (device->physical_device->rad_info.gfx_level >= GFX7)
5998             cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
5999          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
6000          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
6001       }
6002    }
6003
6004    /* CMASK variables */
6005    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6006    va += surf->cmask_offset;
6007    cb->cb_color_cmask = va >> 8;
6008
6009    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6010    va += surf->meta_offset;
6011
6012    if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) &&
6013        device->physical_device->rad_info.gfx_level <= GFX8)
6014       va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;
6015
6016    unsigned dcc_tile_swizzle = surf->tile_swizzle;
6017    dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;
6018
6019    cb->cb_dcc_base = va >> 8;
6020    cb->cb_dcc_base |= dcc_tile_swizzle;
6021
6022    /* GFX10 field has the same base shift as the GFX6 field. */
6023    uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6024    cb->cb_color_view =
6025       S_028C6C_SLICE_START(iview->vk.base_array_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);
6026
6027    if (iview->image->info.samples > 1) {
6028       unsigned log_samples = util_logbase2(iview->image->info.samples);
6029
6030       if (device->physical_device->rad_info.gfx_level >= GFX11)
6031          cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
6032       else
6033          cb->cb_color_attrib |=
6034             S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
6035    }
6036
6037    if (radv_image_has_fmask(iview->image)) {
6038       va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->fmask_offset;
6039       cb->cb_color_fmask = va >> 8;
6040       cb->cb_color_fmask |= surf->fmask_tile_swizzle;
6041    } else {
6042       cb->cb_color_fmask = cb->cb_color_base;
6043    }
6044
6045    ntype = radv_translate_color_numformat(iview->vk.format, desc,
6046                                           vk_format_get_first_non_void_channel(iview->vk.format));
6047    format = radv_translate_colorformat(iview->vk.format);
6048    assert(format != V_028C70_COLOR_INVALID);
6049
6050    swap = radv_translate_colorswap(iview->vk.format, false);
6051    endian = radv_colorformat_endian_swap(format);
6052
6053    /* blend clamp should be set for all NORM/SRGB types */
6054    if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
6055        ntype == V_028C70_NUMBER_SRGB)
6056       blend_clamp = 1;
6057
6058    /* Set blend bypass according to the docs for SINT/UINT and the
6059     * 8/24 COLOR variants. */
6060    if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
6061        format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
6062        format == V_028C70_COLOR_X24_8_32_FLOAT) {
6063       blend_clamp = 0;
6064       blend_bypass = 1;
6065    }
6066 #if 0
6067         if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
6068             (format == V_028C70_COLOR_8 ||
6069              format == V_028C70_COLOR_8_8 ||
6070              format == V_028C70_COLOR_8_8_8_8))
6071                 ->color_is_int8 = true;
6072 #endif
6073    cb->cb_color_info =
6074       S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
6075       S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
6076       S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
6077                           ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
6078                           format != V_028C70_COLOR_24_8) |
6079       S_028C70_NUMBER_TYPE(ntype);
6080
6081    if (device->physical_device->rad_info.gfx_level >= GFX11)
6082       cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
6083    else
6084       cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);
6085
6086    if (radv_image_has_fmask(iview->image)) {
6087       cb->cb_color_info |= S_028C70_COMPRESSION(1);
6088       if (device->physical_device->rad_info.gfx_level == GFX6) {
6089          unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
6090          cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
6091       }
6092
6093       if (radv_image_is_tc_compat_cmask(iview->image)) {
6094          /* Allow the texture block to read FMASK directly
6095           * without decompressing it. This bit must be cleared
6096           * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
6097           * otherwise the operation doesn't happen.
6098           */
6099          cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
6100
6101          if (device->physical_device->rad_info.gfx_level == GFX8) {
6102             /* Set CMASK into a tiling format that allows
6103              * the texture block to read it.
6104              */
6105             cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
6106          }
6107       }
6108    }
6109
6110    if (radv_image_has_cmask(iview->image) &&
6111        !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
6112       cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
6113
6114    if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
6115        device->physical_device->rad_info.gfx_level < GFX11)
6116       cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
6117
6118    cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
6119
6120    /* This must be set for fast clear to work without FMASK. */
6121    if (!radv_image_has_fmask(iview->image) && device->physical_device->rad_info.gfx_level == GFX6) {
6122       unsigned bankh = util_logbase2(surf->u.legacy.bankh);
6123       cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
6124    }
6125
6126    if (device->physical_device->rad_info.gfx_level >= GFX9) {
6127       unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D
6128                                ? (iview->extent.depth - 1)
6129                                : (iview->image->info.array_size - 1);
6130       unsigned width =
6131          vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
6132       unsigned height =
6133          vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
6134
6135       if (device->physical_device->rad_info.gfx_level >= GFX10) {
6136          cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->vk.base_mip_level);
6137
6138          cb->cb_color_attrib3 |=
6139             S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
6140             S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 0 : 1);
6141       } else {
6142          cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
6143          cb->cb_color_attrib |=
6144             S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
6145       }
6146
6147       cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
6148                              S_028C68_MAX_MIP(iview->image->info.levels - 1);
6149    }
6150 }
6151
6152 static unsigned
6153 radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)
6154 {
6155    unsigned max_zplanes = 0;
6156
6157    assert(radv_image_is_tc_compat_htile(iview->image));
6158
6159    if (device->physical_device->rad_info.gfx_level >= GFX9) {
6160       /* Default value for 32-bit depth surfaces. */
6161       max_zplanes = 4;
6162
6163       if (iview->vk.format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)
6164          max_zplanes = 2;
6165
6166       /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
6167       if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
6168           radv_image_get_iterate256(device, iview->image) &&
6169           !radv_image_tile_stencil_disabled(device, iview->image) &&
6170           iview->image->info.samples == 4) {
6171          max_zplanes = 1;
6172       }
6173
6174       max_zplanes = max_zplanes + 1;
6175    } else {
6176       if (iview->vk.format == VK_FORMAT_D16_UNORM) {
6177          /* Do not enable Z-plane compression for 16-bit depth
6178           * surfaces because it isn't supported on GFX8. Only
6179           * 32-bit depth surfaces are supported by the hardware.
6180           * This maintains shader compatibility and reduces the
6181           * number of depth decompressions.
6182           */
6183          max_zplanes = 1;
6184       } else {
6185          if (iview->image->info.samples <= 1)
6186             max_zplanes = 5;
6187          else if (iview->image->info.samples <= 4)
6188             max_zplanes = 3;
6189          else
6190             max_zplanes = 2;
6191       }
6192    }
6193
6194    return max_zplanes;
6195 }
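/* Summary of the values computed above, before the GFX9+ "+ 1":
 *
 *   GFX9+:    4 (2 for multisampled D16), 1 with the ITERATE_256 workaround
 *   pre-GFX9: D16_UNORM -> 1; otherwise 5 (1 sample), 3 (2-4 samples),
 *             2 (8 samples)
 */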
6196
6197 void
6198 radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
6199                             struct radv_ds_buffer_info *ds)
6200 {
6201    const struct radeon_surf *surf = &image->planes[0].surface;
6202
6203    assert(image->vk.format == VK_FORMAT_D16_UNORM);
6204    memset(ds, 0, sizeof(*ds));
6205
6206    ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
6207
6208    ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) |
6209                    S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
6210                    S_028038_ZRANGE_PRECISION(1) |
6211                    S_028038_TILE_SURFACE_ENABLE(1);
6212    ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);
6213
6214    ds->db_depth_size = S_02801C_X_MAX(image->info.width - 1) |
6215                        S_02801C_Y_MAX(image->info.height - 1);
6216
6217    ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
6218    ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) |
6219                           S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
6220 }
6221
6222 void
6223 radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
6224                            struct radv_image_view *iview)
6225 {
6226    unsigned level = iview->vk.base_mip_level;
6227    unsigned format, stencil_format;
6228    uint64_t va, s_offs, z_offs;
6229    bool stencil_only = iview->image->vk.format == VK_FORMAT_S8_UINT;
6230    const struct radv_image_plane *plane = &iview->image->planes[0];
6231    const struct radeon_surf *surf = &plane->surface;
6232
6233    assert(vk_format_get_plane_count(iview->image->vk.format) == 1);
6234
6235    memset(ds, 0, sizeof(*ds));
6236    if (!device->instance->absolute_depth_bias) {
6237       switch (iview->image->vk.format) {
6238       case VK_FORMAT_D24_UNORM_S8_UINT:
6239       case VK_FORMAT_X8_D24_UNORM_PACK32:
6240          ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
6241          break;
6242       case VK_FORMAT_D16_UNORM:
6243       case VK_FORMAT_D16_UNORM_S8_UINT:
6244          ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
6245          break;
6246       case VK_FORMAT_D32_SFLOAT:
6247       case VK_FORMAT_D32_SFLOAT_S8_UINT:
6248          ds->pa_su_poly_offset_db_fmt_cntl =
6249             S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
6250          break;
6251       default:
6252          break;
6253       }
6254    }
6255
6256    format = radv_translate_dbformat(iview->image->vk.format);
6257    stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
6258
6259    uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6260    ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) |
6261                        S_028008_SLICE_MAX(max_slice);
6262    if (device->physical_device->rad_info.gfx_level >= GFX10) {
6263       ds->db_depth_view |= S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) |
6264                            S_028008_SLICE_MAX_HI(max_slice >> 11);
6265    }
6266
6267    ds->db_htile_data_base = 0;
6268    ds->db_htile_surface = 0;
6269
6270    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6271    s_offs = z_offs = va;
6272
6273    if (device->physical_device->rad_info.gfx_level >= GFX9) {
6274       assert(surf->u.gfx9.surf_offset == 0);
6275       s_offs += surf->u.gfx9.zs.stencil_offset;
6276
6277       ds->db_z_info = S_028038_FORMAT(format) |
6278                       S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
6279                       S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
6280                       S_028038_MAXMIP(iview->image->info.levels - 1) |
6281                       S_028038_ZRANGE_PRECISION(1) |
6282                       S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
6283       ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
6284                             S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
6285                             S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
6286
6287       if (device->physical_device->rad_info.gfx_level == GFX9) {
6288          ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
6289          ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
6290       }
6291
6292       ds->db_depth_view |= S_028008_MIPID(level);
6293       ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
6294                           S_02801C_Y_MAX(iview->image->info.height - 1);
6295
6296       if (radv_htile_enabled(iview->image, level)) {
6297          ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
6298
6299          if (radv_image_is_tc_compat_htile(iview->image)) {
6300             unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
6301
6302             ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
6303
6304             if (device->physical_device->rad_info.gfx_level >= GFX10) {
6305                bool iterate256 = radv_image_get_iterate256(device, iview->image);
6306
6307                ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
6308                ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
6309                ds->db_z_info |= S_028040_ITERATE_256(iterate256);
6310                ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
6311             } else {
6312                ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
6313                ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
6314             }
6315          }
6316
6317          if (radv_image_tile_stencil_disabled(device, iview->image)) {
6318             ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
6319          }
6320
6321          va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
6322          ds->db_htile_data_base = va >> 8;
6323          ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
6324
6325          if (device->physical_device->rad_info.gfx_level == GFX9) {
6326             ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
6327          }
6328
6329          if (radv_image_has_vrs_htile(device, iview->image)) {
6330             ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
6331          }
6332       }
6333    } else {
6334       const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
6335
6336       if (stencil_only)
6337          level_info = &surf->u.legacy.zs.stencil_level[level];
6338
6339       z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;
6340       s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;
6341
6342       ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
6343       ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
6344       ds->db_stencil_info = S_028044_FORMAT(stencil_format);
6345
6346       if (iview->image->info.samples > 1)
6347          ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
6348
6349       if (device->physical_device->rad_info.gfx_level >= GFX7) {
6350          struct radeon_info *info = &device->physical_device->rad_info;
6351          unsigned tiling_index = surf->u.legacy.tiling_index[level];
6352          unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
6353          unsigned macro_index = surf->u.legacy.macro_tile_index;
6354          unsigned tile_mode = info->si_tile_mode_array[tiling_index];
6355          unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
6356          unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
6357
6358          if (stencil_only)
6359             tile_mode = stencil_tile_mode;
6360
6361          ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
6362                               S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
6363                               S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
6364                               S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
6365                               S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
6366                               S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
6367          ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
6368          ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
6369       } else {
6370          unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
6371          ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
6372          tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
6373          ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
6374          if (stencil_only)
6375             ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
6376       }
6377
6378       ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
6379                           S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
6380       ds->db_depth_slice =
6381          S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
6382
6383       if (radv_htile_enabled(iview->image, level)) {
6384          ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
6385
6386          if (radv_image_tile_stencil_disabled(device, iview->image)) {
6387             ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
6388          }
6389
6390          va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
6391          ds->db_htile_data_base = va >> 8;
6392          ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
6393
6394          if (radv_image_is_tc_compat_htile(iview->image)) {
6395             unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
6396
6397             ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
6398             ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
6399          }
6400       }
6401    }
6402
6403    ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
6404    ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
6405 }
6406
6407 static unsigned
6408 radv_tex_wrap(VkSamplerAddressMode address_mode)
6409 {
6410    switch (address_mode) {
6411    case VK_SAMPLER_ADDRESS_MODE_REPEAT:
6412       return V_008F30_SQ_TEX_WRAP;
6413    case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
6414       return V_008F30_SQ_TEX_MIRROR;
6415    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
6416       return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
6417    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
6418       return V_008F30_SQ_TEX_CLAMP_BORDER;
6419    case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
6420       return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
6421    default:
6422       unreachable("illegal tex wrap mode");
6423       break;
6424    }
6425 }
6426
6427 static unsigned
6428 radv_tex_compare(VkCompareOp op)
6429 {
6430    switch (op) {
6431    case VK_COMPARE_OP_NEVER:
6432       return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
6433    case VK_COMPARE_OP_LESS:
6434       return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
6435    case VK_COMPARE_OP_EQUAL:
6436       return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
6437    case VK_COMPARE_OP_LESS_OR_EQUAL:
6438       return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
6439    case VK_COMPARE_OP_GREATER:
6440       return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
6441    case VK_COMPARE_OP_NOT_EQUAL:
6442       return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
6443    case VK_COMPARE_OP_GREATER_OR_EQUAL:
6444       return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
6445    case VK_COMPARE_OP_ALWAYS:
6446       return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
6447    default:
6448       unreachable("illegal compare mode");
6449       break;
6450    }
6451 }
6452
6453 static unsigned
6454 radv_tex_filter(VkFilter filter, unsigned max_aniso)
6455 {
6456    switch (filter) {
6457    case VK_FILTER_NEAREST:
6458       return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
6459                             : V_008F38_SQ_TEX_XY_FILTER_POINT);
6460    case VK_FILTER_LINEAR:
6461       return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
6462                             : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
6463    case VK_FILTER_CUBIC_IMG:
6464    default:
6465       fprintf(stderr, "illegal texture filter\n");
6466       return 0;
6467    }
6468 }
6469
6470 static unsigned
6471 radv_tex_mipfilter(VkSamplerMipmapMode mode)
6472 {
6473    switch (mode) {
6474    case VK_SAMPLER_MIPMAP_MODE_NEAREST:
6475       return V_008F38_SQ_TEX_Z_FILTER_POINT;
6476    case VK_SAMPLER_MIPMAP_MODE_LINEAR:
6477       return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
6478    default:
6479       return V_008F38_SQ_TEX_Z_FILTER_NONE;
6480    }
6481 }
6482
6483 static unsigned
6484 radv_tex_bordercolor(VkBorderColor bcolor)
6485 {
6486    switch (bcolor) {
6487    case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
6488    case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
6489       return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
6490    case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
6491    case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
6492       return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
6493    case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
6494    case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
6495       return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
6496    case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
6497    case VK_BORDER_COLOR_INT_CUSTOM_EXT:
6498       return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
6499    default:
6500       break;
6501    }
6502    return 0;
6503 }
6504
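/* Convert a max anisotropy value (1..16) to the log2-encoded ratio used by
 * the MAX_ANISO_RATIO sampler field: 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3,
 * 16 -> 4, clamped at 4 (16x). */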
6505 static unsigned
6506 radv_tex_aniso_filter(unsigned filter)
6507 {
6508    return MIN2(util_logbase2(filter), 4);
6509 }
6510
6511 static unsigned
6512 radv_tex_filter_mode(VkSamplerReductionMode mode)
6513 {
6514    switch (mode) {
6515    case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
6516       return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
6517    case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
6518       return V_008F30_SQ_IMG_FILTER_MODE_MIN;
6519    case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
6520       return V_008F30_SQ_IMG_FILTER_MODE_MAX;
6521    default:
6522       break;
6523    }
6524    return 0;
6525 }
6526
6527 static uint32_t
6528 radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)
6529 {
6530    if (device->force_aniso >= 0)
6531       return device->force_aniso;
6532
6533    if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)
6534       return (uint32_t)pCreateInfo->maxAnisotropy;
6535
6536    return 0;
6537 }
6538
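/* Claim the first free slot in the per-device custom border color table and
 * upload the value to the GPU-visible buffer.  Returns RADV_BORDER_COLOR_COUNT
 * when no slot is free; callers fall back to a built-in border color. */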
6539 static uint32_t
6540 radv_register_border_color(struct radv_device *device, VkClearColorValue value)
6541 {
6542    uint32_t slot;
6543
6544    mtx_lock(&device->border_color_data.mutex);
6545
6546    for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
6547       if (!device->border_color_data.used[slot]) {
6548          /* Copy to the GPU, respecting endianness. */
6549          util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
6550                                  sizeof(VkClearColorValue));
6551
6552          device->border_color_data.used[slot] = true;
6553          break;
6554       }
6555    }
6556
6557    mtx_unlock(&device->border_color_data.mutex);
6558
6559    return slot;
6560 }
6561
6562 static void
6563 radv_unregister_border_color(struct radv_device *device, uint32_t slot)
6564 {
6565    mtx_lock(&device->border_color_data.mutex);
6566
6567    device->border_color_data.used[slot] = false;
6568
6569    mtx_unlock(&device->border_color_data.mutex);
6570 }
6571
6572 static void
6573 radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
6574                   const VkSamplerCreateInfo *pCreateInfo)
6575 {
6576    uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
6577    uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
6578    bool compat_mode = device->physical_device->rad_info.gfx_level == GFX8 ||
6579                       device->physical_device->rad_info.gfx_level == GFX9;
6580    unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
6581    unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
6582    bool trunc_coord =
6583       pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
6584    bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
6585                             pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
6586                             pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
6587    VkBorderColor border_color =
6588       uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
6589    uint32_t border_color_ptr;
6590    bool disable_cube_wrap = pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
6591
6592    const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
6593       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
6594    if (sampler_reduction)
6595       filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
6596
6597    if (pCreateInfo->compareEnable)
6598       depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
6599
6600    sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
6601
6602    if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
6603        border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
6604       const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
6605          vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
6606
6607       assert(custom_border_color);
6608
6609       sampler->border_color_slot =
6610          radv_register_border_color(device, custom_border_color->customBorderColor);
6611
6612       /* Did we fail to find a slot? */
6613       if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
6614          fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
6615          border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
6616       }
6617    }
6618
6619    /* If we don't have a custom color, set the ptr to 0 */
6620    border_color_ptr =
6621       sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;
6622
6623    sampler->state[0] =
6624       (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
6625        S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
6626        S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
6627        S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
6628        S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
6629        S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
6630        S_008F30_DISABLE_CUBE_WRAP(disable_cube_wrap) | S_008F30_COMPAT_MODE(compat_mode) |
6631        S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
6632    sampler->state[1] = (S_008F34_MIN_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
6633                         S_008F34_MAX_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
6634                         S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
6635    sampler->state[2] = (S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
6636                         S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
6637                         S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
6638                         S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)));
6639    sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color));
6640
6641    if (device->physical_device->rad_info.gfx_level >= GFX10) {
6642       sampler->state[2] |=
6643          S_008F38_ANISO_OVERRIDE_GFX10(device->instance->disable_aniso_single_level);
6644    } else {
6645       sampler->state[2] |=
6646          S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.gfx_level <= GFX8) |
6647          S_008F38_FILTER_PREC_FIX(1) |
6648          S_008F38_ANISO_OVERRIDE_GFX8(device->instance->disable_aniso_single_level &&
6649                                       device->physical_device->rad_info.gfx_level >= GFX8);
6650    }
6651
6652    if (device->physical_device->rad_info.gfx_level >= GFX11) {
6653       sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(border_color_ptr);
6654    } else {
6655       sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr);
6656    }
6657 }
6658
6659 VKAPI_ATTR VkResult VKAPI_CALL
6660 radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
6661                    const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
6662 {
6663    RADV_FROM_HANDLE(radv_device, device, _device);
6664    struct radv_sampler *sampler;
6665
6666    const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
6667       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
6668
6669    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
6670
6671    sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
6672                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6673    if (!sampler)
6674       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6675
6676    vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);
6677
6678    radv_init_sampler(device, sampler, pCreateInfo);
6679
6680    sampler->ycbcr_sampler =
6681       ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)
6682                        : NULL;
6683    *pSampler = radv_sampler_to_handle(sampler);
6684
6685    return VK_SUCCESS;
6686 }
6687
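/* A minimal application-side sketch (hypothetical, not driver code) of a
 * VkSamplerCreateInfo that exercises the paths above: anisotropic filtering,
 * clamp-to-border addressing and a custom border color slot.  Assumes a
 * VkDevice created with VK_EXT_custom_border_color enabled;
 * example_create_sampler is an illustrative name. */
#if 0
static VkSampler
example_create_sampler(VkDevice device)
{
   const VkSamplerCustomBorderColorCreateInfoEXT custom_border = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT,
      .customBorderColor = {.float32 = {1.0f, 0.0f, 0.0f, 1.0f}},
      .format = VK_FORMAT_R8G8B8A8_UNORM,
   };
   const VkSamplerCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
      .pNext = &custom_border,
      .magFilter = VK_FILTER_LINEAR,
      .minFilter = VK_FILTER_LINEAR,
      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
      .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
      .anisotropyEnable = VK_TRUE,
      .maxAnisotropy = 16.0f, /* radv_tex_aniso_filter() maps this to ratio 4 */
      .maxLod = VK_LOD_CLAMP_NONE,
      .borderColor = VK_BORDER_COLOR_FLOAT_CUSTOM_EXT,
   };
   VkSampler sampler = VK_NULL_HANDLE;
   if (vkCreateSampler(device, &info, NULL, &sampler) != VK_SUCCESS)
      return VK_NULL_HANDLE;
   return sampler;
}
#endif
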
6688 VKAPI_ATTR void VKAPI_CALL
6689 radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)
6690 {
6691    RADV_FROM_HANDLE(radv_device, device, _device);
6692    RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
6693
6694    if (!sampler)
6695       return;
6696
6697    if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
6698       radv_unregister_border_color(device, sampler->border_color_slot);
6699
6700    vk_object_base_finish(&sampler->base);
6701    vk_free2(&device->vk.alloc, pAllocator, sampler);
6702 }
6703
6704 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
6705 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
6706 {
6707    /* For the full details on loader interface versioning, see
6708     * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
6709     * What follows is a condensed summary, to help you navigate the large and
6710     * confusing official doc.
6711     *
6712     *   - Loader interface v0 is incompatible with later versions. We don't
6713     *     support it.
6714     *
6715     *   - In loader interface v1:
6716     *       - The first ICD entrypoint called by the loader is
6717     *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
6718     *         entrypoint.
6719     *       - The ICD must statically expose no other Vulkan symbol unless it is
6720     *         linked with -Bsymbolic.
6721     *       - Each dispatchable Vulkan handle created by the ICD must be
6722     *         a pointer to a struct whose first member is VK_LOADER_DATA. The
6723     *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
6724     *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
6725     *         vkDestroySurfaceKHR(). The ICD must be capable of working with
6726     *         such loader-managed surfaces.
6727     *
6728     *    - Loader interface v2 differs from v1 in:
6729     *       - The first ICD entrypoint called by the loader is
6730     *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
6731     *         statically expose this entrypoint.
6732     *
6733     *    - Loader interface v3 differs from v2 in:
6734     *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
6735     *          vkDestroySurfaceKHR(), and other APIs that use VkSurfaceKHR,
6736     *          because the loader no longer does so.
6737     *
6738     *    - Loader interface v4 differs from v3 in:
6739     *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
6740     *
6741     *    - Loader interface v5 differs from v4 in:
6742     *        - The ICD must support Vulkan API version 1.1 and must not return
6743     *          VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
6744     *          Vulkan Loader with interface v4 or smaller is being used and the
6745     *          application provides an API version that is greater than 1.0.
6746     */
6747    *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
6748    return VK_SUCCESS;
6749 }
6750
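/* Illustrative loader-side sketch (hypothetical, not driver code) of how the
 * entrypoint above is found and called: a loader dlopen()s the ICD, looks up
 * vk_icdNegotiateLoaderICDInterfaceVersion, and passes in its own maximum
 * supported version, which the ICD may lower. */
#if 0
#include <dlfcn.h>

static uint32_t
example_negotiate_icd_version(const char *icd_path)
{
   void *lib = dlopen(icd_path, RTLD_NOW | RTLD_LOCAL);
   if (!lib)
      return 0;

   PFN_vkNegotiateLoaderICDInterfaceVersion negotiate =
      (PFN_vkNegotiateLoaderICDInterfaceVersion)dlsym(
         lib, "vk_icdNegotiateLoaderICDInterfaceVersion");

   uint32_t version = 5; /* the loader's own maximum */
   if (!negotiate || negotiate(&version) != VK_SUCCESS)
      version = 1; /* ICDs without the symbol predate interface v2 */

   dlclose(lib);
   return version;
}
#endif
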
6751 VKAPI_ATTR VkResult VKAPI_CALL
6752 radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
6753 {
6754    RADV_FROM_HANDLE(radv_device, device, _device);
6755    RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
6756
6757    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
6758
6759    /* At the moment, we support only the below handle types. */
6760    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
6761           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
6762
6763    bool ret = radv_get_memory_fd(device, memory, pFD);
6764    if (ret == false)
6765       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6766    return VK_SUCCESS;
6767 }
6768
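/* Application-side sketch (hypothetical names) of the export path handled by
 * radv_GetMemoryFdKHR below.  Assumes the memory was allocated with a
 * VkExportMemoryAllocateInfo requesting an opaque fd; the returned fd is
 * owned by the caller, who must close() it. */
#if 0
static int
example_export_memory_fd(VkDevice device, VkDeviceMemory memory)
{
   PFN_vkGetMemoryFdKHR get_fd =
      (PFN_vkGetMemoryFdKHR)vkGetDeviceProcAddr(device, "vkGetMemoryFdKHR");
   const VkMemoryGetFdInfoKHR info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
      .memory = memory,
      .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
   };
   int fd = -1;
   if (!get_fd || get_fd(device, &info, &fd) != VK_SUCCESS)
      return -1;
   return fd;
}
#endif
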
6769 static uint32_t
6770 radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
6771                                         enum radeon_bo_domain domains, enum radeon_bo_flag flags,
6772                                         enum radeon_bo_flag ignore_flags)
6773 {
6774    /* Don't count GTT/CPU as relevant:
6775     *
6776     * - We're not fully consistent between the two.
6777     * - Sometimes VRAM gets VRAM|GTT.
6778     */
6779    const enum radeon_bo_domain relevant_domains =
6780       RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
6781    uint32_t bits = 0;
6782    for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
6783       if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
6784          continue;
6785
6786       if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
6787          continue;
6788
6789       bits |= 1u << i;
6790    }
6791
6792    return bits;
6793 }
6794
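/* Map BO domains/flags to a Vulkan memoryTypeBits mask.  Flag matching is
 * relaxed progressively: first GTT_WC and NO_CPU_ACCESS must both match,
 * then GTT_WC is ignored, then NO_CPU_ACCESS too, so an imported buffer
 * still gets a usable memory type when only the domain matches. */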
6795 static uint32_t
6796 radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
6797                                 enum radeon_bo_flag flags)
6798 {
6799    enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
6800    uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
6801
6802    if (!bits) {
6803       ignore_flags |= RADEON_FLAG_GTT_WC;
6804       bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
6805    }
6806
6807    if (!bits) {
6808       ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
6809       bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
6810    }
6811
6812    return bits;
6813 }

6814 VKAPI_ATTR VkResult VKAPI_CALL
6815 radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
6816                               int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
6817 {
6818    RADV_FROM_HANDLE(radv_device, device, _device);
6819
6820    switch (handleType) {
6821    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
6822       enum radeon_bo_domain domains;
6823       enum radeon_bo_flag flags;
6824       if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
6825          return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
6826
6827       pMemoryFdProperties->memoryTypeBits =
6828          radv_compute_valid_memory_types(device->physical_device, domains, flags);
6829       return VK_SUCCESS;
6830    }
6831    default:
6832       /* The valid usage section for this function says:
6833        *
6834        *    "handleType must not be one of the handle types defined as
6835        *    opaque."
6836        *
6837        * So opaque handle types fall into the default "unsupported" case.
6838        */
6839       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
6840    }
6841 }
6842
6843 VKAPI_ATTR void VKAPI_CALL
6844 radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,
6845                                       uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,
6846                                       VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
6847 {
6848    assert(localDeviceIndex == remoteDeviceIndex);
6849
6850    *pPeerMemoryFeatures =
6851       VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
6852       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
6853 }
6854
6855 static const VkTimeDomainEXT radv_time_domains[] = {
6856    VK_TIME_DOMAIN_DEVICE_EXT,
6857    VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
6858 #ifdef CLOCK_MONOTONIC_RAW
6859    VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
6860 #endif
6861 };
6862
6863 VKAPI_ATTR VkResult VKAPI_CALL
6864 radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
6865                                                   uint32_t *pTimeDomainCount,
6866                                                   VkTimeDomainEXT *pTimeDomains)
6867 {
6868    int d;
6869    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
6870
6871    for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
6872       vk_outarray_append_typed(VkTimeDomainEXT, &out, i)
6873       {
6874          *i = radv_time_domains[d];
6875       }
6876    }
6877
6878    return vk_outarray_status(&out);
6879 }
6880
6881 #ifndef _WIN32
6882 static uint64_t
6883 radv_clock_gettime(clockid_t clock_id)
6884 {
6885    struct timespec current;
6886    int ret;
6887
6888    ret = clock_gettime(clock_id, &current);
6889 #ifdef CLOCK_MONOTONIC_RAW
6890    if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
6891       ret = clock_gettime(CLOCK_MONOTONIC, &current);
6892 #endif
6893    if (ret < 0)
6894       return 0;
6895
6896    return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
6897 }
6898
6899 VKAPI_ATTR VkResult VKAPI_CALL
6900 radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
6901                                 const VkCalibratedTimestampInfoEXT *pTimestampInfos,
6902                                 uint64_t *pTimestamps, uint64_t *pMaxDeviation)
6903 {
6904    RADV_FROM_HANDLE(radv_device, device, _device);
6905    uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
6906    int d;
6907    uint64_t begin, end;
6908    uint64_t max_clock_period = 0;
6909
6910 #ifdef CLOCK_MONOTONIC_RAW
6911    begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
6912 #else
6913    begin = radv_clock_gettime(CLOCK_MONOTONIC);
6914 #endif
6915
6916    for (d = 0; d < timestampCount; d++) {
6917       switch (pTimestampInfos[d].timeDomain) {
6918       case VK_TIME_DOMAIN_DEVICE_EXT:
6919          pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
6920          uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
6921          max_clock_period = MAX2(max_clock_period, device_period);
6922          break;
6923       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
6924          pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
6925          max_clock_period = MAX2(max_clock_period, 1);
6926          break;
6927
6928 #ifdef CLOCK_MONOTONIC_RAW
6929       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
6930          pTimestamps[d] = begin;
6931          break;
6932 #endif
6933       default:
6934          pTimestamps[d] = 0;
6935          break;
6936       }
6937    }
6938
6939 #ifdef CLOCK_MONOTONIC_RAW
6940    end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
6941 #else
6942    end = radv_clock_gettime(CLOCK_MONOTONIC);
6943 #endif
6944
6945    /*
6946     * The maximum deviation is the sum of the interval over which we
6947     * perform the sampling and the maximum period of any sampled
6948     * clock. That's because the maximum skew between any two sampled
6949     * clock edges is when the sampled clock with the largest period is
6950     * sampled at the end of that period but right at the beginning of the
6951     * sampling interval and some other clock is sampled right at the
6952     * beginning of its sampling period and right at the end of the
6953     * sampling interval. Let's assume the GPU has the longest clock
6954     * period and that the application is sampling GPU and monotonic:
6955     *
6956     *                               s                 e
6957     *                    w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
6958     *   Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
6959     *
6960     *                               g
6961     *             0         1         2         3
6962     *   GPU       -----_____-----_____-----_____-----_____
6963     *
6964     *                                                m
6965     *                                       x y z 0 1 2 3 4 5 6 7 8 9 a b c
6966     *   Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
6967     *
6968     *   Interval                     <----------------->
6969     *   Deviation           <-------------------------->
6970     *
6971     *           s  = read(raw)       2
6972     *           g  = read(GPU)       1
6973     *           m  = read(monotonic) 2
6974     *           e  = read(raw)       b
6975     *
6976     * We round the sample interval up by one tick to cover sampling error
6977     * in the interval clock.
6978     */
6979
6980    uint64_t sample_interval = end - begin + 1;
6981
6982    *pMaxDeviation = sample_interval + max_clock_period;
6983
6984    return VK_SUCCESS;
6985 }
6986 #endif
6987
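/* Application-side sketch (hypothetical names) of the calibration call
 * implemented above: sampling the device and CPU monotonic clocks together
 * so GPU timestamps can be correlated with the CPU timeline to within
 * *max_deviation nanoseconds. */
#if 0
static VkResult
example_calibrate_clocks(VkDevice device, uint64_t *gpu_ts, uint64_t *cpu_ts,
                         uint64_t *max_deviation)
{
   PFN_vkGetCalibratedTimestampsEXT get_ts =
      (PFN_vkGetCalibratedTimestampsEXT)vkGetDeviceProcAddr(
         device, "vkGetCalibratedTimestampsEXT");
   const VkCalibratedTimestampInfoEXT infos[2] = {
      {.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
       .timeDomain = VK_TIME_DOMAIN_DEVICE_EXT},
      {.sType = VK_STRUCTURE_TYPE_CALIBRATED_TIMESTAMP_INFO_EXT,
       .timeDomain = VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT},
   };
   uint64_t ts[2];
   VkResult result = get_ts ? get_ts(device, 2, infos, ts, max_deviation)
                            : VK_ERROR_EXTENSION_NOT_PRESENT;
   if (result == VK_SUCCESS) {
      *gpu_ts = ts[0];
      *cpu_ts = ts[1];
   }
   return result;
}
#endif
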
6988 VKAPI_ATTR void VKAPI_CALL
6989 radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
6990                                                VkSampleCountFlagBits samples,
6991                                                VkMultisamplePropertiesEXT *pMultisampleProperties)
6992 {
6993    VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
6994                                              VK_SAMPLE_COUNT_8_BIT;
6995
6996    if (samples & supported_samples) {
6997       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
6998    } else {
6999       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
7000    }
7001 }
7002
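/* Advertise the fixed VRS rate set {2x2, 2x1, 1x2, 1x1}, ordered from
 * largest to smallest as the spec requires; the mandatory 1x1 rate reports
 * support for all sample counts (~0). */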
7003 VKAPI_ATTR VkResult VKAPI_CALL
7004 radv_GetPhysicalDeviceFragmentShadingRatesKHR(
7005    VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
7006    VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
7007 {
7008    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
7009                           pFragmentShadingRateCount);
7010
7011 #define append_rate(w, h, s)                                                                       \
7012    {                                                                                               \
7013       VkPhysicalDeviceFragmentShadingRateKHR rate = {                                              \
7014          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,          \
7015          .sampleCounts = s,                                                                        \
7016          .fragmentSize = {.width = w, .height = h},                                                \
7017       };                                                                                           \
7018       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;         \
7019    }
7020
7021    for (uint32_t x = 2; x >= 1; x--) {
7022       for (uint32_t y = 2; y >= 1; y--) {
7023          VkSampleCountFlagBits samples;
7024
7025          if (x == 1 && y == 1) {
7026             samples = ~0;
7027          } else {
7028             samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT |
7029                       VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
7030          }
7031
7032          append_rate(x, y, samples);
7033       }
7034    }
7035 #undef append_rate
7036
7037    return vk_outarray_status(&out);
7038 }
7039
7040 static bool
7041 radv_thread_trace_set_pstate(struct radv_device *device, bool enable)
7042 {
7043    struct radeon_winsys *ws = device->ws;
7044    enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;
7045
7046    if (device->physical_device->rad_info.has_stable_pstate) {
7047       for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
7048          for (unsigned q = 0; q < device->queue_count[i]; q++) {
7049             struct radv_queue *queue = &device->queues[i][q];
7050
7051             if (ws->ctx_set_pstate(queue->hw_ctx, pstate) < 0)
7052                return false;
7053          }
7054       }
7055    }
7056
7057    return true;
7058 }
7059
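/* The first successful acquire switches every hardware context to the peak
 * pstate so counter results are comparable; the matching final release
 * restores the default pstate. */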
7060 bool
7061 radv_device_acquire_performance_counters(struct radv_device *device)
7062 {
7063    bool result = true;
7064    simple_mtx_lock(&device->pstate_mtx);
7065
7066    if (device->pstate_cnt == 0)
7067       result = radv_thread_trace_set_pstate(device, true);
7068
7069    if (result)
7070       ++device->pstate_cnt;
7071
7072    simple_mtx_unlock(&device->pstate_mtx);
7073    return result;
7074 }
7075
7076 void
7077 radv_device_release_performance_counters(struct radv_device *device)
7078 {
7079    simple_mtx_lock(&device->pstate_mtx);
7080
7081    if (--device->pstate_cnt == 0)
7082       radv_thread_trace_set_pstate(device, false);
7083
7084    simple_mtx_unlock(&device->pstate_mtx);
7085 }
7086
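/* Application-side sketch (hypothetical names) of the profiling-lock
 * entrypoints below, as used with VK_KHR_performance_query: the lock pins
 * the peak pstate for the duration of a counter capture.  In a real
 * application both calls would be fetched via vkGetDeviceProcAddr. */
#if 0
static VkResult
example_profile_scope(VkDevice device)
{
   const VkAcquireProfilingLockInfoKHR lock_info = {
      .sType = VK_STRUCTURE_TYPE_ACQUIRE_PROFILING_LOCK_INFO_KHR,
      .timeout = UINT64_MAX, /* wait until the lock is available */
   };
   VkResult result = vkAcquireProfilingLockKHR(device, &lock_info);
   if (result != VK_SUCCESS)
      return result;

   /* ... submit command buffers that use performance query pools ... */

   vkReleaseProfilingLockKHR(device);
   return VK_SUCCESS;
}
#endif
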
7087 VKAPI_ATTR VkResult VKAPI_CALL
7088 radv_AcquireProfilingLockKHR(VkDevice _device, const VkAcquireProfilingLockInfoKHR *pInfo)
7089 {
7090    RADV_FROM_HANDLE(radv_device, device, _device);
7091    bool result = radv_device_acquire_performance_counters(device);
7092    return result ? VK_SUCCESS : VK_ERROR_UNKNOWN;
7093 }
7094
7095 VKAPI_ATTR void VKAPI_CALL
7096 radv_ReleaseProfilingLockKHR(VkDevice _device)
7097 {
7098    RADV_FROM_HANDLE(radv_device, device, _device);
7099    radv_device_release_performance_counters(device);
7100 }