/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <stdbool.h>
#include <string.h>

#ifdef __FreeBSD__
#include <sys/types.h>
#endif
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#ifdef __linux__
#include <sys/inotify.h>
#endif

#include "util/debug.h"
#include "util/disk_cache.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_util.h"
#ifdef _WIN32
typedef void *drmDevicePtr;
#include <io.h>
#else
#include <amdgpu.h>
#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
#include "util/build_id.h"
#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "git_sha1.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"
#include "vulkan/vk_icd.h"

#ifdef LLVM_AVAILABLE
#include "ac_llvm_util.h"
#endif

/* The number of IBs per submit isn't infinite; it depends on the IP type
 * (i.e. some initial setup is needed for a submit) and the size of each IB
 * descriptor (4 DW). This limit is arbitrary but should be safe for now.
 * Ideally, we should get this limit from the KMD.
 */
#define RADV_MAX_IBS_PER_SUBMIT 192

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static VkResult radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission);

uint64_t
radv_get_current_time(void)
{
   return os_time_get_nano();
}

static int
radv_device_get_cache_uuid(struct radv_physical_device *pdevice, void *uuid)
{
   enum radeon_family family = pdevice->rad_info.family;
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   unsigned ptr_size = sizeof(void *);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx)
#ifdef LLVM_AVAILABLE
       || (pdevice->use_llvm &&
           !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
#endif
   )
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
   int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");
   if (ov >= 0)
      return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
   return device->rad_info.vram_size;
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
   return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
   uint64_t total_size = radv_get_adjusted_vram_size(device);
   return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
}
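
/* For illustration (hypothetical numbers): on a dGPU with 8 GiB of VRAM and
 * a 256 MiB visible (BAR) aperture, radv_get_visible_vram_size() returns
 * 256 MiB and radv_get_vram_size() returns the ~7.75 GiB of CPU-invisible
 * VRAM. With a resizable BAR that covers all of VRAM, the visible size
 * equals the adjusted VRAM size and radv_get_vram_size() returns 0.
 */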

enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,
   RADV_HEAP_GTT = 1 << 1,
   RADV_HEAP_VRAM_VIS = 1 << 2,
   RADV_HEAP_MAX = 1 << 3,
};

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
   uint64_t vram_size = radv_get_vram_size(device);
   uint64_t gtt_size = device->rad_info.gart_size;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   device->memory_properties.memoryHeapCount = 0;
   device->heaps = 0;

   if (!device->rad_info.has_dedicated_vram) {
      /* On APUs, the carveout is usually too small for games that request a
       * minimum VRAM size greater than it. To work around this, we compute
       * the total available memory size (GTT + visible VRAM size) and report
       * 2/3 as VRAM and 1/3 as GTT.
       */
      const uint64_t total_size = gtt_size + visible_vram_size;
      visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size);
      gtt_size = total_size - visible_vram_size;
      vram_size = 0;
   }
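
   /* Worked example (hypothetical APU): with a 512 MiB carveout and 3 GiB of
    * GTT, total_size = 3.5 GiB, so roughly 2.33 GiB (rounded to the GART page
    * size) is reported as device-local "VRAM" and the remaining ~1.17 GiB as
    * GTT, instead of advertising only the 512 MiB carveout.
    */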

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM;
      device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_GTT;
      device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM_VIS;
      device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   if (vram_index >= 0 || visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   if (visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   device->memory_properties.memoryTypeCount = type_count;

   if (device->rad_info.has_l2_uncached) {
      for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

         if ((mem_type.propertyFlags &
              (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
             mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
                                                   VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            device->memory_domains[type_count] = device->memory_domains[i];
            device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      device->memory_properties.memoryTypeCount = type_count;
   }
}
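
/* The resulting memory-type table is ordered roughly as: 1) device-local
 * VRAM without CPU access, 2) host-visible write-combined GTT, 3)
 * host-visible device-local (visible VRAM), 4) host-visible cached GTT,
 * followed by DEVICE_COHERENT/DEVICE_UNCACHED variants on GPUs with
 * L2-uncached support. Each entry is present only when the corresponding
 * heap exists; this describes the code above and is not an ABI guarantee.
 */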

static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
   if (!pdevice->use_llvm) {
      /* Some games like SotTR apply shader workarounds if the LLVM
       * version is too old or if the LLVM version string is
       * missing. This gives 2-5% performance with SotTR and ACO.
       */
      if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {
         return " (LLVM 9.0.1)";
      }

      return "";
   }

#ifdef LLVM_AVAILABLE
   return " (LLVM " MESA_LLVM_VERSION_STRING ")";
#else
   unreachable("LLVM is not available");
#endif
}

int
radv_get_int_debug_option(const char *name, int default_value)
{
   const char *str;
   int result;

   str = getenv(name);
   if (!str) {
      result = default_value;
   } else {
      char *endptr;

      result = strtol(str, &endptr, 0);
      if (str == endptr) {
         /* No digits found. */
         result = default_value;
      }
   }

   return result;
}
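
/* strtol() runs with base 0, so decimal, hex and octal all parse; e.g.
 * RADV_THREAD_TRACE=4096 and RADV_THREAD_TRACE=0x1000 yield the same value,
 * while a string with no leading digits falls back to default_value.
 */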

static bool
radv_thread_trace_enabled(void)
{
   return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
          getenv("RADV_THREAD_TRACE_TRIGGER");
}

static bool
radv_spm_trace_enabled(void)
{
   return radv_thread_trace_enabled() &&
          debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", false);
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) ||                    \
   defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID
#define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define RADV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#endif
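
/* VK_MAKE_VERSION packs (major << 22) | (minor << 12) | patch into a
 * uint32_t, so the advertised patch level tracks the Vulkan header the
 * driver was built against.
 */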

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = RADV_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table radv_instance_extensions_supported = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,
   .EXT_debug_utils = true,

#ifdef RADV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
};

static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
                                              struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table){
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = radv_enable_rt(device, false),
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->rad_info.gfx_level >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = !device->use_llvm,
      .KHR_push_descriptor = true,
      .KHR_ray_query = radv_enable_rt(device, false),
      .KHR_ray_tracing_maintenance1 = radv_enable_rt(device, false),
      .KHR_ray_tracing_pipeline = radv_enable_rt(device, true),
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_border_color_swizzle = device->rad_info.gfx_level >= GFX10,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = device->rad_info.gfx_level >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = radv_thread_trace_enabled(),
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_indexing = true,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = device->rad_info.has_userptr,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = device->rad_info.gfx_level >= GFX9,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = device->rad_info.gfx_level >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_multi_draw = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = device->rad_info.gfx_level >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->rad_info.gfx_level < GFX10,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = device->rad_info.gfx_level >= GFX7,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
#ifdef LLVM_AVAILABLE
      .EXT_shader_atomic_float2 = !device->use_llvm || LLVM_VERSION_MAJOR >= 14,
#else
      .EXT_shader_atomic_float2 = true,
#endif
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_transform_feedback = device->rad_info.gfx_level < GFX11,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !device->use_llvm,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = device->rad_info.gfx_level < GFX11,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_shader_integer_functions2 = true,
      .NV_compute_shader_derivatives = true,
      .NV_mesh_shader = device->use_ngg && device->rad_info.gfx_level >= GFX10_3 &&
                        device->instance->perftest_flags & RADV_PERFTEST_NV_MS && !device->use_llvm,
      /* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
       * using it.
       */
      .VALVE_descriptor_set_host_mapping =
         device->vk.instance->app_info.engine_name &&
         strcmp(device->vk.instance->app_info.engine_name, "vkd3d") == 0,
      .VALVE_mutable_descriptor_type = true,
   };
}

static bool
radv_is_conformant(const struct radv_physical_device *pdevice)
{
   return pdevice->rad_info.gfx_level >= GFX8;
}

static void
radv_physical_device_init_queue_table(struct radv_physical_device *pdevice)
{
   int idx = 0;
   pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_GENERAL;
   idx++;

   for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++)
      pdevice->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1;

   if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_COMPUTE;
      idx++;
   }
   pdevice->num_queues = idx;
}
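
/* The table above maps Vulkan queue family indices to RADV queue types:
 * family 0 is always RADV_QUEUE_GENERAL (gfx), and family 1 is
 * RADV_QUEUE_COMPUTE when the kernel exposes compute queues and
 * RADV_DEBUG=nocompute isn't set. Unused slots hold the out-of-range
 * sentinel RADV_MAX_QUEUE_FAMILIES + 1.
 */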

static VkResult
radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
                                struct radv_physical_device **device_out)
{
   VkResult result;
   int fd = -1;
   int master_fd = -1;

#ifdef _WIN32
   assert(drm_device == NULL);
#else
   if (drm_device) {
      const char *path = drm_device->nodes[DRM_NODE_RENDER];
      drmVersionPtr version;

      fd = open(path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Could not open device %s: %m", path);
      }

      version = drmGetVersion(fd);
      if (!version) {
         close(fd);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Could not get the kernel driver version for device %s: %m", path);
      }

      if (strcmp(version->name, "amdgpu")) {
         drmFreeVersion(version);
         close(fd);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Device '%s' is not using the AMDGPU kernel driver: %m", path);
      }
      drmFreeVersion(version);

      if (instance->debug_flags & RADV_DEBUG_STARTUP)
         fprintf(stderr, "radv: info: Found compatible device '%s'.\n", path);
   }
#endif

   struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
                                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &radv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);
   if (result != VK_SUCCESS) {
      goto fail_alloc;
   }

   device->instance = instance;

#ifdef _WIN32
   device->ws = radv_null_winsys_create();
#else
   if (drm_device) {
      bool reserve_vmid = radv_thread_trace_enabled();

      device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags,
                                             reserve_vmid);
   } else {
      device->ws = radv_null_winsys_create();
   }
#endif

   if (!device->ws) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
      goto fail_base;
   }

   device->vk.supported_sync_types = device->ws->get_sync_types(device->ws);

#ifndef _WIN32
   if (drm_device && instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         uint32_t accel_working = 0;
         struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
                                           .return_size = sizeof(accel_working),
                                           .query = AMDGPU_INFO_ACCEL_WORKING};

         if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) <
                0 ||
             !accel_working) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
#endif

   device->master_fd = master_fd;
   device->local_fd = fd;
   device->ws->query_info(device->ws, &device->rad_info);

   device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
#ifndef LLVM_AVAILABLE
   if (device->use_llvm) {
      fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
                      "enabled at build time.\n");
      abort();
   }
#endif

#ifdef ANDROID
   device->emulate_etc2 = !radv_device_supports_etc(device);
#else
   device->emulate_etc2 = !radv_device_supports_etc(device) &&
                          driQueryOptionb(&device->instance->dri_options, "radv_require_etc2");
#endif

   snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,
            radv_get_compiler_string(device));

   const char *marketing_name = device->ws->get_chip_name(device->ws);
   snprintf(device->marketing_name, sizeof(device->marketing_name), "%s (RADV %s%s)",
            marketing_name, device->rad_info.name, radv_get_compiler_string(device));

#ifdef ENABLE_SHADER_CACHE
   if (radv_device_get_cache_uuid(device, device->cache_uuid)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
      goto fail_wsi;
   }

   /* The gpu id is already embedded in the uuid so we just pass "radv"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->disk_cache = disk_cache_create(device->name, buf, 0);
#endif

   if (!radv_is_conformant(device))
      vk_warn_non_conformant_implementation("radv");

   radv_get_driver_uuid(&device->driver_uuid);
   radv_get_device_uuid(&device->rad_info, &device->device_uuid);

   device->out_of_order_rast_allowed =
      device->rad_info.has_out_of_order_rast &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

   device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

   device->use_ngg = (device->rad_info.gfx_level >= GFX10 &&
                     device->rad_info.family != CHIP_NAVI14 &&
                     !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
                     device->rad_info.gfx_level >= GFX11;

   device->use_ngg_culling = device->use_ngg && device->rad_info.max_render_backends > 1 &&
                             (device->rad_info.gfx_level >= GFX10_3 ||
                              (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
                             !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);

   device->use_ngg_streamout = false;

   /* Determine the number of threads per wave for all stages. */
   device->cs_wave_size = 64;
   device->ps_wave_size = 64;
   device->ge_wave_size = 64;
   device->rt_wave_size = 64;

   if (device->rad_info.gfx_level >= GFX10) {
      if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
         device->cs_wave_size = 32;

      /* For pixel shaders, wave64 is recommended. */
      if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
         device->ps_wave_size = 32;

      if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
         device->ge_wave_size = 32;

      if (!(device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64))
         device->rt_wave_size = 32;
   }

   radv_physical_device_init_mem_types(device);

   radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);

   radv_get_nir_options(device);

#ifndef _WIN32
   if (drm_device) {
      struct stat primary_stat = {0}, render_stat = {0};

      device->available_nodes = drm_device->available_nodes;
      device->bus_info = *drm_device->businfo.pci;

      if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
          stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM primary node %s",
                            drm_device->nodes[DRM_NODE_PRIMARY]);
         goto fail_perfcounters;
      }
      device->primary_devid = primary_stat.st_rdev;

      if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
          stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM render node %s",
                            drm_device->nodes[DRM_NODE_RENDER]);
         goto fail_perfcounters;
      }
      device->render_devid = render_stat.st_rdev;
   }
#endif

   if ((device->instance->debug_flags & RADV_DEBUG_INFO))
      ac_print_gpu_info(&device->rad_info, stdout);

   radv_physical_device_init_queue_table(device);

   /* We don't check the error code, but later check if it is initialized. */
   ac_init_perfcounters(&device->rad_info, false, false, &device->ac_perfcounters);

   /* The WSI is structured as a layer on top of the driver, so this has
    * to be the last part of initialization (at least until we get other
    * semi-layers).
    */
   result = radv_init_wsi(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_perfcounters;
   }

   device->gs_table_depth =
      ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family);

   ac_get_hs_info(&device->rad_info, &device->hs);
   ac_get_task_info(&device->rad_info, &device->task_info);

   *device_out = device;

   return VK_SUCCESS;

fail_perfcounters:
   ac_destroy_perfcounters(&device->ac_perfcounters);
   disk_cache_destroy(device->disk_cache);
#ifdef ENABLE_SHADER_CACHE
fail_wsi:
#endif
   device->ws->destroy(device->ws);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   if (fd != -1)
      close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
   radv_finish_wsi(device);
   ac_destroy_perfcounters(&device->ac_perfcounters);
   device->ws->destroy(device->ws);
   disk_cache_destroy(device->disk_cache);
   if (device->local_fd != -1)
      close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

static const struct debug_control radv_debug_options[] = {
   {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
   {"nodcc", RADV_DEBUG_NO_DCC},
   {"shaders", RADV_DEBUG_DUMP_SHADERS},
   {"nocache", RADV_DEBUG_NO_CACHE},
   {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
   {"nohiz", RADV_DEBUG_NO_HIZ},
   {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
   {"allbos", RADV_DEBUG_ALL_BOS},
   {"noibs", RADV_DEBUG_NO_IBS},
   {"spirv", RADV_DEBUG_DUMP_SPIRV},
   {"vmfaults", RADV_DEBUG_VM_FAULTS},
   {"zerovram", RADV_DEBUG_ZERO_VRAM},
   {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
   {"preoptir", RADV_DEBUG_PREOPTIR},
   {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
   {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
   {"info", RADV_DEBUG_INFO},
   {"startup", RADV_DEBUG_STARTUP},
   {"checkir", RADV_DEBUG_CHECKIR},
   {"nobinning", RADV_DEBUG_NOBINNING},
   {"nongg", RADV_DEBUG_NO_NGG},
   {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
   {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
   {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
   {"llvm", RADV_DEBUG_LLVM},
   {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
   {"hang", RADV_DEBUG_HANG},
   {"img", RADV_DEBUG_IMG},
   {"noumr", RADV_DEBUG_NO_UMR},
   {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
   {"splitfma", RADV_DEBUG_SPLIT_FMA},
   {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
   {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
   {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
   {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
   {"nonggc", RADV_DEBUG_NO_NGGC},
   {"prologs", RADV_DEBUG_DUMP_PROLOGS},
   {"nodma", RADV_DEBUG_NO_DMA_BLIT},
   {NULL, 0}};
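
/* RADV_DEBUG is parsed by parse_debug_string() as a comma-separated list of
 * the option names above, e.g. RADV_DEBUG=shaders,nocache,info.
 */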

const char *
radv_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_debug_options) - 1);
   return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {
   {"localbos", RADV_PERFTEST_LOCAL_BOS},
   {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
   {"bolist", RADV_PERFTEST_BO_LIST},
   {"cswave32", RADV_PERFTEST_CS_WAVE_32},
   {"pswave32", RADV_PERFTEST_PS_WAVE_32},
   {"gewave32", RADV_PERFTEST_GE_WAVE_32},
   {"nosam", RADV_PERFTEST_NO_SAM},
   {"sam", RADV_PERFTEST_SAM},
   {"rt", RADV_PERFTEST_RT},
   {"nggc", RADV_PERFTEST_NGGC},
   {"emulate_rt", RADV_PERFTEST_EMULATE_RT},
   {"nv_ms", RADV_PERFTEST_NV_MS},
   {"rtwave64", RADV_PERFTEST_RT_WAVE_64},
   {NULL, 0}};

const char *
radv_get_perftest_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
   return radv_perftest_options[id].string;
}

// clang-format off
static const driOptionDescription radv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
      DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
      DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
      DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
      DRI_CONF_RADV_ABSOLUTE_DEPTH_BIAS(false)
      DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_OVERRIDE_VRAM_SIZE()
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_RADV_ZERO_VRAM(false)
      DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)
      DRI_CONF_RADV_INVARIANT_GEOM(false)
      DRI_CONF_RADV_SPLIT_FMA(false)
      DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
      DRI_CONF_RADV_DISABLE_DCC(false)
      DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)
      DRI_CONF_RADV_REQUIRE_ETC2(false)
      DRI_CONF_RADV_DISABLE_ANISO_SINGLE_LEVEL(false)
      DRI_CONF_RADV_DISABLE_SINKING_LOAD_INPUT_FS(false)
   DRI_CONF_SECTION_END
};
// clang-format on

static void
radv_init_dri_options(struct radv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
                      ARRAY_SIZE(radv_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL,
                       NULL, instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   instance->enable_mrt_output_nan_fixup =
      driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");

   instance->disable_shrink_image_store =
      driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");

   instance->absolute_depth_bias =
      driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");

   instance->disable_tc_compat_htile_in_general =
      driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general");

   if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
      instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;

   if (driQueryOptionb(&instance->dri_options, "radv_lower_discard_to_demote"))
      instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;

   if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))
      instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;

   if (driQueryOptionb(&instance->dri_options, "radv_split_fma"))
      instance->debug_flags |= RADV_DEBUG_SPLIT_FMA;

   if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
      instance->debug_flags |= RADV_DEBUG_NO_DCC;

   instance->zero_vram =
      driQueryOptionb(&instance->dri_options, "radv_zero_vram");

   instance->report_apu_as_dgpu =
      driQueryOptionb(&instance->dri_options, "radv_report_apu_as_dgpu");

   instance->disable_aniso_single_level =
      driQueryOptionb(&instance->dri_options, "radv_disable_aniso_single_level");

   instance->disable_sinking_load_input_fs =
      driQueryOptionb(&instance->dri_options, "radv_disable_sinking_load_input_fs");
}
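
/* These options can also be set per-application through the usual driconf
 * mechanism. A minimal sketch of a drirc entry (the application name and
 * executable below are hypothetical):
 *
 *   <driconf>
 *     <device driver="radv">
 *       <application name="Some Game" executable="somegame">
 *         <option name="radv_zero_vram" value="true" />
 *       </application>
 *     </device>
 *   </driconf>
 */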

VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
{
   struct radv_instance *instance;
   VkResult result;

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &wsi_instance_entrypoints, false);
   struct vk_instance_extension_table extensions_supported = radv_instance_extensions_supported;

   result = vk_instance_init(&instance->vk, &extensions_supported, &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(instance, result);
   }

   instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
   instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options);

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      fprintf(stderr, "radv: info: Created an instance.\n");

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   radv_init_dri_options(instance);

   *pInstance = radv_instance_to_handle(instance);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      radv_physical_device_destroy(pdevice);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   VkResult result = VK_SUCCESS;

   if (getenv("RADV_FORCE_FAMILY")) {
      /* When RADV_FORCE_FAMILY is set, the driver creates a null
       * device that makes it possible to test the compiler without
       * having an AMDGPU device.
       */
      struct radv_physical_device *pdevice;

      result = radv_physical_device_try_create(instance, NULL, &pdevice);
      if (result != VK_SUCCESS)
         return result;

      list_addtail(&pdevice->link, &instance->physical_devices);
      return VK_SUCCESS;
   }

#ifndef _WIN32
   /* TODO: Check for more devices? */
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      fprintf(stderr, "radv: info: Found %d drm nodes.\n", max_devices);

   if (max_devices < 1)
      return vk_error(instance, VK_SUCCESS);

   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

         struct radv_physical_device *pdevice;
         result = radv_physical_device_try_create(instance, devices[i], &pdevice);
         /* Incompatible DRM device, skip. */
         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
            result = VK_SUCCESS;
            continue;
         }

         /* Error creating the physical device, report the error. */
         if (result != VK_SUCCESS)
            break;

         list_addtail(&pdevice->link, &instance->physical_devices);
      }
   }
   drmFreeDevices(devices, max_devices);
#endif

   /* If we successfully enumerated any devices, call it success. */
   return result;
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,
                              VkPhysicalDevice *pPhysicalDevices)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDevice, &out, i)
      {
         *i = radv_physical_device_to_handle(pdevice);
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,
                                   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
         p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   memset(pFeatures, 0, sizeof(*pFeatures));

   *pFeatures = (VkPhysicalDeviceFeatures){
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = radv_device_supports_etc(pdevice) || pdevice->emulate_etc2,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .sparseBinding = true,
      .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
      .variableMultisampleRate = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = true,
      .inheritedQueries = true,
   };
}

static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess = true;
   f->uniformAndStorageBuffer16BitAccess = true;
   f->storagePushConstant16 = true;
   f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit;
   f->multiview = true;
   f->multiviewGeometryShader = true;
   f->multiviewTessellationShader = true;
   f->variablePointersStorageBuffer = true;
   f->variablePointers = true;
   f->protectedMemory = false;
   f->samplerYcbcrConversion = true;
   f->shaderDrawParameters = true;
}

static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge = true;
   f->drawIndirectCount = true;
   f->storageBuffer8BitAccess = true;
   f->uniformAndStorageBuffer8BitAccess = true;
   f->storagePushConstant8 = true;
   f->shaderBufferInt64Atomics = true;
   f->shaderSharedInt64Atomics = true;
   f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
   f->shaderInt8 = true;

   f->descriptorIndexing = true;
   f->shaderInputAttachmentArrayDynamicIndexing = true;
   f->shaderUniformTexelBufferArrayDynamicIndexing = true;
   f->shaderStorageTexelBufferArrayDynamicIndexing = true;
   f->shaderUniformBufferArrayNonUniformIndexing = true;
   f->shaderSampledImageArrayNonUniformIndexing = true;
   f->shaderStorageBufferArrayNonUniformIndexing = true;
   f->shaderStorageImageArrayNonUniformIndexing = true;
   f->shaderInputAttachmentArrayNonUniformIndexing = true;
   f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   f->descriptorBindingUniformBufferUpdateAfterBind = true;
   f->descriptorBindingSampledImageUpdateAfterBind = true;
   f->descriptorBindingStorageImageUpdateAfterBind = true;
   f->descriptorBindingStorageBufferUpdateAfterBind = true;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   f->descriptorBindingUpdateUnusedWhilePending = true;
   f->descriptorBindingPartiallyBound = true;
   f->descriptorBindingVariableDescriptorCount = true;
   f->runtimeDescriptorArray = true;

   f->samplerFilterMinmax = true;
   f->scalarBlockLayout = pdevice->rad_info.gfx_level >= GFX7;
   f->imagelessFramebuffer = true;
   f->uniformBufferStandardLayout = true;
   f->shaderSubgroupExtendedTypes = true;
   f->separateDepthStencilLayouts = true;
   f->hostQueryReset = true;
   f->timelineSemaphore = true;
   f->bufferDeviceAddress = true;
   f->bufferDeviceAddressCaptureReplay = true;
   f->bufferDeviceAddressMultiDevice = false;
   f->vulkanMemoryModel = true;
   f->vulkanMemoryModelDeviceScope = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = false;
   f->shaderOutputViewportIndex = true;
   f->shaderOutputLayer = true;
   f->subgroupBroadcastDynamicId = true;
}

static void
radv_get_physical_device_features_1_3(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan13Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);

   f->robustImageAccess = true;
   f->inlineUniformBlock = true;
   f->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   f->pipelineCreationCacheControl = true;
   f->privateData = true;
   f->shaderDemoteToHelperInvocation = true;
   f->shaderTerminateInvocation = true;
   f->subgroupSizeControl = true;
   f->computeFullSubgroups = true;
   f->synchronization2 = true;
   f->textureCompressionASTC_HDR = false;
   f->shaderZeroInitializeWorkgroupMemory = true;
   f->dynamicRendering = true;
   f->shaderIntegerDotProduct = true;
   f->maintenance4 = true;
}

VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceFeatures2 *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   radv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   radv_get_physical_device_features_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Features core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   };
   radv_get_physical_device_features_1_3(pdevice, &core_1_3);

#define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature

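   /* CORE_FEATURE(1, 2, scalarBlockLayout) expands to
    * features->scalarBlockLayout = core_1_2.scalarBlockLayout, i.e. the
    * extension structs below simply mirror the corresponding core feature
    * structs filled in above.
    */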
   vk_foreach_struct(ext, pFeatures->pNext)
   {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = pdevice->rad_info.gfx_level < GFX11;
         features->geometryStreams = !pdevice->use_ngg_streamout && pdevice->rad_info.gfx_level < GFX11;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features =
            (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, scalarBlockLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
         VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
            (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
         features->memoryPriority = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
            (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = false;
         features->computeDerivativeGroupLinear = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = pdevice->rad_info.gfx_level >= GFX8;
         break;
      }
1473       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
1474          VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
1475             (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
1476          features->pipelineExecutableInfo = true;
1477          break;
1478       }
1479       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
1480          VkPhysicalDeviceShaderClockFeaturesKHR *features =
1481             (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
1482          features->shaderSubgroupClock = true;
1483          features->shaderDeviceClock = pdevice->rad_info.gfx_level >= GFX8;
1484          break;
1485       }
1486       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
1487          VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
1488             (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
1489          features->texelBufferAlignment = true;
1490          break;
1491       }
1492       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
1493          VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
1494             (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
1495          features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
1496          break;
1497       }
1498       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
1499          VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
1500             (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
1501          features->rectangularLines = false;
1502          features->bresenhamLines = true;
1503          features->smoothLines = false;
1504          features->stippledRectangularLines = false;
1505          /* FIXME: Some stippled Bresenham CTS tests fail on Vega10
1506           * but pass on Raven.
1507           */
1508          features->stippledBresenhamLines = pdevice->rad_info.gfx_level != GFX9;
1509          features->stippledSmoothLines = false;
1510          break;
1511       }
1512       case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
1513          VkDeviceMemoryOverallocationCreateInfoAMD *features =
1514             (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
1515          features->overallocationBehavior = true;
1516          break;
1517       }
1518       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
1519          VkPhysicalDeviceRobustness2FeaturesEXT *features =
1520             (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
1521          features->robustBufferAccess2 = true;
1522          features->robustImageAccess2 = true;
1523          features->nullDescriptor = true;
1524          break;
1525       }
1526       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
1527          VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
1528             (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
1529          features->customBorderColors = true;
1530          features->customBorderColorWithoutFormat = true;
1531          break;
1532       }
1533       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
1534          VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
1535             (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
1536          features->extendedDynamicState = true;
1537          break;
1538       }
1539       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
1540          VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
1541             (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
1542          features->shaderBufferFloat32Atomics = true;
1543          features->shaderBufferFloat32AtomicAdd = false;
1544          features->shaderBufferFloat64Atomics = true;
1545          features->shaderBufferFloat64AtomicAdd = false;
1546          features->shaderSharedFloat32Atomics = true;
1547          features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.gfx_level >= GFX8;
1548          features->shaderSharedFloat64Atomics = true;
1549          features->shaderSharedFloat64AtomicAdd = false;
1550          features->shaderImageFloat32Atomics = true;
1551          features->shaderImageFloat32AtomicAdd = false;
1552          features->sparseImageFloat32Atomics = true;
1553          features->sparseImageFloat32AtomicAdd = false;
1554          break;
1555       }
1556       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
1557          VkPhysicalDevice4444FormatsFeaturesEXT *features =
1558             (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
1559          features->formatA4R4G4B4 = true;
1560          features->formatA4B4G4R4 = true;
1561          break;
1562       }
1563       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
1564          VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
1565             (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
1566          features->shaderImageInt64Atomics = true;
1567          features->sparseImageInt64Atomics = true;
1568          break;
1569       }
1570       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
1571          VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
1572             (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
1573          features->mutableDescriptorType = true;
1574          break;
1575       }
1576       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
1577          VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
1578             (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
1579          features->pipelineFragmentShadingRate = true;
1580          features->primitiveFragmentShadingRate = true;
1581          features->attachmentFragmentShadingRate =
1582             !(pdevice->instance->debug_flags & RADV_DEBUG_NO_HIZ) &&
1583             pdevice->rad_info.gfx_level < GFX11; /* TODO: VRS no longer uses HTILE. */
1584          break;
1585       }
1586       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
1587          VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
1588             (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
1589          features->workgroupMemoryExplicitLayout = true;
1590          features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
1591          features->workgroupMemoryExplicitLayout8BitAccess = true;
1592          features->workgroupMemoryExplicitLayout16BitAccess = true;
1593          break;
1594       }
1595       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
1596          VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
1597             (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
1598          features->provokingVertexLast = true;
1599          features->transformFeedbackPreservesProvokingVertex = true;
1600          break;
1601       }
1602       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
1603          VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
1604             (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
1605          features->extendedDynamicState2 = true;
1606          features->extendedDynamicState2LogicOp = true;
1607          features->extendedDynamicState2PatchControlPoints = false;
1608          break;
1609       }
1610       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: {
1611          VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *features =
1612             (VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *)ext;
1613          features->globalPriorityQuery = true;
1614          break;
1615       }
1616       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
1617          VkPhysicalDeviceAccelerationStructureFeaturesKHR *features =
1618             (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)ext;
1619          features->accelerationStructure = true;
1620          features->accelerationStructureCaptureReplay = false;
1621          features->accelerationStructureIndirectBuild = false;
1622          features->accelerationStructureHostCommands = true;
1623          features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
1624          break;
1625       }
1626       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
1627          VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
1628             (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
1629          features->shaderSubgroupUniformControlFlow = true;
1630          break;
1631       }
1632       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
1633          VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
1634          features->multiDraw = true;
1635          break;
1636       }
1637       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
1638          VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
1639             (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
1640          features->colorWriteEnable = true;
1641          break;
1642       }
1643       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
1644          VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features =
1645             (VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *)ext;
1646          bool has_shader_buffer_float_minmax = radv_has_shader_buffer_float_minmax(pdevice);
1647          bool has_shader_image_float_minmax =
1648             pdevice->rad_info.gfx_level != GFX8 && pdevice->rad_info.gfx_level != GFX9;
1649          features->shaderBufferFloat16Atomics = false;
1650          features->shaderBufferFloat16AtomicAdd = false;
1651          features->shaderBufferFloat16AtomicMinMax = false;
1652          features->shaderBufferFloat32AtomicMinMax = has_shader_buffer_float_minmax;
1653          features->shaderBufferFloat64AtomicMinMax = has_shader_buffer_float_minmax;
1654          features->shaderSharedFloat16Atomics = false;
1655          features->shaderSharedFloat16AtomicAdd = false;
1656          features->shaderSharedFloat16AtomicMinMax = false;
1657          features->shaderSharedFloat32AtomicMinMax = true;
1658          features->shaderSharedFloat64AtomicMinMax = true;
1659          features->shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax;
1660          features->sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax;
1661          break;
1662       }
1663       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
1664          VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
1665             (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
1666          features->primitiveTopologyListRestart = true;
1667          features->primitiveTopologyPatchListRestart = false;
1668          break;
1669       }
1670       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
1671          VkPhysicalDeviceRayQueryFeaturesKHR *features =
1672             (VkPhysicalDeviceRayQueryFeaturesKHR *)ext;
1673          features->rayQuery = true;
1674          break;
1675       }
1676       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR: {
1677          VkPhysicalDeviceRayTracingPipelineFeaturesKHR *features =
1678             (VkPhysicalDeviceRayTracingPipelineFeaturesKHR *)ext;
1679          features->rayTracingPipeline = true;
1680          features->rayTracingPipelineShaderGroupHandleCaptureReplay = false;
1681          features->rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false;
1682          features->rayTracingPipelineTraceRaysIndirect = true;
1683          features->rayTraversalPrimitiveCulling = true;
1684          break;
1685       }
1686       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_MAINTENANCE_1_FEATURES_KHR: {
1687          VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR *features =
1688             (VkPhysicalDeviceRayTracingMaintenance1FeaturesKHR *)ext;
1689          features->rayTracingMaintenance1 = true;
1690          features->rayTracingPipelineTraceRaysIndirect2 = radv_enable_rt(pdevice, true);
1691          break;
1692       }
1693       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES: {
1694          VkPhysicalDeviceMaintenance4Features *features =
1695             (VkPhysicalDeviceMaintenance4Features *)ext;
1696          features->maintenance4 = true;
1697          break;
1698       }
1699       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
1700          VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features =
1701             (VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *)ext;
1702          features->vertexInputDynamicState = true;
1703          break;
1704       }
1705       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: {
1706          VkPhysicalDeviceImageViewMinLodFeaturesEXT *features =
1707             (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext;
1708          features->minLod = true;
1709          break;
1710       }
1711       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES: {
1712          VkPhysicalDeviceSynchronization2Features *features =
1713             (VkPhysicalDeviceSynchronization2Features *)ext;
1714          features->synchronization2 = true;
1715          break;
1716       }
1717       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DYNAMIC_RENDERING_FEATURES: {
1718          VkPhysicalDeviceDynamicRenderingFeatures *features =
1719             (VkPhysicalDeviceDynamicRenderingFeatures *)ext;
1720          features->dynamicRendering = true;
1721          break;
1722       }
1723       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
1724          VkPhysicalDeviceMeshShaderFeaturesNV *features =
1725             (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
1726          features->meshShader = true;
1727          features->taskShader = false; /* TODO */
1728          break;
1729       }
1730       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: {
1731          VkPhysicalDeviceTextureCompressionASTCHDRFeatures *features =
1732             (VkPhysicalDeviceTextureCompressionASTCHDRFeatures *)ext;
1733          features->textureCompressionASTC_HDR = false;
1734          break;
1735       }
1736       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_SET_HOST_MAPPING_FEATURES_VALVE: {
1737          VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE *features =
1738             (VkPhysicalDeviceDescriptorSetHostMappingFeaturesVALVE *)ext;
1739          features->descriptorSetHostMapping = true;
1740          break;
1741       }
1742       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
1743          VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
1744             (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
1745          features->depthClipControl = true;
1746          break;
1747       }
1748       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
1749          VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
1750             (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
1751          features->image2DViewOf3D = true;
1752          features->sampler2DViewOf3D = false;
1753          break;
1754       }
1755       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
1756          VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
1757             (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
1758          features->shaderIntegerFunctions2 = true;
1759          break;
1760       }
1761       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
1762          VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
1763             (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
1764          features->primitivesGeneratedQuery = true;
1765          features->primitivesGeneratedQueryWithRasterizerDiscard = true;
1766          features->primitivesGeneratedQueryWithNonZeroStreams = true;
1767          break;
1768       }
1769       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_NON_SEAMLESS_CUBE_MAP_FEATURES_EXT: {
1770          VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *features =
1771             (VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *)ext;
1772          features->nonSeamlessCubeMap = true;
1773          break;
1774       }
1775       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: {
1776          VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features =
1777             (VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *)ext;
1778          features->borderColorSwizzle = true;
1779          features->borderColorSwizzleFromImage = true;
1780          break;
1781       }
1782       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: {
1783          VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
1784             (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
1785          features->shaderModuleIdentifier = true;
1786          break;
1787       }
1788       default:
1789          break;
1790       }
1791    }
1792 }
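
/* Usage sketch (application side, illustrative only): callers chain extension
 * feature structs through pNext and this entrypoint fills them in, e.g.:
 *
 *    VkPhysicalDeviceConditionalRenderingFeaturesEXT cond = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT,
 *    };
 *    VkPhysicalDeviceFeatures2 features2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
 *       .pNext = &cond,
 *    };
 *    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
 *
 * After the call, cond.conditionalRendering holds the value written by the
 * switch above.
 */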
1793
1794 static size_t
1795 radv_max_descriptor_set_size(void)
1796 {
1797    /* Make sure that the entire descriptor set is addressable with a signed
1798     * 32-bit int, i.e. the sum of all limits scaled by descriptor size must
1799     * be at most 2 GiB. A combined image & sampler object counts against
1800     * both limits. This limit applies to the pipeline layout, not the set
1801     * layout, but there is no per-set limit, so we just advertise a pipeline
1802     * limit. It is unlikely that any application will hit this soon. */
1803    return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
1804            MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1805           (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1806            32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1807            32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
1808            64 /* storage image */);
1809 }
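
/* Worked example (illustrative): the per-descriptor costs in the divisor sum
 * to 32 + 32 + 32 + 64 + 64 = 224 bytes, so even before subtracting the
 * dynamic-buffer and inline-uniform-block reservations the limit comes out to
 * roughly (1ull << 31) / 224, i.e. about 9.5 million descriptors.
 */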
1810
1811 static uint32_t
1812 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
1813 {
1814    uint32_t uniform_offset_alignment =
1815       driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");
1816    if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1817       fprintf(stderr,
1818               "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
1819               "not a power of two\n",
1820               uniform_offset_alignment);
1821       uniform_offset_alignment = 0;
1822    }
1823
1824    /* Take at least the hardware limit. */
1825    return MAX2(uniform_offset_alignment, 4);
1826 }
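
/* Illustrative only (the exact drirc schema is an assumption here; consult
 * the driconf documentation): the override can be set per application in a
 * drirc file, along the lines of:
 *
 *    <device driver="radv">
 *       <application name="myapp" executable="myapp">
 *          <option name="radv_override_uniform_offset_alignment" value="256" />
 *       </application>
 *    </device>
 *
 * A zero or non-power-of-two value falls back to the 4-byte hardware minimum
 * enforced above.
 */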
1827
1828 VKAPI_ATTR void VKAPI_CALL
1829 radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
1830                                  VkPhysicalDeviceProperties *pProperties)
1831 {
1832    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1833    VkSampleCountFlags sample_counts = 0xf;
1834
1835    size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1836
1837    VkPhysicalDeviceLimits limits = {
1838       .maxImageDimension1D = (1 << 14),
1839       .maxImageDimension2D = (1 << 14),
1840       .maxImageDimension3D = (1 << 11),
1841       .maxImageDimensionCube = (1 << 14),
1842       .maxImageArrayLayers = (1 << 11),
1843       .maxTexelBufferElements = UINT32_MAX,
1844       .maxUniformBufferRange = UINT32_MAX,
1845       .maxStorageBufferRange = UINT32_MAX,
1846       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1847       .maxMemoryAllocationCount = UINT32_MAX,
1848       .maxSamplerAllocationCount = 64 * 1024,
1849       .bufferImageGranularity = 1,
1850       .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1851       .maxBoundDescriptorSets = MAX_SETS,
1852       .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1853       .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1854       .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1855       .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1856       .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1857       .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1858       .maxPerStageResources = max_descriptor_set_size,
1859       .maxDescriptorSetSamplers = max_descriptor_set_size,
1860       .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1861       .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1862       .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1863       .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1864       .maxDescriptorSetSampledImages = max_descriptor_set_size,
1865       .maxDescriptorSetStorageImages = max_descriptor_set_size,
1866       .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1867       .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1868       .maxVertexInputBindings = MAX_VBS,
1869       .maxVertexInputAttributeOffset = UINT32_MAX,
1870       .maxVertexInputBindingStride = 2048,
1871       .maxVertexOutputComponents = 128,
1872       .maxTessellationGenerationLevel = 64,
1873       .maxTessellationPatchSize = 32,
1874       .maxTessellationControlPerVertexInputComponents = 128,
1875       .maxTessellationControlPerVertexOutputComponents = 128,
1876       .maxTessellationControlPerPatchOutputComponents = 120,
1877       .maxTessellationControlTotalOutputComponents = 4096,
1878       .maxTessellationEvaluationInputComponents = 128,
1879       .maxTessellationEvaluationOutputComponents = 128,
1880       .maxGeometryShaderInvocations = 127,
1881       .maxGeometryInputComponents = 64,
1882       .maxGeometryOutputComponents = 128,
1883       .maxGeometryOutputVertices = 256,
1884       .maxGeometryTotalOutputComponents = 1024,
1885       .maxFragmentInputComponents = 128,
1886       .maxFragmentOutputAttachments = 8,
1887       .maxFragmentDualSrcAttachments = 1,
1888       .maxFragmentCombinedOutputResources = max_descriptor_set_size,
1889       .maxComputeSharedMemorySize = pdevice->rad_info.gfx_level >= GFX7 ? 65536 : 32768,
1890       .maxComputeWorkGroupCount = {65535, 65535, 65535},
1891       .maxComputeWorkGroupInvocations = 1024,
1892       .maxComputeWorkGroupSize = {1024, 1024, 1024},
1893       .subPixelPrecisionBits = 8,
1894       .subTexelPrecisionBits = 8,
1895       .mipmapPrecisionBits = 8,
1896       .maxDrawIndexedIndexValue = UINT32_MAX,
1897       .maxDrawIndirectCount = UINT32_MAX,
1898       .maxSamplerLodBias = 16,
1899       .maxSamplerAnisotropy = 16,
1900       .maxViewports = MAX_VIEWPORTS,
1901       .maxViewportDimensions = {(1 << 14), (1 << 14)},
1902       .viewportBoundsRange = {INT16_MIN, INT16_MAX},
1903       .viewportSubPixelBits = 8,
1904       .minMemoryMapAlignment = 4096, /* A page */
1905       .minTexelBufferOffsetAlignment = 4,
1906       .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
1907       .minStorageBufferOffsetAlignment = 4,
1908       .minTexelOffset = -32,
1909       .maxTexelOffset = 31,
1910       .minTexelGatherOffset = -32,
1911       .maxTexelGatherOffset = 31,
1912       .minInterpolationOffset = -2,
1913       .maxInterpolationOffset = 2,
1914       .subPixelInterpolationOffsetBits = 8,
1915       .maxFramebufferWidth = MAX_FRAMEBUFFER_WIDTH,
1916       .maxFramebufferHeight = MAX_FRAMEBUFFER_HEIGHT,
1917       .maxFramebufferLayers = (1 << 10),
1918       .framebufferColorSampleCounts = sample_counts,
1919       .framebufferDepthSampleCounts = sample_counts,
1920       .framebufferStencilSampleCounts = sample_counts,
1921       .framebufferNoAttachmentsSampleCounts = sample_counts,
1922       .maxColorAttachments = MAX_RTS,
1923       .sampledImageColorSampleCounts = sample_counts,
1924       .sampledImageIntegerSampleCounts = sample_counts,
1925       .sampledImageDepthSampleCounts = sample_counts,
1926       .sampledImageStencilSampleCounts = sample_counts,
1927       .storageImageSampleCounts = sample_counts,
1928       .maxSampleMaskWords = 1,
1929       .timestampComputeAndGraphics = true,
1930       .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
1931       .maxClipDistances = 8,
1932       .maxCullDistances = 8,
1933       .maxCombinedClipAndCullDistances = 8,
1934       .discreteQueuePriorities = 2,
1935       .pointSizeRange = {0.0, 8191.875},
1936       .lineWidthRange = {0.0, 8191.875},
1937       .pointSizeGranularity = (1.0 / 8.0),
1938       .lineWidthGranularity = (1.0 / 8.0),
1939       .strictLines = false, /* FINISHME */
1940       .standardSampleLocations = true,
1941       .optimalBufferCopyOffsetAlignment = 1,
1942       .optimalBufferCopyRowPitchAlignment = 1,
1943       .nonCoherentAtomSize = 64,
1944    };
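
   /* Note on timestampPeriod above (illustrative; assumes clock_crystal_freq
    * is expressed in kHz): 1000000.0 / freq_in_kHz yields nanoseconds per
    * timestamp tick, e.g. a 100000 kHz (100 MHz) reference clock gives a
    * period of 10 ns per tick.
    */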
1945
1946    VkPhysicalDeviceType device_type;
1947
1948    if (pdevice->rad_info.has_dedicated_vram || pdevice->instance->report_apu_as_dgpu) {
1949       device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
1950    } else {
1951       device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1952    }
1953
1954    *pProperties = (VkPhysicalDeviceProperties){
1955       .apiVersion = RADV_API_VERSION,
1956       .driverVersion = vk_get_driver_version(),
1957       .vendorID = ATI_VENDOR_ID,
1958       .deviceID = pdevice->rad_info.pci_id,
1959       .deviceType = device_type,
1960       .limits = limits,
1961       .sparseProperties =
1962          {
1963             .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
1964             .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
1965          },
1966    };
1967
1968    strcpy(pProperties->deviceName, pdevice->marketing_name);
1969    memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1970 }
1971
1972 static void
1973 radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
1974                                         VkPhysicalDeviceVulkan11Properties *p)
1975 {
1976    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
1977
1978    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1979    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1980    memset(p->deviceLUID, 0, VK_LUID_SIZE);
1981    /* The LUID is for Windows. */
1982    p->deviceLUIDValid = false;
1983    p->deviceNodeMask = 0;
1984
1985    p->subgroupSize = RADV_SUBGROUP_SIZE;
1986    p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
1987    p->subgroupSupportedOperations =
1988       VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
1989       VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
1990       VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
1991       VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1992    p->subgroupQuadOperationsInAllStages = true;
1993
1994    p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1995    p->maxMultiviewViewCount = MAX_VIEWS;
1996    p->maxMultiviewInstanceIndex = INT_MAX;
1997    p->protectedNoFault = false;
1998    p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1999    p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
2000 }
2001
2002 static void
2003 radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
2004                                         VkPhysicalDeviceVulkan12Properties *p)
2005 {
2006    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
2007
2008    p->driverID = VK_DRIVER_ID_MESA_RADV;
2009    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
2010    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
2011             radv_get_compiler_string(pdevice));
2012
2013    if (radv_is_conformant(pdevice)) {
2014       if (pdevice->rad_info.gfx_level >= GFX10_3) {
2015          p->conformanceVersion = (VkConformanceVersion){
2016             .major = 1,
2017             .minor = 3,
2018             .subminor = 0,
2019             .patch = 0,
2020          };
2021       } else {
2022          p->conformanceVersion = (VkConformanceVersion){
2023             .major = 1,
2024             .minor = 2,
2025             .subminor = 7,
2026             .patch = 1,
2027          };
2028       }
2029    } else {
2030       p->conformanceVersion = (VkConformanceVersion){
2031          .major = 0,
2032          .minor = 0,
2033          .subminor = 0,
2034          .patch = 0,
2035       };
2036    }
2037
2038    /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
2039     * controlled by the same config register.
2040     */
2041    if (pdevice->rad_info.has_packed_math_16bit) {
2042       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
2043       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
2044    } else {
2045       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
2046       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
2047    }
2048
2049    /* With LLVM, do not allow both preserving and flushing denorms because
2050     * different shaders in the same pipeline can have different settings and
2051     * this won't work for merged shaders. To make it work, this requires LLVM
2052     * support for changing the register. The same logic applies for the
2053     * rounding modes because they are configured with the same config
2054     * register.
2055     */
2056    p->shaderDenormFlushToZeroFloat32 = true;
2057    p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
2058    p->shaderRoundingModeRTEFloat32 = true;
2059    p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
2060    p->shaderSignedZeroInfNanPreserveFloat32 = true;
2061
2062    p->shaderDenormFlushToZeroFloat16 =
2063       pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
2064    p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
2065    p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
2066    p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
2067    p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
2068
2069    p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
2070    p->shaderDenormPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
2071    p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.gfx_level >= GFX8;
2072    p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
2073    p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
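
   /* Consumer-side note (illustrative): a SPIR-V module that declares the
    * DenormPreserve execution mode for 16-bit floats is only legal when
    * shaderDenormPreserveFloat16 above is true, i.e. only on hardware with
    * packed 16-bit math here.
    */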
2074
2075    p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
2076    p->shaderUniformBufferArrayNonUniformIndexingNative = false;
2077    p->shaderSampledImageArrayNonUniformIndexingNative = false;
2078    p->shaderStorageBufferArrayNonUniformIndexingNative = false;
2079    p->shaderStorageImageArrayNonUniformIndexingNative = false;
2080    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
2081    p->robustBufferAccessUpdateAfterBind = true;
2082    p->quadDivergentImplicitLod = false;
2083
2084    size_t max_descriptor_set_size = radv_max_descriptor_set_size();
2091    p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
2092    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
2093    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
2094    p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
2095    p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
2096    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
2097    p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
2098    p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
2099    p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
2100    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
2101    p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
2102    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
2103    p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
2104    p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
2105    p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
2106
2107    /* We support all of the depth resolve modes */
2108    p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
2109                                    VK_RESOLVE_MODE_AVERAGE_BIT | VK_RESOLVE_MODE_MIN_BIT |
2110                                    VK_RESOLVE_MODE_MAX_BIT;
2111
2112    /* Average doesn't make sense for stencil, so we don't support it. */
2113    p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
2114                                      VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
2115
2116    p->independentResolveNone = true;
2117    p->independentResolve = true;
2118
2119    /* GFX6-8 only support single channel min/max filter. */
2120    p->filterMinmaxImageComponentMapping = pdevice->rad_info.gfx_level >= GFX9;
2121    p->filterMinmaxSingleComponentFormats = true;
2122
2123    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
2124
2125    p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
2126 }
2127
2128 static void
2129 radv_get_physical_device_properties_1_3(struct radv_physical_device *pdevice,
2130                                         VkPhysicalDeviceVulkan13Properties *p)
2131 {
2132    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
2133
2134    p->minSubgroupSize = 64;
2135    p->maxSubgroupSize = 64;
2136    p->maxComputeWorkgroupSubgroups = UINT32_MAX;
2137    p->requiredSubgroupSizeStages = 0;
2138    if (pdevice->rad_info.gfx_level >= GFX10) {
2139       /* Only GFX10+ supports wave32. */
2140       p->minSubgroupSize = 32;
2141       p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
2142    }
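
   /* Usage sketch (application side, illustrative only): on GFX10+ a compute
    * pipeline can request wave32 explicitly by chaining this into
    * VkPipelineShaderStageCreateInfo::pNext:
    *
    *    VkPipelineShaderStageRequiredSubgroupSizeCreateInfo req = {
    *       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,
    *       .requiredSubgroupSize = 32,
    *    };
    *
    * requiredSubgroupSize must lie in [minSubgroupSize, maxSubgroupSize] and
    * the stage must be included in requiredSubgroupSizeStages, which is why
    * only VK_SHADER_STAGE_COMPUTE_BIT is advertised here.
    */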
2143
2144    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
2145    p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
2146    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
2147    p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
2148    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
2149    p->maxInlineUniformTotalSize = UINT16_MAX;
2150
2151    bool accel = pdevice->rad_info.has_accelerated_dot_product;
2152    p->integerDotProduct8BitUnsignedAccelerated = accel;
2153    p->integerDotProduct8BitSignedAccelerated = accel;
2154    p->integerDotProduct8BitMixedSignednessAccelerated = false;
2155    p->integerDotProduct4x8BitPackedUnsignedAccelerated = accel;
2156    p->integerDotProduct4x8BitPackedSignedAccelerated = accel;
2157    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
2158    p->integerDotProduct16BitUnsignedAccelerated = accel;
2159    p->integerDotProduct16BitSignedAccelerated = accel;
2160    p->integerDotProduct16BitMixedSignednessAccelerated = false;
2161    p->integerDotProduct32BitUnsignedAccelerated = false;
2162    p->integerDotProduct32BitSignedAccelerated = false;
2163    p->integerDotProduct32BitMixedSignednessAccelerated = false;
2164    p->integerDotProduct64BitUnsignedAccelerated = false;
2165    p->integerDotProduct64BitSignedAccelerated = false;
2166    p->integerDotProduct64BitMixedSignednessAccelerated = false;
2167    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel;
2168    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel;
2169    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
2170    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel;
2171    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel;
2172    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false;
2173    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel;
2174    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel;
2175    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
2176    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
2177    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
2178    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
2179    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
2180    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
2181    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
2182
2183    p->storageTexelBufferOffsetAlignmentBytes = 4;
2184    p->storageTexelBufferOffsetSingleTexelAlignment = true;
2185    p->uniformTexelBufferOffsetAlignmentBytes = 4;
2186    p->uniformTexelBufferOffsetSingleTexelAlignment = true;
2187
2188    p->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
2189 }
2190
2191 VKAPI_ATTR void VKAPI_CALL
2192 radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
2193                                   VkPhysicalDeviceProperties2 *pProperties)
2194 {
2195    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2196    radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
2197
2198    VkPhysicalDeviceVulkan11Properties core_1_1 = {
2199       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
2200    };
2201    radv_get_physical_device_properties_1_1(pdevice, &core_1_1);
2202
2203    VkPhysicalDeviceVulkan12Properties core_1_2 = {
2204       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
2205    };
2206    radv_get_physical_device_properties_1_2(pdevice, &core_1_2);
2207
2208    VkPhysicalDeviceVulkan13Properties core_1_3 = {
2209       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
2210    };
2211    radv_get_physical_device_properties_1_3(pdevice, &core_1_3);
2212
2213    vk_foreach_struct(ext, pProperties->pNext)
2214    {
2215       if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
2216          continue;
2217       if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
2218          continue;
2219       if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
2220          continue;
2221
2222       switch (ext->sType) {
2223       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
2224          VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
2225             (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
2226          properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
2227          break;
2228       }
2229       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
2230          VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
2231             (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
2232          properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
2233          break;
2234       }
2235       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
2236          VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
2237             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
2238          properties->minImportedHostPointerAlignment = 4096;
2239          break;
2240       }
2241       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
2242          VkPhysicalDeviceShaderCorePropertiesAMD *properties =
2243             (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
2244
2245          /* Shader engines. */
2246          properties->shaderEngineCount = pdevice->rad_info.max_se;
2247          properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
2248          properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
2249          properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
2250          properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
2251          properties->wavefrontSize = 64;
2252
2253          /* SGPR. */
2254          properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
2255          properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
2256          properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
2257          properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;
2258
2259          /* VGPR. */
2260          properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
2261          properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
2262          properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
2263          properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
2264          break;
2265       }
2266       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
2267          VkPhysicalDeviceShaderCoreProperties2AMD *properties =
2268             (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
2269
2270          properties->shaderCoreFeatures = 0;
2271          properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
2272          break;
2273       }
2274       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2275          VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
2276             (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2277          properties->maxVertexAttribDivisor = UINT32_MAX;
2278          break;
2279       }
2280       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
2281          VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
2282             (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
2283          properties->primitiveOverestimationSize = 0;
2284          properties->maxExtraPrimitiveOverestimationSize = 0;
2285          properties->extraPrimitiveOverestimationSizeGranularity = 0;
2286          properties->primitiveUnderestimation = false;
2287          properties->conservativePointAndLineRasterization = false;
2288          properties->degenerateTrianglesRasterized = true;
2289          properties->degenerateLinesRasterized = false;
2290          properties->fullyCoveredFragmentShaderInputVariable = false;
2291          properties->conservativeRasterizationPostDepthCoverage = false;
2292          break;
2293       }
2294 #ifndef _WIN32
2295       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
2296          VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
2297             (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
2298          properties->pciDomain = pdevice->bus_info.domain;
2299          properties->pciBus = pdevice->bus_info.bus;
2300          properties->pciDevice = pdevice->bus_info.dev;
2301          properties->pciFunction = pdevice->bus_info.func;
2302          break;
2303       }
2304 #endif
2305       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2306          VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
2307             (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2308          properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
2309          properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
2310          properties->maxTransformFeedbackBufferSize = UINT32_MAX;
2311          properties->maxTransformFeedbackStreamDataSize = 512;
2312          properties->maxTransformFeedbackBufferDataSize = 512;
2313          properties->maxTransformFeedbackBufferDataStride = 512;
2314          properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
2315          properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
2316          properties->transformFeedbackRasterizationStreamSelect = false;
2317          properties->transformFeedbackDraw = true;
2318          break;
2319       }
2320       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
2321          VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
2322             (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
2323          properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
2324                                                   VK_SAMPLE_COUNT_8_BIT;
2325          properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
2326          properties->sampleLocationCoordinateRange[0] = 0.0f;
2327          properties->sampleLocationCoordinateRange[1] = 0.9375f;
2328          properties->sampleLocationSubPixelBits = 4;
2329          properties->variableSampleLocations = false;
2330          break;
2331       }
2332       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2333          VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2334             (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2335          props->lineSubPixelPrecisionBits = 4;
2336          break;
2337       }
2338       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2339          VkPhysicalDeviceRobustness2PropertiesEXT *properties =
2340             (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
2341          properties->robustStorageBufferAccessSizeAlignment = 4;
2342          properties->robustUniformBufferAccessSizeAlignment = 4;
2343          break;
2344       }
2345       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
2346          VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
2347             (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
2348          props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
2349          break;
2350       }
2351       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
2352          VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
2353             (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
2354          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
2355          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
2356          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
2357          props->primitiveFragmentShadingRateWithMultipleViewports = true;
2358          props->layeredShadingRateAttachments = false; /* TODO */
2359          props->fragmentShadingRateNonTrivialCombinerOps = true;
2360          props->maxFragmentSize = (VkExtent2D){2, 2};
2361          props->maxFragmentSizeAspectRatio = 2;
2362          props->maxFragmentShadingRateCoverageSamples = 32;
2363          props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
2364          props->fragmentShadingRateWithShaderDepthStencilWrites = false;
2365          props->fragmentShadingRateWithSampleMask = true;
2366          props->fragmentShadingRateWithShaderSampleMask = false;
2367          props->fragmentShadingRateWithConservativeRasterization = true;
2368          props->fragmentShadingRateWithFragmentShaderInterlock = false;
2369          props->fragmentShadingRateWithCustomSampleLocations = false;
2370          props->fragmentShadingRateStrictMultiplyCombiner = true;
2371          break;
2372       }
2373       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
2374          VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
2375             (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
2376          props->provokingVertexModePerPipeline = true;
2377          props->transformFeedbackPreservesTriangleFanProvokingVertex = true;
2378          break;
2379       }
2380       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
2381          VkPhysicalDeviceAccelerationStructurePropertiesKHR *props =
2382             (VkPhysicalDeviceAccelerationStructurePropertiesKHR *)ext;
2383          props->maxGeometryCount = (1 << 24) - 1;
2384          props->maxInstanceCount = (1 << 24) - 1;
2385          props->maxPrimitiveCount = (1 << 29) - 1;
2386          props->maxPerStageDescriptorAccelerationStructures =
2387             pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
2388          props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures =
2389             pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
2390          props->maxDescriptorSetAccelerationStructures =
2391             pProperties->properties.limits.maxDescriptorSetStorageBuffers;
2392          props->maxDescriptorSetUpdateAfterBindAccelerationStructures =
2393             pProperties->properties.limits.maxDescriptorSetStorageBuffers;
2394          props->minAccelerationStructureScratchOffsetAlignment = 128;
2395          break;
2396       }
2397 #ifndef _WIN32
2398       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
2399          VkPhysicalDeviceDrmPropertiesEXT *props = (VkPhysicalDeviceDrmPropertiesEXT *)ext;
2400          if (pdevice->available_nodes & (1 << DRM_NODE_PRIMARY)) {
2401             props->hasPrimary = true;
2402             props->primaryMajor = (int64_t)major(pdevice->primary_devid);
2403             props->primaryMinor = (int64_t)minor(pdevice->primary_devid);
2404          } else {
2405             props->hasPrimary = false;
2406          }
2407          if (pdevice->available_nodes & (1 << DRM_NODE_RENDER)) {
2408             props->hasRender = true;
2409             props->renderMajor = (int64_t)major(pdevice->render_devid);
2410             props->renderMinor = (int64_t)minor(pdevice->render_devid);
2411          } else {
2412             props->hasRender = false;
2413          }
2414          break;
2415       }
2416 #endif
2417       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2418          VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2419          props->maxMultiDrawCount = 2048;
2420          break;
2421       }
2422       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR: {
2423          VkPhysicalDeviceRayTracingPipelinePropertiesKHR *props =
2424             (VkPhysicalDeviceRayTracingPipelinePropertiesKHR *)ext;
2425          props->shaderGroupHandleSize = RADV_RT_HANDLE_SIZE;
2426          props->maxRayRecursionDepth = 31;    /* Minimum allowed for DXR. */
2427          props->maxShaderGroupStride = 16384; /* dummy */
2428          props->shaderGroupBaseAlignment = 16;
2429          props->shaderGroupHandleCaptureReplaySize = 16;
2430          props->maxRayDispatchInvocationCount = 1024 * 1024 * 64;
2431          props->shaderGroupHandleAlignment = 16;
2432          props->maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE;
2433          break;
2434       }
2435       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
2436          VkPhysicalDeviceMaintenance4Properties *properties =
2437             (VkPhysicalDeviceMaintenance4Properties *)ext;
2438          properties->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
2439          break;
2440       }
2441       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
2442          VkPhysicalDeviceMeshShaderPropertiesNV *properties =
2443             (VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
2444
2445          /* Task shader limitations:
2446           * Same as compute, because TS are compiled to CS.
2447           */
2448          properties->maxDrawMeshTasksCount = 65535;
2449          properties->maxTaskTotalMemorySize = 65536;
2450          properties->maxTaskWorkGroupInvocations = 1024;
2451          properties->maxTaskWorkGroupSize[0] = 1024;
2452          properties->maxTaskWorkGroupSize[1] = 1024;
2453          properties->maxTaskWorkGroupSize[2] = 1024;
2454          properties->maxTaskOutputCount = 65535;
2455
2456          /* Mesh shader limitations:
2457           * Same as NGG, because MS are compiled to NGG.
2458           */
2459          properties->maxMeshMultiviewViewCount = MAX_VIEWS;
2460          properties->maxMeshOutputPrimitives = 256;
2461          properties->maxMeshOutputVertices = 256;
2462          properties->maxMeshTotalMemorySize = 31 * 1024; /* Reserve 1K for prim indices, etc. */
2463          properties->maxMeshWorkGroupInvocations = 256;
2464          properties->maxMeshWorkGroupSize[0] = 256;
2465          properties->maxMeshWorkGroupSize[1] = 256;
2466          properties->maxMeshWorkGroupSize[2] = 256;
2467          properties->meshOutputPerPrimitiveGranularity = 1;
2468          properties->meshOutputPerVertexGranularity = 1;
2469
2470          break;
2471       }
2472       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
2473          VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *properties =
2474             (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
2475          STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
2476                        sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
2477          memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
2478                 vk_shaderModuleIdentifierAlgorithmUUID,
2479                 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
2480          break;
2481       }
2482       default:
2483          break;
2484       }
2485    }
2486 }
2487
2488 static void
2489 radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
2490                                                  uint32_t *pCount,
2491                                                  VkQueueFamilyProperties **pQueueFamilyProperties)
2492 {
2493    int num_queue_families = 1;
2494    int idx;
2495    if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
2496        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
2497       num_queue_families++;
2498
2499    if (pQueueFamilyProperties == NULL) {
2500       *pCount = num_queue_families;
2501       return;
2502    }
2503
2504    if (!*pCount)
2505       return;
2506
2507    idx = 0;
2508    if (*pCount >= 1) {
2509       *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2510          .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
2511                        VK_QUEUE_SPARSE_BINDING_BIT,
2512          .queueCount = 1,
2513          .timestampValidBits = 64,
2514          .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2515       };
2516       idx++;
2517    }
2518
2519    if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
2520        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
2521       if (*pCount > idx) {
2522          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2523             .queueFlags =
2524                VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
2525             .queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues,
2526             .timestampValidBits = 64,
2527             .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2528          };
2529          idx++;
2530       }
2531    }
2532    *pCount = idx;
2533 }
2534
2535 static const VkQueueGlobalPriorityEXT radv_global_queue_priorities[] = {
2536    VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT,
2537    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT,
2538    VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
2539    VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT,
2540 };
2541
2542 VKAPI_ATTR void VKAPI_CALL
2543 radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
2544                                              VkQueueFamilyProperties2 *pQueueFamilyProperties)
2545 {
2546    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2547    if (!pQueueFamilyProperties) {
2548       radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
2549       return;
2550    }
2551    VkQueueFamilyProperties *properties[] = {
2552       &pQueueFamilyProperties[0].queueFamilyProperties,
2553       &pQueueFamilyProperties[1].queueFamilyProperties,
2554       &pQueueFamilyProperties[2].queueFamilyProperties,
2555    };
2556    radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
2557    assert(*pCount <= 3);
2558
2559    for (uint32_t i = 0; i < *pCount; i++) {
2560       vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext)
2561       {
2562          switch (ext->sType) {
2563          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: {
2564             VkQueueFamilyGlobalPriorityPropertiesEXT *prop =
2565                (VkQueueFamilyGlobalPriorityPropertiesEXT *)ext;
2566             STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_EXT);
2567             prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
2568             memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
2569             break;
2570          }
2571          default:
2572             break;
2573          }
2574       }
2575    }
2576 }
2577
2578 static void
2579 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
2580                                   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2581 {
2582    RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2583    VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
2584
2585    /* For all memory heaps, the budget is computed as follows:
2586     *   heap_budget = heap_size - global_heap_usage + app_heap_usage
2587     *
2588     * The Vulkan spec 1.1.97 says that the budget should include any
2589     * currently allocated device memory.
2590     *
2591     * Note that the application heap usages are not entirely accurate
2592     * (e.g. in the presence of shared buffers).
2593     */
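   /* Worked example with made-up numbers: if heap_size = 8 GiB,
    * global_heap_usage = 3 GiB and app_heap_usage = 1 GiB, then
    * heap_budget = 8 - 3 + 1 = 6 GiB: the 5 GiB nobody uses yet plus the
    * 1 GiB this process has already allocated.
    */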
2594    if (!device->rad_info.has_dedicated_vram) {
2595       /* On APUs, the driver exposes fake heaps to the application because the carveout is
2596        * usually too small for games; the budgets then need to be redistributed accordingly.
2597        */
2598
2599       assert(device->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
2600       assert(device->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
2601       assert(device->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
2602       uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;
2603
2604       /* Get the visible VRAM/GTT heap sizes and internal usages. */
2605       uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;
2606       uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
2607
2608       uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
2609                                          device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2610       uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2611
2612       /* Compute the total heap size, internal and system usage. */
2613       uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
2614       uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
2615       uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
2616                                     device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2617
2618       uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
2619
2620       /* Compute the total free space that can be allocated for this process across all heaps. */
2621       uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
2622
2623       /* Compute the remaining visible VRAM size for this process. */
2624       uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);
2625
2626       /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap sizes,
2627        * and align down to the page size to be conservative.
2628        */
2629       vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space),
2630                                           device->rad_info.gart_page_size);
2631       uint64_t gtt_free_space = total_free_space - vram_vis_free_space;
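      /* Example with hypothetical numbers: if total_free_space = 3 GiB and
       * vram_vis_free_space = 4 GiB, the visible VRAM share is capped to
       * MIN2((3 * 2) / 3, 4) = 2 GiB (aligned down to the GART page size),
       * leaving gtt_free_space = 3 - 2 = 1 GiB.
       */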
2632
2633       memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
2634       memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
2635       memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
2636       memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
2637    } else {
2638       unsigned mask = device->heaps;
2639       unsigned heap = 0;
2640       while (mask) {
2641          uint64_t internal_usage = 0, system_usage = 0;
2642          unsigned type = 1u << u_bit_scan(&mask);
2643
2644          switch (type) {
2645          case RADV_HEAP_VRAM:
2646             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2647             system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
2648             break;
2649          case RADV_HEAP_VRAM_VIS:
2650             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
2651             if (!(device->heaps & RADV_HEAP_VRAM))
2652                internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2653             system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
2654             break;
2655          case RADV_HEAP_GTT:
2656             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2657             system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2658             break;
2659          }
2660
2661          uint64_t total_usage = MAX2(internal_usage, system_usage);
2662
2663          uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
2664                                MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
2665          memoryBudget->heapBudget[heap] = free_space + internal_usage;
2666          memoryBudget->heapUsage[heap] = internal_usage;
2667          ++heap;
2668       }
2669
2670       assert(heap == memory_properties->memoryHeapCount);
2671    }
2672
2673    /* The heapBudget and heapUsage values must be zero for array elements
2674     * greater than or equal to
2675     * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
2676     */
2677    for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
2678       memoryBudget->heapBudget[i] = 0;
2679       memoryBudget->heapUsage[i] = 0;
2680    }
2681 }
2682
2683 VKAPI_ATTR void VKAPI_CALL
2684 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2685                                         VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2686 {
2687    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2688
2689    pMemoryProperties->memoryProperties = pdevice->memory_properties;
2690
2691    VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2692       vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2693    if (memory_budget)
2694       radv_get_memory_budget_properties(physicalDevice, memory_budget);
2695 }
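/* Illustrative sketch (application side, not driver code): with
 * VK_EXT_memory_budget enabled, querying the budgets only requires chaining
 * the struct:
 *
 *    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceMemoryProperties2 mem_props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
 *       .pNext = &budget,
 *    };
 *    vkGetPhysicalDeviceMemoryProperties2(pdev, &mem_props);
 *
 * budget.heapBudget[i]/heapUsage[i] are then valid for every heap index
 * below memoryHeapCount and zero above it, as required by the spec.
 */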
2696
2697 VKAPI_ATTR VkResult VKAPI_CALL
2698 radv_GetMemoryHostPointerPropertiesEXT(
2699    VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
2700    VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
2701 {
2702    RADV_FROM_HANDLE(radv_device, device, _device);
2703
2704    switch (handleType) {
2705    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
2706       const struct radv_physical_device *physical_device = device->physical_device;
2707       uint32_t memoryTypeBits = 0;
2708       for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
2709          if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
2710              !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
2711             memoryTypeBits = (1 << i);
2712             break;
2713          }
2714       }
2715       pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
2716       return VK_SUCCESS;
2717    }
2718    default:
2719       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2720    }
2721 }
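/* Illustrative sketch (application side, not driver code), assuming
 * VK_EXT_external_memory_host is enabled: an app asks which memory types can
 * import a suitably aligned host allocation like so:
 *
 *    // 'alignment' stands for the device's reported
 *    // minImportedHostPointerAlignment limit; the entry point is fetched
 *    // via vkGetDeviceProcAddr in a real app.
 *    void *host_ptr = aligned_alloc(alignment, size);
 *    VkMemoryHostPointerPropertiesEXT props = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
 *    };
 *    vkGetMemoryHostPointerPropertiesEXT(
 *       device, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
 *       host_ptr, &props);
 *
 * The loop above makes RADV report a single cacheable (non-write-combined)
 * GTT memory type for such imports.
 */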
2722
2723 static enum radeon_ctx_priority
2724 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
2725 {
2726    /* Default to MEDIUM when a specific global priority isn't requested */
2727    if (!pObj)
2728       return RADEON_CTX_PRIORITY_MEDIUM;
2729
2730    switch (pObj->globalPriority) {
2731    case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2732       return RADEON_CTX_PRIORITY_REALTIME;
2733    case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2734       return RADEON_CTX_PRIORITY_HIGH;
2735    case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2736       return RADEON_CTX_PRIORITY_MEDIUM;
2737    case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2738       return RADEON_CTX_PRIORITY_LOW;
2739    default:
2740       unreachable("Illegal global priority value");
2741       return RADEON_CTX_PRIORITY_INVALID;
2742    }
2743 }
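/* Illustrative sketch (application side, not driver code): a global priority
 * reaches this function by being chained into the queue create info:
 *
 *    VkDeviceQueueGlobalPriorityCreateInfoEXT prio = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
 *       .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
 *    };
 *    VkDeviceQueueCreateInfo queue_info = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
 *       .pNext = &prio,
 *       // queueFamilyIndex, queueCount, pQueuePriorities as usual
 *    };
 *
 * radv_CreateDevice() below creates one hardware context per requested
 * priority and radv_queue_init() binds each queue to the matching context.
 */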
2744
2745 int
2746 radv_queue_init(struct radv_device *device, struct radv_queue *queue, int idx,
2747                 const VkDeviceQueueCreateInfo *create_info,
2748                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
2749 {
2750    queue->device = device;
2751    queue->priority = radv_get_queue_global_priority(global_priority);
2752    queue->hw_ctx = device->hw_ctx[queue->priority];
2753    queue->state.qf = vk_queue_to_radv(device->physical_device, create_info->queueFamilyIndex);
2754
2755    VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
2756    if (result != VK_SUCCESS)
2757       return result;
2758
2759    queue->vk.driver_submit = radv_queue_submit;
2760
2761    return VK_SUCCESS;
2762 }
2763
2764 static void
2765 radv_queue_state_finish(struct radv_queue_state *queue, struct radeon_winsys *ws)
2766 {
2767    if (queue->initial_full_flush_preamble_cs)
2768       ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2769    if (queue->initial_preamble_cs)
2770       ws->cs_destroy(queue->initial_preamble_cs);
2771    if (queue->continue_preamble_cs)
2772       ws->cs_destroy(queue->continue_preamble_cs);
2773    if (queue->descriptor_bo)
2774       ws->buffer_destroy(ws, queue->descriptor_bo);
2775    if (queue->scratch_bo)
2776       ws->buffer_destroy(ws, queue->scratch_bo);
2777    if (queue->esgs_ring_bo)
2778       ws->buffer_destroy(ws, queue->esgs_ring_bo);
2779    if (queue->gsvs_ring_bo)
2780       ws->buffer_destroy(ws, queue->gsvs_ring_bo);
2781    if (queue->tess_rings_bo)
2782       ws->buffer_destroy(ws, queue->tess_rings_bo);
2783    if (queue->task_rings_bo)
2784       ws->buffer_destroy(ws, queue->task_rings_bo);
2785    if (queue->gds_bo)
2786       ws->buffer_destroy(ws, queue->gds_bo);
2787    if (queue->gds_oa_bo)
2788       ws->buffer_destroy(ws, queue->gds_oa_bo);
2789    if (queue->compute_scratch_bo)
2790       ws->buffer_destroy(ws, queue->compute_scratch_bo);
2791 }
2792
2793 static void
2794 radv_queue_finish(struct radv_queue *queue)
2795 {
2796    radv_queue_state_finish(&queue->state, queue->device->ws);
2797    vk_queue_finish(&queue->vk);
2798 }
2799
2800 static VkResult
2801 radv_device_init_border_color(struct radv_device *device)
2802 {
2803    VkResult result;
2804
2805    result = device->ws->buffer_create(
2806       device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
2807       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
2808       RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
2809
2810    if (result != VK_SUCCESS)
2811       return vk_error(device, result);
2812
2813    result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
2814    if (result != VK_SUCCESS)
2815       return vk_error(device, result);
2816
2817    device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
2818    if (!device->border_color_data.colors_gpu_ptr)
2819       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2820    mtx_init(&device->border_color_data.mutex, mtx_plain);
2821
2822    return VK_SUCCESS;
2823 }
2824
2825 static void
2826 radv_device_finish_border_color(struct radv_device *device)
2827 {
2828    if (device->border_color_data.bo) {
2829       device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
2830       device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
2831
2832       mtx_destroy(&device->border_color_data.mutex);
2833    }
2834 }
2835
2836 static VkResult
2837 radv_device_init_vs_prologs(struct radv_device *device)
2838 {
2839    u_rwlock_init(&device->vs_prologs_lock);
2840    device->vs_prologs = _mesa_hash_table_create(NULL, &radv_hash_vs_prolog, &radv_cmp_vs_prolog);
2841    if (!device->vs_prologs)
2842       return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2843
2844    /* Don't pre-compile prologs if we want to print them. */
2845    if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
2846       return VK_SUCCESS;
2847
2848    struct radv_vs_input_state state;
2849    state.nontrivial_divisors = 0;
2850    memset(state.offsets, 0, sizeof(state.offsets));
2851    state.alpha_adjust_lo = 0;
2852    state.alpha_adjust_hi = 0;
2853    memset(state.formats, 0, sizeof(state.formats));
2854
2855    struct radv_vs_prolog_key key;
2856    key.state = &state;
2857    key.misaligned_mask = 0;
2858    key.as_ls = false;
2859    key.is_ngg = device->physical_device->use_ngg;
2860    key.next_stage = MESA_SHADER_VERTEX;
2861    key.wave32 = device->physical_device->ge_wave_size == 32;
2862
2863    for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
2864       state.attribute_mask = BITFIELD_MASK(i);
2865       state.instance_rate_inputs = 0;
2866
2867       key.num_attributes = i;
2868
2869       device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
2870       if (!device->simple_vs_prologs[i - 1])
2871          return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2872    }
2873
2874    unsigned idx = 0;
2875    for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) {
2876       state.attribute_mask = BITFIELD_MASK(num_attributes);
2877
2878       for (unsigned i = 0; i < num_attributes; i++)
2879          state.divisors[i] = 1;
2880
2881       for (unsigned count = 1; count <= num_attributes; count++) {
2882          for (unsigned start = 0; start <= (num_attributes - count); start++) {
2883             state.instance_rate_inputs = u_bit_consecutive(start, count);
2884
2885             key.num_attributes = num_attributes;
2886
2887             struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key);
2888             if (!prolog)
2889                return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2890
2891             assert(idx ==
2892                    radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs));
2893             device->instance_rate_vs_prologs[idx++] = prolog;
2894          }
2895       }
2896    }
2897    assert(idx == ARRAY_SIZE(device->instance_rate_vs_prologs));
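   /* Sanity of the count: for each attribute count n in [1, 16] the loops
    * above emit one prolog per consecutive run of instance-rate inputs,
    * i.e. n * (n + 1) / 2 variants, and the sum over n = 1..16 is
    * 1 + 3 + 6 + ... + 136 = 816 prologs.
    */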
2898
2899    return VK_SUCCESS;
2900 }
2901
2902 static void
2903 radv_device_finish_vs_prologs(struct radv_device *device)
2904 {
2905    if (device->vs_prologs) {
2906       hash_table_foreach(device->vs_prologs, entry)
2907       {
2908          free((void *)entry->key);
2909          radv_shader_part_destroy(device, entry->data);
2910       }
2911       _mesa_hash_table_destroy(device->vs_prologs, NULL);
2912    }
2913
2914    for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++)
2915       radv_shader_part_destroy(device, device->simple_vs_prologs[i]);
2916
2917    for (unsigned i = 0; i < ARRAY_SIZE(device->instance_rate_vs_prologs); i++)
2918       radv_shader_part_destroy(device, device->instance_rate_vs_prologs[i]);
2919 }
2920
2921 VkResult
2922 radv_device_init_vrs_state(struct radv_device *device)
2923 {
2924    /* FIXME: 4k depth buffers should be large enough for now but we might want to adjust this
2925     * dynamically at some point.
2926     */
2927    uint32_t width = 4096, height = 4096;
2928    VkDeviceMemory mem;
2929    VkBuffer buffer;
2930    VkResult result;
2931    VkImage image;
2932
2933    VkImageCreateInfo image_create_info = {
2934       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2935       .imageType = VK_IMAGE_TYPE_2D,
2936       .format = VK_FORMAT_D16_UNORM,
2937       .extent = {width, height, 1},
2938       .mipLevels = 1,
2939       .arrayLayers = 1,
2940       .samples = VK_SAMPLE_COUNT_1_BIT,
2941       .tiling = VK_IMAGE_TILING_OPTIMAL,
2942       .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
2943       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2944       .queueFamilyIndexCount = 0,
2945       .pQueueFamilyIndices = NULL,
2946       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2947    };
2948
2949    result = radv_CreateImage(radv_device_to_handle(device), &image_create_info,
2950                              &device->meta_state.alloc, &image);
2951    if (result != VK_SUCCESS)
2952       return result;
2953
2954    VkBufferCreateInfo buffer_create_info = {
2955       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2956       .size = radv_image_from_handle(image)->planes[0].surface.meta_size,
2957       .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
2958       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2959    };
2960
2961    result = radv_CreateBuffer(radv_device_to_handle(device), &buffer_create_info,
2962                               &device->meta_state.alloc, &buffer);
2963    if (result != VK_SUCCESS)
2964       goto fail_create;
2965
2966    VkBufferMemoryRequirementsInfo2 info = {
2967       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
2968       .buffer = buffer,
2969    };
2970    VkMemoryRequirements2 mem_req = {
2971       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2972    };
2973    radv_GetBufferMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req);
2974
2975    VkMemoryAllocateInfo alloc_info = {
2976       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2977       .allocationSize = mem_req.memoryRequirements.size,
2978    };
2979
2980    result = radv_AllocateMemory(radv_device_to_handle(device), &alloc_info,
2981                                 &device->meta_state.alloc, &mem);
2982    if (result != VK_SUCCESS)
2983       goto fail_alloc;
2984
2985    VkBindBufferMemoryInfo bind_info = {
2986       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
2987       .buffer = buffer,
2988       .memory = mem,
2989       .memoryOffset = 0
2990    };
2991
2992    result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info);
2993    if (result != VK_SUCCESS)
2994       goto fail_bind;
2995
2996    device->vrs.image = radv_image_from_handle(image);
2997    device->vrs.buffer = radv_buffer_from_handle(buffer);
2998    device->vrs.mem = radv_device_memory_from_handle(mem);
2999
3000    return VK_SUCCESS;
3001
3002 fail_bind:
3003    radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);
3004 fail_alloc:
3005    radv_DestroyBuffer(radv_device_to_handle(device), buffer, &device->meta_state.alloc);
3006 fail_create:
3007    radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);
3008
3009    return result;
3010 }
3011
3012 static void
3013 radv_device_finish_vrs_image(struct radv_device *device)
3014 {
3015    if (!device->vrs.image)
3016       return;
3017
3018    radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
3019                    &device->meta_state.alloc);
3020    radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer),
3021                      &device->meta_state.alloc);
3022    radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image),
3023                      &device->meta_state.alloc);
3024 }
3025
3026 static enum radv_force_vrs
3027 radv_parse_vrs_rates(const char *str)
3028 {
3029    if (!strcmp(str, "2x2")) {
3030       return RADV_FORCE_VRS_2x2;
3031    } else if (!strcmp(str, "2x1")) {
3032       return RADV_FORCE_VRS_2x1;
3033    } else if (!strcmp(str, "1x2")) {
3034       return RADV_FORCE_VRS_1x2;
3035    } else if (!strcmp(str, "1x1")) {
3036       return RADV_FORCE_VRS_1x1;
3037    }
3038
3039    fprintf(stderr, "radv: Invalid VRS rates specified (valid values are 2x2, 2x1, 1x2 and 1x1)\n");
3040    return RADV_FORCE_VRS_1x1;
3041 }
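/* Example usage (shell), with a hypothetical app binary: force a coarse
 * shading rate for every draw with
 *
 *    RADV_FORCE_VRS=2x2 ./my_vulkan_app
 *
 * Invalid strings fall back to 1x1, i.e. VRS is effectively disabled.
 */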
3042
3043 static const char *
3044 radv_get_force_vrs_config_file(void)
3045 {
3046    return getenv("RADV_FORCE_VRS_CONFIG_FILE");
3047 }
3048
3049 static enum radv_force_vrs
3050 radv_parse_force_vrs_config_file(const char *config_file)
3051 {
3052    enum radv_force_vrs force_vrs = RADV_FORCE_VRS_1x1;
3053    char buf[4];
3054    FILE *f;
3055
3056    f = fopen(config_file, "r");
3057    if (!f) {
3058       fprintf(stderr, "radv: Can't open file: '%s'.\n", config_file);
3059       return force_vrs;
3060    }
3061
3062    if (fread(buf, sizeof(buf), 1, f) == 1) {
3063       buf[3] = '\0';
3064       force_vrs = radv_parse_vrs_rates(buf);
3065    }
3066
3067    fclose(f);
3068    return force_vrs;
3069 }
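/* Example (shell): the fread() above requests one full sizeof(buf) = 4 byte
 * item, so the config file must hold at least 4 bytes; a trailing newline is
 * enough because only the first 3 characters are kept:
 *
 *    echo 2x2 > /tmp/radv_vrs                       # "2x2\n", 4 bytes
 *    RADV_FORCE_VRS_CONFIG_FILE=/tmp/radv_vrs ./my_vulkan_app
 *
 * The file may be rewritten while the app runs; the inotify notifier below
 * picks up the change.
 */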
3070
3071 #ifdef __linux__
3072
3073 #define BUF_LEN ((10 * (sizeof(struct inotify_event) + NAME_MAX + 1)))
3074
3075 static int
3076 radv_notifier_thread_run(void *data)
3077 {
3078    struct radv_device *device = data;
3079    struct radv_notifier *notifier = &device->notifier;
3080    char buf[BUF_LEN];
3081
3082    while (!notifier->quit) {
3083       const char *file = radv_get_force_vrs_config_file();
3084       struct timespec tm = { .tv_nsec = 100000000 }; /* 100ms */
3085       int length, i = 0;
3086
3087       length = read(notifier->fd, buf, BUF_LEN);
3088       while (i < length) {
3089          struct inotify_event *event = (struct inotify_event *)&buf[i];
3090
3091          i += sizeof(struct inotify_event) + event->len;
3092          if (event->mask & IN_MODIFY || event->mask & IN_DELETE_SELF) {
3093             /* Sleep 100ms for editors that use a temporary file and delete the original. */
3094             thrd_sleep(&tm, NULL);
3095             device->force_vrs = radv_parse_force_vrs_config_file(file);
3096
3097             fprintf(stderr, "radv: Updated the per-vertex VRS rate to '%d'.\n", device->force_vrs);
3098
3099             if (event->mask & IN_DELETE_SELF) {
3100                inotify_rm_watch(notifier->fd, notifier->watch);
3101                notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF);
3102             }
3103          }
3104       }
3105
3106       thrd_sleep(&tm, NULL);
3107    }
3108
3109    return 0;
3110 }
3111
3112 #endif
3113
3114 static int
3115 radv_device_init_notifier(struct radv_device *device)
3116 {
3117 #ifndef __linux__
3118    return true;
3119 #else
3120    struct radv_notifier *notifier = &device->notifier;
3121    const char *file = radv_get_force_vrs_config_file();
3122    int ret;
3123
3124    notifier->fd = inotify_init1(IN_NONBLOCK);
3125    if (notifier->fd < 0)
3126       return false;
3127
3128    notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF);
3129    if (notifier->watch < 0)
3130       goto fail_watch;
3131
3132    ret = thrd_create(&notifier->thread, radv_notifier_thread_run, device);
3133    if (ret)
3134       goto fail_thread;
3135
3136    return true;
3137
3138 fail_thread:
3139    inotify_rm_watch(notifier->fd, notifier->watch);
3140 fail_watch:
3141    close(notifier->fd);
3142
3143    return false;
3144 #endif
3145 }
3146
3147 static void
3148 radv_device_finish_notifier(struct radv_device *device)
3149 {
3150 #ifdef __linux__
3151    struct radv_notifier *notifier = &device->notifier;
3152
3153    if (!notifier->thread)
3154       return;
3155
3156    notifier->quit = true;
3157    thrd_join(notifier->thread, NULL);
3158    inotify_rm_watch(notifier->fd, notifier->watch);
3159    close(notifier->fd);
3160 #endif
3161 }
3162
3163 static void
3164 radv_device_finish_perf_counter_lock_cs(struct radv_device *device)
3165 {
3166    if (!device->perf_counter_lock_cs)
3167       return;
3168
3169    for (unsigned i = 0; i < 2 * PERF_CTR_MAX_PASSES; ++i) {
3170       if (device->perf_counter_lock_cs[i])
3171          device->ws->cs_destroy(device->perf_counter_lock_cs[i]);
3172    }
3173
3174    free(device->perf_counter_lock_cs);
3175 }
3176
3177 VKAPI_ATTR VkResult VKAPI_CALL
3178 radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
3179                   const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
3180 {
3181    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
3182    VkResult result;
3183    struct radv_device *device;
3184
3185    bool keep_shader_info = false;
3186    bool robust_buffer_access = false;
3187    bool robust_buffer_access2 = false;
3188    bool overallocation_disallowed = false;
3189    bool custom_border_colors = false;
3190    bool attachment_vrs_enabled = false;
3191    bool image_float32_atomics = false;
3192    bool vs_prologs = false;
3193    bool global_bo_list = false;
3194    bool image_2d_view_of_3d = false;
3195    bool primitives_generated_query = false;
3196    bool use_perf_counters = false;
3197
3198    /* Check enabled features */
3199    if (pCreateInfo->pEnabledFeatures) {
3200       if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
3201          robust_buffer_access = true;
3202    }
3203
3204    vk_foreach_struct_const(ext, pCreateInfo->pNext)
3205    {
3206       switch (ext->sType) {
3207       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
3208          const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
3209          if (features->features.robustBufferAccess)
3210             robust_buffer_access = true;
3211          break;
3212       }
3213       case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
3214          const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
3215          if (overallocation->overallocationBehavior ==
3216              VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
3217             overallocation_disallowed = true;
3218          break;
3219       }
3220       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
3221          const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =
3222             (const void *)ext;
3223          custom_border_colors = border_color_features->customBorderColors;
3224          break;
3225       }
3226       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
3227          const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
3228          attachment_vrs_enabled = vrs->attachmentFragmentShadingRate;
3229          break;
3230       }
3231       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
3232          const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
3233          if (features->robustBufferAccess2)
3234             robust_buffer_access2 = true;
3235          break;
3236       }
3237       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
3238          const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (const void *)ext;
3239          if (features->shaderImageFloat32Atomics ||
3240              features->sparseImageFloat32Atomics)
3241             image_float32_atomics = true;
3242          break;
3243       }
3244       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
3245          const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (const void *)ext;
3246          if (features->shaderImageFloat32AtomicMinMax ||
3247              features->sparseImageFloat32AtomicMinMax)
3248             image_float32_atomics = true;
3249          break;
3250       }
3251       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
3252          const VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features = (const void *)ext;
3253          if (features->vertexInputDynamicState)
3254             vs_prologs = true;
3255          break;
3256       }
3257       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
3258          const VkPhysicalDeviceVulkan12Features *features = (const void *)ext;
3259          if (features->bufferDeviceAddress || features->descriptorIndexing)
3260             global_bo_list = true;
3261          break;
3262       }
3263       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
3264          const VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features = (const void *)ext;
3265          if (features->image2DViewOf3D)
3266             image_2d_view_of_3d = true;
3267          break;
3268       }
3269       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
3270          const VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features = (const void *)ext;
3271          if (features->primitivesGeneratedQuery ||
3272              features->primitivesGeneratedQueryWithRasterizerDiscard ||
3273              features->primitivesGeneratedQueryWithNonZeroStreams)
3274             primitives_generated_query = true;
3275          break;
3276       }
3277       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
3278          const VkPhysicalDevicePerformanceQueryFeaturesKHR *features = (const void *)ext;
3279          if (features->performanceCounterQueryPools)
3280             use_perf_counters = true;
3281          break;
3282       }
3283       default:
3284          break;
3285       }
3286    }
3287
3288    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
3289                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3290    if (!device)
3291       return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
3292
3293    struct vk_device_dispatch_table dispatch_table;
3294
3295    if (physical_device->instance->vk.app_info.app_name &&
3296        !strcmp(physical_device->instance->vk.app_info.app_name, "metroexodus")) {
3297       /* Metro Exodus (Linux native) calls vkGetSemaphoreCounterValue() with a NULL semaphore and it
3298        * crashes sometimes. Work around this game bug by enabling an internal layer. Remove this
3299        * when the game is fixed.
3300        */
3301       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &metro_exodus_device_entrypoints, true);
3302       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
3303    } else if (radv_thread_trace_enabled()) {
3304       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);
3305       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
3306    } else {
3307       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true);
3308    }
3309    vk_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_device_entrypoints, false);
3310
3311    result =
3312       vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator);
3313    if (result != VK_SUCCESS) {
3314       vk_free(&device->vk.alloc, device);
3315       return result;
3316    }
3317
3318    device->instance = physical_device->instance;
3319    device->physical_device = physical_device;
3320    simple_mtx_init(&device->trace_mtx, mtx_plain);
3321    simple_mtx_init(&device->pstate_mtx, mtx_plain);
3322
3323    device->ws = physical_device->ws;
3324    vk_device_set_drm_fd(&device->vk, device->ws->get_fd(device->ws));
3325
3326    /* With update-after-bind we can't attach BOs to the command buffer
3327     * from the descriptor set anymore, so we have to use a global BO list.
3328     */
3329    device->use_global_bo_list = global_bo_list ||
3330                                 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
3331                                 device->vk.enabled_extensions.EXT_descriptor_indexing ||
3332                                 device->vk.enabled_extensions.EXT_buffer_device_address ||
3333                                 device->vk.enabled_extensions.KHR_buffer_device_address ||
3334                                 device->vk.enabled_extensions.KHR_ray_tracing_pipeline ||
3335                                 device->vk.enabled_extensions.KHR_acceleration_structure ||
3336                                 device->vk.enabled_extensions.VALVE_descriptor_set_host_mapping;
3337
3338    device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
3339    device->robust_buffer_access2 = robust_buffer_access2;
3340
3341    device->attachment_vrs_enabled = attachment_vrs_enabled;
3342
3343    device->image_float32_atomics = image_float32_atomics;
3344
3345    device->image_2d_view_of_3d = image_2d_view_of_3d;
3346
3347    device->primitives_generated_query = primitives_generated_query;
3348
3349    radv_init_shader_arenas(device);
3350
3351    device->overallocation_disallowed = overallocation_disallowed;
3352    mtx_init(&device->overallocation_mutex, mtx_plain);
3353
3354    /* Create one context per queue priority. */
3355    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3356       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3357       const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3358          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3359       enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
3360
3361       if (device->hw_ctx[priority])
3362          continue;
3363
3364       result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
3365       if (result != VK_SUCCESS)
3366          goto fail;
3367    }
3368
3369    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3370       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3371       uint32_t qfi = queue_create->queueFamilyIndex;
3372       const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3373          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3374
3375       device->queues[qfi] =
3376          vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
3377                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3378       if (!device->queues[qfi]) {
3379          result = VK_ERROR_OUT_OF_HOST_MEMORY;
3380          goto fail;
3381       }
3382
3383       memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
3384
3385       device->queue_count[qfi] = queue_create->queueCount;
3386
3387       for (unsigned q = 0; q < queue_create->queueCount; q++) {
3388          result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority);
3389          if (result != VK_SUCCESS)
3390             goto fail;
3391       }
3392    }
3393    device->private_sdma_queue = VK_NULL_HANDLE;
3394
3395    device->pbb_allowed = device->physical_device->rad_info.gfx_level >= GFX9 &&
3396                          !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
3397
3398    /* The maximum number of scratch waves. Scratch space isn't divided
3399     * evenly between CUs. The number is only a function of the number of CUs.
3400     * We can decrease the constant to decrease the scratch buffer size.
3401     *
3402     * scratch_waves must be >= the maximum possible size of
3403     * 1 threadgroup, so that the hw doesn't hang from being unable
3404     * to start any.
3405     *
3406     * The recommended value is 4 per CU at most. Higher numbers don't
3407     * bring much benefit, but they still occupy chip resources (think
3408     * async compute). I've seen ~2% performance difference between 4 and 32.
3409     */
3410    uint32_t max_threads_per_block = 2048;
3411    device->scratch_waves =
3412       MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);
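   /* Example: a 40-CU GPU gets MAX2(32 * 40, 2048 / 64) = MAX2(1280, 32) =
    * 1280 scratch waves; the max_threads_per_block / 64 term only matters
    * for very small CU counts.
    */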
3413
3414    device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
3415
3416    if (device->physical_device->rad_info.gfx_level >= GFX7) {
3417       /* If the KMD allows it (there is a KMD hw register for it),
3418        * allow launching waves out-of-order.
3419        */
3420       device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
3421    }
3422
3423    if (device->instance->debug_flags & RADV_DEBUG_HANG) {
3424       /* Enable GPU hangs detection and dump logs if a GPU hang is
3425        * detected.
3426        */
3427       keep_shader_info = true;
3428
3429       if (!radv_init_trace(device))
3430          goto fail;
3431
3432       fprintf(stderr,
3433               "*****************************************************************************\n");
3434       fprintf(stderr,
3435               "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
3436       fprintf(stderr,
3437               "*****************************************************************************\n");
3438
3439       /* Wait for idle after every draw/dispatch to identify the
3440        * first bad call.
3441        */
3442       device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
3443
3444       radv_dump_enabled_options(device, stderr);
3445    }
3446
3447    if (radv_thread_trace_enabled()) {
3448       if (device->physical_device->rad_info.gfx_level < GFX8 ||
3449           device->physical_device->rad_info.gfx_level > GFX10_3) {
3450          fprintf(stderr, "GPU hardware not supported: refer to "
3451                          "the RGP documentation for the list of "
3452                          "supported GPUs!\n");
3453          abort();
3454       }
3455
3456       if (!radv_thread_trace_init(device))
3457          goto fail;
3458
3459       fprintf(stderr, "radv: Thread trace support is enabled (initial buffer size: %u MiB, "
3460                       "instruction timing: %s, cache counters: %s).\n",
3461               device->thread_trace.buffer_size / (1024 * 1024),
3462               radv_is_instruction_timing_enabled() ? "enabled" : "disabled",
3463               radv_spm_trace_enabled() ? "enabled" : "disabled");
3464
3465       if (radv_spm_trace_enabled()) {
3466          if (device->physical_device->rad_info.gfx_level >= GFX10) {
3467             if (!radv_spm_init(device))
3468                goto fail;
3469          } else {
3470             fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n",
3471                     device->physical_device->name);
3472          }
3473       }
3474    }
3475
3476    if (getenv("RADV_TRAP_HANDLER")) {
3477       /* TODO: Add support for more hardware. */
3478       assert(device->physical_device->rad_info.gfx_level == GFX8);
3479
3480       fprintf(stderr, "**********************************************************************\n");
3481       fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
3482       fprintf(stderr, "**********************************************************************\n");
3483
3484       /* To get the disassembly of the faulty shaders, we have to
3485        * keep some shader info around.
3486        */
3487       keep_shader_info = true;
3488
3489       if (!radv_trap_handler_init(device))
3490          goto fail;
3491    }
3492
3493    if (device->physical_device->rad_info.gfx_level >= GFX10_3) {
3494       if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) {
3495          const char *file = radv_get_force_vrs_config_file();
3496
3497          device->force_vrs = radv_parse_force_vrs_config_file(file);
3498
3499          if (radv_device_init_notifier(device)) {
3500             device->force_vrs_enabled = true;
3501          } else {
3502             fprintf(stderr, "radv: Failed to initialize the notifier for RADV_FORCE_VRS_CONFIG_FILE!\n");
3503          }
3504       } else if (getenv("RADV_FORCE_VRS")) {
3505          const char *vrs_rates = getenv("RADV_FORCE_VRS");
3506
3507          device->force_vrs = radv_parse_vrs_rates(vrs_rates);
3508          device->force_vrs_enabled = device->force_vrs != RADV_FORCE_VRS_1x1;
3509       }
3510    }
3511
3512    /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
3513    device->load_grid_size_from_user_sgpr = device->physical_device->rad_info.gfx_level >= GFX10_3;
3514
3515    device->keep_shader_info = keep_shader_info;
3516    result = radv_device_init_meta(device);
3517    if (result != VK_SUCCESS)
3518       goto fail;
3519
3520    radv_device_init_msaa(device);
3521
3522    /* If the border color extension is enabled, let's create the buffer we need. */
3523    if (custom_border_colors) {
3524       result = radv_device_init_border_color(device);
3525       if (result != VK_SUCCESS)
3526          goto fail;
3527    }
3528
3529    if (vs_prologs) {
3530       result = radv_device_init_vs_prologs(device);
3531       if (result != VK_SUCCESS)
3532          goto fail;
3533    }
3534
3535    if (device->physical_device->rad_info.gfx_level >= GFX7)
3536       cik_create_gfx_config(device);
3537
3538    VkPipelineCacheCreateInfo ci;
3539    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
3540    ci.pNext = NULL;
3541    ci.flags = 0;
3542    ci.pInitialData = NULL;
3543    ci.initialDataSize = 0;
3544    VkPipelineCache pc;
3545    result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc);
3546    if (result != VK_SUCCESS)
3547       goto fail_meta;
3548
3549    device->mem_cache = radv_pipeline_cache_from_handle(pc);
3550
3551    device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
3552    if (device->force_aniso >= 0) {
3553       fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
3554               1 << util_logbase2(device->force_aniso));
3555    }
3556
3557    if (use_perf_counters) {
3558       size_t bo_size = PERF_CTR_BO_PASS_OFFSET + sizeof(uint64_t) * PERF_CTR_MAX_PASSES;
3559       result =
3560          device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT,
3561                                    RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
3562                                    RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo);
3563       if (result != VK_SUCCESS)
3564          goto fail_cache;
3565
3566       device->perf_counter_lock_cs =
3567          calloc(2 * PERF_CTR_MAX_PASSES, sizeof(struct radeon_winsys_cs *));
3568       if (!device->perf_counter_lock_cs) {
3569          result = VK_ERROR_OUT_OF_HOST_MEMORY;
3570          goto fail_cache;
3571       }
3572    }
3573
3574    *pDevice = radv_device_to_handle(device);
3575    return VK_SUCCESS;
3576
3577 fail_cache:
3578    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3579 fail_meta:
3580    radv_device_finish_meta(device);
3581 fail:
3582    radv_thread_trace_finish(device);
3583
3584    radv_spm_finish(device);
3585
3586    radv_trap_handler_finish(device);
3587    radv_finish_trace(device);
3588
3589    radv_device_finish_perf_counter_lock_cs(device);
3590    if (device->perf_counter_bo)
3591       device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
3592    if (device->gfx_init)
3593       device->ws->buffer_destroy(device->ws, device->gfx_init);
3594
3595    radv_device_finish_notifier(device);
3596    radv_device_finish_vs_prologs(device);
3597    radv_device_finish_border_color(device);
3598
3599    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3600       for (unsigned q = 0; q < device->queue_count[i]; q++)
3601          radv_queue_finish(&device->queues[i][q]);
3602       if (device->queue_count[i])
3603          vk_free(&device->vk.alloc, device->queues[i]);
3604    }
3605
3606    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
3607       if (device->hw_ctx[i])
3608          device->ws->ctx_destroy(device->hw_ctx[i]);
3609    }
3610
3611    simple_mtx_destroy(&device->pstate_mtx);
3612    simple_mtx_destroy(&device->trace_mtx);
3613    mtx_destroy(&device->overallocation_mutex);
3614
3615    vk_device_finish(&device->vk);
3616    vk_free(&device->vk.alloc, device);
3617    return result;
3618 }
3619
3620 VKAPI_ATTR void VKAPI_CALL
3621 radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
3622 {
3623    RADV_FROM_HANDLE(radv_device, device, _device);
3624
3625    if (!device)
3626       return;
3627
3628    radv_device_finish_perf_counter_lock_cs(device);
3629    if (device->perf_counter_bo)
3630       device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
3631
3632    if (device->gfx_init)
3633       device->ws->buffer_destroy(device->ws, device->gfx_init);
3634
3635    radv_device_finish_notifier(device);
3636    radv_device_finish_vs_prologs(device);
3637    radv_device_finish_border_color(device);
3638    radv_device_finish_vrs_image(device);
3639
3640    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3641       for (unsigned q = 0; q < device->queue_count[i]; q++)
3642          radv_queue_finish(&device->queues[i][q]);
3643       if (device->queue_count[i])
3644          vk_free(&device->vk.alloc, device->queues[i]);
3645    }
3646    if (device->private_sdma_queue != VK_NULL_HANDLE) {
3647       radv_queue_finish(device->private_sdma_queue);
3648       vk_free(&device->vk.alloc, device->private_sdma_queue);
3649    }
3650
3651    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
3652       if (device->hw_ctx[i])
3653          device->ws->ctx_destroy(device->hw_ctx[i]);
3654    }
3655
3656    mtx_destroy(&device->overallocation_mutex);
3657    simple_mtx_destroy(&device->pstate_mtx);
3658    simple_mtx_destroy(&device->trace_mtx);
3659
3660    radv_device_finish_meta(device);
3661
3662    VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
3663    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3664
3665    radv_trap_handler_finish(device);
3666    radv_finish_trace(device);
3667
3668    radv_destroy_shader_arenas(device);
3669
3670    radv_thread_trace_finish(device);
3671
3672    radv_spm_finish(device);
3673
3674    vk_device_finish(&device->vk);
3675    vk_free(&device->vk.alloc, device);
3676 }
3677
3678 VKAPI_ATTR VkResult VKAPI_CALL
3679 radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties)
3680 {
3681    if (pProperties == NULL) {
3682       *pPropertyCount = 0;
3683       return VK_SUCCESS;
3684    }
3685
3686    /* None supported at this time */
3687    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3688 }
3689
3690 VKAPI_ATTR VkResult VKAPI_CALL
3691 radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
3692                                     VkLayerProperties *pProperties)
3693 {
3694    if (pProperties == NULL) {
3695       *pPropertyCount = 0;
3696       return VK_SUCCESS;
3697    }
3698
3699    /* None supported at this time */
3700    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3701 }
3702
3703 static void
3704 radv_fill_shader_rings(struct radv_device *device, uint32_t *map, bool add_sample_positions,
3705                        uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,
3706                        uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,
3707                        struct radeon_winsys_bo *tess_rings_bo,
3708                        struct radeon_winsys_bo *task_rings_bo,
3709                        struct radeon_winsys_bo *mesh_scratch_ring_bo)
3710 {
3711    uint32_t *desc = &map[4];
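   /* Each ring below is described by one or two 4-dword AMD buffer
    * descriptors (base address in words 0-1, num_records in word 2,
    * dst_sel/format/swizzle bits in word 3), and desc advances by 8 dwords
    * (two descriptor slots) per ring; &map[4] skips the first descriptor
    * slot, which is written by the caller.
    */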
3712
3713    if (esgs_ring_bo) {
3714       uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
3715
3716       /* stride 0, num records = size, add tid, swizzle, element size 4,
3717          index stride 64 */
3718       desc[0] = esgs_va;
3719       desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3720       desc[2] = esgs_ring_size;
3721       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3722                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3723                 S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
3724
3725       if (device->physical_device->rad_info.gfx_level >= GFX11)
3726          desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
3727       else
3728          desc[1] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
3729
3730       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3731          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3732                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3733       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3734          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3735                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3736       } else {
3737          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3738                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
3739       }
3740
3741       /* GS entry for ES->GS ring */
3742       /* stride 0, num records = size, element size 0,
3743          index stride 0 */
3744       desc[4] = esgs_va;
3745       desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3746       desc[6] = esgs_ring_size;
3747       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3748                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3749
3750       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3751          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3752                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3753       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3754          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3755                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3756       } else {
3757          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3758                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3759       }
3760    }
3761
3762    desc += 8;
3763
3764    if (gsvs_ring_bo) {
3765       uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
3766
3767       /* VS entry for GS->VS ring */
3768       /* stride 0, num records = size, element size 0,
3769          index stride 0 */
3770       desc[0] = gsvs_va;
3771       desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3772       desc[2] = gsvs_ring_size;
3773       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3774                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3775
3776       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3777          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3778                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3779       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3780          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3781                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3782       } else {
3783          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3784                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3785       }
3786
3787       /* stride gsvs_itemsize, num records 64,
3788          elsize 4, index stride 16 */
3789       /* shader will patch stride and desc[2] */
3790       desc[4] = gsvs_va;
3791       desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3792       desc[6] = 0;
3793       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3794                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3795                 S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
3796
3797       if (device->physical_device->rad_info.gfx_level >= GFX11)
3798          desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
3799       else
3800          desc[5] |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
3801
3802       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3803          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3804                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3805       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3806          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3807                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3808       } else {
3809          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3810                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
3811       }
3812    }
3813
3814    desc += 8;
3815
3816    if (tess_rings_bo) {
3817       uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
3818       uint64_t tess_offchip_va = tess_va + device->physical_device->hs.tess_offchip_ring_offset;
3819
3820       desc[0] = tess_va;
3821       desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
3822       desc[2] = device->physical_device->hs.tess_factor_ring_size;
3823       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3824                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3825
3826       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3827          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3828                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
3829       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3830          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3831                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3832       } else {
3833          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3834                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3835       }
3836
3837       desc[4] = tess_offchip_va;
3838       desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
3839       desc[6] = device->physical_device->hs.tess_offchip_ring_size;
3840       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3841                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3842
3843       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3844          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3845                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
3846       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
3847          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3848                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3849       } else {
3850          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3851                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3852       }
3853    }
3854
3855    desc += 8;
3856
3857    if (task_rings_bo) {
3858       uint64_t task_va = radv_buffer_get_va(task_rings_bo);
3859       uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
3860       uint64_t task_payload_ring_va = task_va + device->physical_device->task_info.payload_ring_offset;
3861
3862       desc[0] = task_draw_ring_va;
3863       desc[1] = S_008F04_BASE_ADDRESS_HI(task_draw_ring_va >> 32);
3864       desc[2] = device->physical_device->task_info.num_entries * AC_TASK_DRAW_ENTRY_BYTES;
3865       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3866                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3867
3868       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3869          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
3870                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3871       } else {
3872          assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
3873          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
3874                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3875       }
3876
3877       desc[4] = task_payload_ring_va;
3878       desc[5] = S_008F04_BASE_ADDRESS_HI(task_payload_ring_va >> 32);
3879       desc[6] = device->physical_device->task_info.num_entries * AC_TASK_PAYLOAD_ENTRY_BYTES;
3880       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3881                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3882
3883       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3884          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
3885                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3886       } else {
3887          assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
3888          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
3889                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3890       }
3891    }
3892
3893    desc += 8;
3894
3895    if (mesh_scratch_ring_bo) {
3896       uint64_t va = radv_buffer_get_va(mesh_scratch_ring_bo);
3897
3898       desc[0] = va;
3899       desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
3900       desc[2] = RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES;
3901       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3902                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3903
3904       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3905          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_UINT) |
3906                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED);
3907       } else {
3908          assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
3909          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT) |
3910                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3911       }
3912    }
3913
3914    desc += 4;
3915
3916    if (add_sample_positions) {
3917       /* add sample positions after all rings */
3918       memcpy(desc, device->sample_locations_1x, 8);
3919       desc += 2;
3920       memcpy(desc, device->sample_locations_2x, 16);
3921       desc += 4;
3922       memcpy(desc, device->sample_locations_4x, 32);
3923       desc += 8;
3924       memcpy(desc, device->sample_locations_8x, 64);
3925    }
3926 }
3927
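/* Program the ES->GS and GS->VS ring sizes (in 256-byte units). These live in
 * uconfig registers on GFX7+ and in config registers on older chips.
 */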
3928 static void
3929 radv_emit_gs_ring_sizes(struct radv_device *device, struct radeon_cmdbuf *cs,
3930                         struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,
3931                         struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
3932 {
3933    if (!esgs_ring_bo && !gsvs_ring_bo)
3934       return;
3935
3936    if (esgs_ring_bo)
3937       radv_cs_add_buffer(device->ws, cs, esgs_ring_bo);
3938
3939    if (gsvs_ring_bo)
3940       radv_cs_add_buffer(device->ws, cs, gsvs_ring_bo);
3941
3942    if (device->physical_device->rad_info.gfx_level >= GFX7) {
3943       radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
3944       radeon_emit(cs, esgs_ring_size >> 8);
3945       radeon_emit(cs, gsvs_ring_size >> 8);
3946    } else {
3947       radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
3948       radeon_emit(cs, esgs_ring_size >> 8);
3949       radeon_emit(cs, gsvs_ring_size >> 8);
3950    }
3951 }
3952
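/* Program the tessellation factor ring base/size and the off-chip tessellation
 * parameters. Note that VGT_TF_RING_SIZE is a per-SE size on GFX11.
 */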
3953 static void
3954 radv_emit_tess_factor_ring(struct radv_device *device, struct radeon_cmdbuf *cs,
3955                            struct radeon_winsys_bo *tess_rings_bo)
3956 {
3957    uint64_t tf_va;
3958    uint32_t tf_ring_size;
3959    if (!tess_rings_bo)
3960       return;
3961
3962    tf_ring_size = device->physical_device->hs.tess_factor_ring_size / 4;
3963    tf_va = radv_buffer_get_va(tess_rings_bo);
3964
3965    radv_cs_add_buffer(device->ws, cs, tess_rings_bo);
3966
3967    if (device->physical_device->rad_info.gfx_level >= GFX7) {
3968       if (device->physical_device->rad_info.gfx_level >= GFX11) {
3969          /* TF_RING_SIZE is per SE on GFX11. */
3970          tf_ring_size /= device->physical_device->rad_info.max_se;
3971       }
3972
3973       radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size));
3974       radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
3975
3976       if (device->physical_device->rad_info.gfx_level >= GFX10) {
3977          radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI,
3978                                 S_030984_BASE_HI(tf_va >> 40));
3979       } else if (device->physical_device->rad_info.gfx_level == GFX9) {
3980          radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
3981       }
3982
3983       radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
3984    } else {
3985       radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size));
3986       radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
3987       radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, device->physical_device->hs.hs_offchip_param);
3988    }
3989 }
3990
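/* Initialize the task shader control buffer from the CPU. The layout written
 * below is: 64-bit write/read/dealloc pointers (all starting at num_entries),
 * the number of ring entries, and the 64-bit draw ring address.
 */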
3991 static VkResult
3992 radv_initialise_task_control_buffer(struct radv_device *device,
3993                                     struct radeon_winsys_bo *task_rings_bo)
3994 {
3995    uint32_t *ptr = (uint32_t *)device->ws->buffer_map(task_rings_bo);
3996    if (!ptr)
3997       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
3998
3999    const uint32_t num_entries = device->physical_device->task_info.num_entries;
4000    const uint64_t task_va = radv_buffer_get_va(task_rings_bo);
4001    const uint64_t task_draw_ring_va = task_va + device->physical_device->task_info.draw_ring_offset;
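   /* The task draw ring address must be 256-byte aligned (low byte clear). */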
4002    assert((task_draw_ring_va & 0xFFFFFF00) == (task_draw_ring_va & 0xFFFFFFFF));
4003
4004    /* 64-bit write_ptr */
4005    ptr[0] = num_entries;
4006    ptr[1] = 0;
4007    /* 64-bit read_ptr */
4008    ptr[2] = num_entries;
4009    ptr[3] = 0;
4010    /* 64-bit dealloc_ptr */
4011    ptr[4] = num_entries;
4012    ptr[5] = 0;
4013    /* num_entries */
4014    ptr[6] = num_entries;
4015    /* 64-bit draw ring address */
4016    ptr[7] = task_draw_ring_va;
4017    ptr[8] = task_draw_ring_va >> 32;
4018
4019    device->ws->buffer_unmap(task_rings_bo);
4020    return VK_SUCCESS;
4021 }
4022
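/* Emit DISPATCH_TASK_STATE_INIT to tell the GPU where the task control buffer
 * lives. This is emitted on both the GFX queue (compute = false) and the
 * compute queue (compute = true) because the task rings BO is shared.
 */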
4023 static void
4024 radv_emit_task_rings(struct radv_device *device, struct radeon_cmdbuf *cs,
4025                      struct radeon_winsys_bo *task_rings_bo, bool compute)
4026 {
4027    if (!task_rings_bo)
4028       return;
4029
4030    const uint64_t task_ctrlbuf_va = radv_buffer_get_va(task_rings_bo);
4031    assert(radv_is_aligned(task_ctrlbuf_va, 256));
4032    radv_cs_add_buffer(device->ws, cs, task_rings_bo);
4033
4034    /* Tell the GPU where the task control buffer is. */
4035    radeon_emit(cs, PKT3(PKT3_DISPATCH_TASK_STATE_INIT, 1, 0) | PKT3_SHADER_TYPE_S(!!compute));
4036    /* bits [31:8]: control buffer address lo, bits [7:0]: reserved (set to zero) */
4037    radeon_emit(cs, task_ctrlbuf_va & 0xFFFFFF00);
4038    /* bits [31:0]: control buffer address hi */
4039    radeon_emit(cs, task_ctrlbuf_va >> 32);
4040 }
4041
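/* Program the graphics scratch ring size (and, on GFX11, its base address).
 * WAVES is a per-SE count on GFX11, and the wave size granularity differs
 * (256 bytes on GFX11 vs. 1024 on older chips).
 */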
4042 static void
4043 radv_emit_graphics_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
4044                            uint32_t size_per_wave, uint32_t waves,
4045                            struct radeon_winsys_bo *scratch_bo)
4046 {
4047    struct radeon_info *info = &device->physical_device->rad_info;
4048
4049    if (!scratch_bo)
4050       return;
4051
4052    radv_cs_add_buffer(device->ws, cs, scratch_bo);
4053
4054    if (info->gfx_level >= GFX11) {
4055       uint64_t va = radv_buffer_get_va(scratch_bo);
4056
4057       /* WAVES is per SE for SPI_TMPRING_SIZE. */
4058       waves /= info->num_se;
4059
4060       radeon_set_context_reg_seq(cs, R_0286E8_SPI_TMPRING_SIZE, 3);
4061       radeon_emit(cs, S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 256)));
4062       radeon_emit(cs, va >> 8);  /* SPI_GFX_SCRATCH_BASE_LO */
4063       radeon_emit(cs, va >> 40); /* SPI_GFX_SCRATCH_BASE_HI */
4064    } else {
4065       radeon_set_context_reg(
4066          cs, R_0286E8_SPI_TMPRING_SIZE,
4067          S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
4068    }
4069 }
4070
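/* Program the compute scratch ring. Pre-GFX11 gets the (address, rsrc1) pair
 * through COMPUTE_USER_DATA_0/1; GFX11 uses COMPUTE_DISPATCH_SCRATCH_BASE.
 * The wave size granularity is 256 bytes on GFX11 and 1024 otherwise.
 */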
4071 static void
4072 radv_emit_compute_scratch(struct radv_device *device, struct radeon_cmdbuf *cs,
4073                           uint32_t size_per_wave, uint32_t waves,
4074                           struct radeon_winsys_bo *compute_scratch_bo)
4075 {
4076    struct radeon_info *info = &device->physical_device->rad_info;
4077    uint64_t scratch_va;
4078    uint32_t rsrc1;
4079
4080    if (!compute_scratch_bo)
4081       return;
4082
4083    scratch_va = radv_buffer_get_va(compute_scratch_bo);
4084    rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
4085
4086    if (device->physical_device->rad_info.gfx_level >= GFX11)
4087       rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
4088    else
4089       rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
4090
4091    radv_cs_add_buffer(device->ws, cs, compute_scratch_bo);
4092
4093    if (info->gfx_level >= GFX11) {
4094       radeon_set_sh_reg_seq(cs, R_00B840_COMPUTE_DISPATCH_SCRATCH_BASE_LO, 4);
4095       radeon_emit(cs, scratch_va >> 8);
4096       radeon_emit(cs, scratch_va >> 40);
4097    } else {
4098       radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
4099    }
4100
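   /* These two dwords complete the register sequence started above
    * (4 registers on GFX11, 2 registers otherwise). */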
4101    radeon_emit(cs, scratch_va);
4102    radeon_emit(cs, rsrc1);
4103
4104    radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
4105                      S_00B860_WAVES(waves) |
4106                      S_00B860_WAVESIZE(round_up_u32(size_per_wave, info->gfx_level >= GFX11 ? 256 : 1024)));
4107 }
4108
4109 static void
4110 radv_emit_compute_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
4111                                   struct radeon_winsys_bo *descriptor_bo)
4112 {
4113    if (!descriptor_bo)
4114       return;
4115
4116    uint64_t va = radv_buffer_get_va(descriptor_bo);
4117    radv_cs_add_buffer(device->ws, cs, descriptor_bo);
4118
4119    /* Compute shader user data 0-1 have the scratch pointer (unlike GFX shaders),
4120     * so emit the descriptor pointer to user data 2-3 instead (task_ring_offsets arg).
4121     */
4122    radv_emit_shader_pointer(device, cs, R_00B908_COMPUTE_USER_DATA_2, va, true);
4123 }
4124
4125 static void
4126 radv_emit_graphics_shader_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
4127                                    struct radeon_winsys_bo *descriptor_bo)
4128 {
4129    uint64_t va;
4130
4131    if (!descriptor_bo)
4132       return;
4133
4134    va = radv_buffer_get_va(descriptor_bo);
4135
4136    radv_cs_add_buffer(device->ws, cs, descriptor_bo);
4137
4138    if (device->physical_device->rad_info.gfx_level >= GFX11) {
4139       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
4140                          R_00B420_SPI_SHADER_PGM_LO_HS,
4141                          R_00B220_SPI_SHADER_PGM_LO_GS};
4142
4143       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
4144          radv_emit_shader_pointer(device, cs, regs[i], va, true);
4145       }
4146    } else if (device->physical_device->rad_info.gfx_level >= GFX9) {
4147       /* GFX9 through GFX10.3 program the same set of registers. */
4148       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
4149                          R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
4150                          R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
4151
4152       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
4153          radv_emit_shader_pointer(device, cs, regs[i], va, true);
4154       }
4162    } else {
4163       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
4164                          R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0,
4165                          R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0};
4166
4167       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
4168          radv_emit_shader_pointer(device, cs, regs[i], va, true);
4169       }
4170    }
4171 }
4172
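/* Emit the initial graphics state: either by chaining to the cached gfx_init
 * indirect buffer, or by emitting the full state inline when no cached IB exists.
 */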
4173 static void
4174 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_device *device)
4175 {
4176    if (device->gfx_init) {
4177       uint64_t va = radv_buffer_get_va(device->gfx_init);
4178
4179       radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
4180       radeon_emit(cs, va);
4181       radeon_emit(cs, va >> 32);
4182       radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
4183
4184       radv_cs_add_buffer(device->ws, cs, device->gfx_init);
4185    } else {
4186       si_emit_graphics(device, cs);
4187    }
4188 }
4189
4190 static void
4191 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_device *device)
4192 {
4193    si_emit_compute(device, cs);
4194 }
4195
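/* (Re)create the preamble command streams for a queue. This grows any scratch
 * and ring buffers that the new submission needs, re-fills the descriptor BO
 * that points shaders at those rings, and records three preambles: an initial
 * one with a full cache flush, an initial one without, and a continue preamble
 * used when indirect buffers can't be chained.
 */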
4196 static VkResult
4197 radv_update_preamble_cs(struct radv_queue_state *queue, struct radv_device *device,
4198                         const struct radv_queue_ring_info *needs)
4199 {
4200    struct radeon_winsys *ws = device->ws;
4201    struct radeon_winsys_bo *scratch_bo = queue->scratch_bo;
4202    struct radeon_winsys_bo *descriptor_bo = queue->descriptor_bo;
4203    struct radeon_winsys_bo *compute_scratch_bo = queue->compute_scratch_bo;
4204    struct radeon_winsys_bo *esgs_ring_bo = queue->esgs_ring_bo;
4205    struct radeon_winsys_bo *gsvs_ring_bo = queue->gsvs_ring_bo;
4206    struct radeon_winsys_bo *tess_rings_bo = queue->tess_rings_bo;
4207    struct radeon_winsys_bo *task_rings_bo = queue->task_rings_bo;
4208    struct radeon_winsys_bo *mesh_scratch_ring_bo = queue->mesh_scratch_ring_bo;
4209    struct radeon_winsys_bo *gds_bo = queue->gds_bo;
4210    struct radeon_winsys_bo *gds_oa_bo = queue->gds_oa_bo;
4211    struct radeon_cmdbuf *dest_cs[3] = {0};
4212    const uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
4213    VkResult result = VK_SUCCESS;
4214
4215    const bool add_sample_positions = !queue->ring_info.sample_positions && needs->sample_positions;
4216    const uint32_t scratch_size = needs->scratch_size_per_wave * needs->scratch_waves;
4217    const uint32_t queue_scratch_size =
4218       queue->ring_info.scratch_size_per_wave * queue->ring_info.scratch_waves;
4219
4220    if (scratch_size > queue_scratch_size) {
4221       result = ws->buffer_create(ws, scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
4222                                  RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);
4223       if (result != VK_SUCCESS)
4224          goto fail;
4225    }
4226
4227    const uint32_t compute_scratch_size =
4228       needs->compute_scratch_size_per_wave * needs->compute_scratch_waves;
4229    const uint32_t compute_queue_scratch_size =
4230       queue->ring_info.compute_scratch_size_per_wave * queue->ring_info.compute_scratch_waves;
4231    if (compute_scratch_size > compute_queue_scratch_size) {
4232       result = ws->buffer_create(ws, compute_scratch_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
4233                                  RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo);
4234       if (result != VK_SUCCESS)
4235          goto fail;
4236    }
4237
4238    if (needs->esgs_ring_size > queue->ring_info.esgs_ring_size) {
4239       result = ws->buffer_create(ws, needs->esgs_ring_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
4240                                  RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo);
4241       if (result != VK_SUCCESS)
4242          goto fail;
4243    }
4244
4245    if (needs->gsvs_ring_size > queue->ring_info.gsvs_ring_size) {
4246       result = ws->buffer_create(ws, needs->gsvs_ring_size, 4096, RADEON_DOMAIN_VRAM, ring_bo_flags,
4247                                  RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo);
4248       if (result != VK_SUCCESS)
4249          goto fail;
4250    }
4251
4252    if (!queue->ring_info.tess_rings && needs->tess_rings) {
4253       result = ws->buffer_create(
4254          ws, device->physical_device->hs.tess_offchip_ring_offset + device->physical_device->hs.tess_offchip_ring_size, 256,
4255          RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
4256       if (result != VK_SUCCESS)
4257          goto fail;
4258    }
4259
4260    if (!queue->ring_info.task_rings && needs->task_rings) {
4261       assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
4262
4263       /* We write the control buffer from the CPU, so we need to grant CPU access to the BO.
4264        * The draw ring needs to be zero-initialized, otherwise the ready bits will be incorrect.
4265        */
4266       uint32_t task_rings_bo_flags =
4267          RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM;
4268
4269       result = ws->buffer_create(ws, device->physical_device->task_info.bo_size_bytes, 256,
4270                                  RADEON_DOMAIN_VRAM, task_rings_bo_flags, RADV_BO_PRIORITY_SCRATCH,
4271                                  0, &task_rings_bo);
4272       if (result != VK_SUCCESS)
4273          goto fail;
4274
4275       result = radv_initialise_task_control_buffer(device, task_rings_bo);
4276       if (result != VK_SUCCESS)
4277          goto fail;
4278    }
4279
4280    if (!queue->ring_info.mesh_scratch_ring && needs->mesh_scratch_ring) {
4281       assert(device->physical_device->rad_info.gfx_level >= GFX10_3);
4282       result =
4283          ws->buffer_create(ws, RADV_MESH_SCRATCH_NUM_ENTRIES * RADV_MESH_SCRATCH_ENTRY_BYTES, 256,
4284                            RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &mesh_scratch_ring_bo);
4285
4286       if (result != VK_SUCCESS)
4287          goto fail;
4288    }
4289
4290    if (!queue->ring_info.gds && needs->gds) {
4291       assert(device->physical_device->rad_info.gfx_level >= GFX10);
4292
4293       /* 4 streamout GDS counters.
4294        * We need 256B (64 dw) of GDS, otherwise streamout hangs.
4295        */
4296       result = ws->buffer_create(ws, 256, 4, RADEON_DOMAIN_GDS, ring_bo_flags,
4297                                  RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
4298       if (result != VK_SUCCESS)
4299          goto fail;
4300    }
4301
4302    if (!queue->ring_info.gds_oa && needs->gds_oa) {
4303       assert(device->physical_device->rad_info.gfx_level >= GFX10);
4304
4305       result = ws->buffer_create(ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
4306                                  RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
4307       if (result != VK_SUCCESS)
4308          goto fail;
4309    }
4310
4311    /* Re-initialize the descriptor BO when any ring BOs changed.
4312     *
4313     * Additionally, make sure to create the descriptor BO for the compute queue
4314     * when it uses the task shader rings. The task rings BO is shared between the
4315     * GFX and compute queues and has already been initialized above.
4316     */
4317    if ((queue->qf == RADV_QUEUE_COMPUTE && !descriptor_bo && task_rings_bo) ||
4318        scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
4319        gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
4320        task_rings_bo != queue->task_rings_bo || mesh_scratch_ring_bo != queue->mesh_scratch_ring_bo ||
4321        add_sample_positions) {
4322       uint32_t size = 0;
4323       if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || add_sample_positions) {
4324          size = 160; /* 2 scratch dwords + 2 padding dwords + 9 * 4-dword descriptors */
4325          if (add_sample_positions)
4326             size += 128; /* 8+16+32+64 = 120 bytes of sample positions, padded to 128 */
4327       } else if (scratch_bo) {
4328          size = 8; /* 2 dword */
4329       }
4330
4331       result = ws->buffer_create(
4332          ws, size, 4096, RADEON_DOMAIN_VRAM,
4333          RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
4334          RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
4335       if (result != VK_SUCCESS)
4336          goto fail;
4337    }
4338
4339    if (descriptor_bo != queue->descriptor_bo) {
4340       uint32_t *map = (uint32_t *)ws->buffer_map(descriptor_bo);
4341       if (!map) {
4342          result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
4343          goto fail;
4344       }
4343
4344       if (scratch_bo) {
4345          uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
4346          uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32);
4347
4348          if (device->physical_device->rad_info.gfx_level >= GFX11)
4349             rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX11(1);
4350          else
4351             rsrc1 |= S_008F04_SWIZZLE_ENABLE_GFX6(1);
4352
4353          map[0] = scratch_va;
4354          map[1] = rsrc1;
4355       }
4356
4357       if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo || mesh_scratch_ring_bo || add_sample_positions)
4358          radv_fill_shader_rings(device, map, add_sample_positions, needs->esgs_ring_size,
4359                                 esgs_ring_bo, needs->gsvs_ring_size, gsvs_ring_bo, tess_rings_bo,
4360                                 task_rings_bo, mesh_scratch_ring_bo);
4361
4362       ws->buffer_unmap(descriptor_bo);
4363    }
4364
4365    for (int i = 0; i < 3; ++i) {
4366       /* Don't create continue preamble when it's not necessary. */
4367       if (i == 2) {
4368          /* We only need the continue preamble when we can't use indirect buffers. */
4369          if (!(device->instance->debug_flags & RADV_DEBUG_NO_IBS) &&
4370              device->physical_device->rad_info.gfx_level >= GFX7)
4371             continue;
4372          /* Continue preamble is unnecessary when no shader rings are used. */
4373          if (!needs->scratch_size_per_wave && !needs->compute_scratch_size_per_wave &&
4374              !needs->esgs_ring_size && !needs->gsvs_ring_size && !needs->tess_rings &&
4375              !needs->task_rings && !needs->mesh_scratch_ring && !needs->gds && !needs->gds_oa && !needs->sample_positions)
4376             continue;
4377       }
4378
4379       enum rgp_flush_bits sqtt_flush_bits = 0;
4380       struct radeon_cmdbuf *cs = NULL;
4381       cs = ws->cs_create(ws, radv_queue_family_to_ring(device->physical_device, queue->qf));
4382       if (!cs) {
4383          result = VK_ERROR_OUT_OF_HOST_MEMORY;
4384          goto fail;
4385       }
4386
4387       dest_cs[i] = cs;
4388
4389       if (scratch_bo)
4390          radv_cs_add_buffer(ws, cs, scratch_bo);
4391
4392       /* Emit initial configuration. */
4393       switch (queue->qf) {
4394       case RADV_QUEUE_GENERAL:
4395          radv_init_graphics_state(cs, device);
4396
4397          if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || task_rings_bo) {
4398             radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
4399             radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
4400
4401             radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
4402             radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
4403          }
4404
4405          radv_emit_gs_ring_sizes(device, cs, esgs_ring_bo, needs->esgs_ring_size, gsvs_ring_bo,
4406                                  needs->gsvs_ring_size);
4407          radv_emit_tess_factor_ring(device, cs, tess_rings_bo);
4408          radv_emit_task_rings(device, cs, task_rings_bo, false);
4409          radv_emit_graphics_shader_pointers(device, cs, descriptor_bo);
4410          radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave,
4411                                    needs->compute_scratch_waves, compute_scratch_bo);
4412          radv_emit_graphics_scratch(device, cs, needs->scratch_size_per_wave, needs->scratch_waves,
4413                                     scratch_bo);
4414          break;
4415       case RADV_QUEUE_COMPUTE:
4416          radv_init_compute_state(cs, device);
4417
4418          if (task_rings_bo) {
4419             radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
4420             radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
4421          }
4422
4423          radv_emit_task_rings(device, cs, task_rings_bo, true);
4424          radv_emit_compute_shader_pointers(device, cs, descriptor_bo);
4425          radv_emit_compute_scratch(device, cs, needs->compute_scratch_size_per_wave,
4426                                    needs->compute_scratch_waves, compute_scratch_bo);
4427          break;
4428       default:
4429          break;
4430       }
4431
4432       if (gds_bo)
4433          radv_cs_add_buffer(ws, cs, gds_bo);
4434       if (gds_oa_bo)
4435          radv_cs_add_buffer(ws, cs, gds_oa_bo);
4436
4437       if (i < 2) {
4438          /* The two initial preambles have a cache flush at the beginning. */
4439          const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level;
4440          const bool is_mec = queue->qf == RADV_QUEUE_COMPUTE && gfx_level >= GFX7;
4441          enum radv_cmd_flush_bits flush_bits = RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
4442                                                RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
4443                                                RADV_CMD_FLAG_START_PIPELINE_STATS;
4444
4445          if (i == 0) {
4446             /* The full flush preamble should also wait for previous shader work to finish. */
4447             flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
4448             if (queue->qf == RADV_QUEUE_GENERAL)
4449                flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
4450          }
4451
4452          si_cs_emit_cache_flush(cs, gfx_level, NULL, 0, is_mec, flush_bits, &sqtt_flush_bits, 0);
4453       }
4454
4455       result = ws->cs_finalize(cs);
4456       if (result != VK_SUCCESS)
4457          goto fail;
4458    }
4459
4460    if (queue->initial_full_flush_preamble_cs)
4461       ws->cs_destroy(queue->initial_full_flush_preamble_cs);
4462
4463    if (queue->initial_preamble_cs)
4464       ws->cs_destroy(queue->initial_preamble_cs);
4465
4466    if (queue->continue_preamble_cs)
4467       ws->cs_destroy(queue->continue_preamble_cs);
4468
4469    queue->initial_full_flush_preamble_cs = dest_cs[0];
4470    queue->initial_preamble_cs = dest_cs[1];
4471    queue->continue_preamble_cs = dest_cs[2];
4472
4473    if (scratch_bo != queue->scratch_bo) {
4474       if (queue->scratch_bo)
4475          ws->buffer_destroy(ws, queue->scratch_bo);
4476       queue->scratch_bo = scratch_bo;
4477    }
4478
4479    if (compute_scratch_bo != queue->compute_scratch_bo) {
4480       if (queue->compute_scratch_bo)
4481          ws->buffer_destroy(ws, queue->compute_scratch_bo);
4482       queue->compute_scratch_bo = compute_scratch_bo;
4483    }
4484
4485    if (esgs_ring_bo != queue->esgs_ring_bo) {
4486       if (queue->esgs_ring_bo)
4487          ws->buffer_destroy(ws, queue->esgs_ring_bo);
4488       queue->esgs_ring_bo = esgs_ring_bo;
4489    }
4490
4491    if (gsvs_ring_bo != queue->gsvs_ring_bo) {
4492       if (queue->gsvs_ring_bo)
4493          ws->buffer_destroy(ws, queue->gsvs_ring_bo);
4494       queue->gsvs_ring_bo = gsvs_ring_bo;
4495    }
4496
4497    if (descriptor_bo != queue->descriptor_bo) {
4498       if (queue->descriptor_bo)
4499          ws->buffer_destroy(ws, queue->descriptor_bo);
4500       queue->descriptor_bo = descriptor_bo;
4501    }
4502
4503    queue->tess_rings_bo = tess_rings_bo;
4504    queue->task_rings_bo = task_rings_bo;
4505    queue->mesh_scratch_ring_bo = mesh_scratch_ring_bo;
4506    queue->gds_bo = gds_bo;
4507    queue->gds_oa_bo = gds_oa_bo;
4508    queue->ring_info = *needs;
4509    return VK_SUCCESS;
4510 fail:
4511    for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
4512       if (dest_cs[i])
4513          ws->cs_destroy(dest_cs[i]);
4514    if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
4515       ws->buffer_destroy(ws, descriptor_bo);
4516    if (scratch_bo && scratch_bo != queue->scratch_bo)
4517       ws->buffer_destroy(ws, scratch_bo);
4518    if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
4519       ws->buffer_destroy(ws, compute_scratch_bo);
4520    if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
4521       ws->buffer_destroy(ws, esgs_ring_bo);
4522    if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
4523       ws->buffer_destroy(ws, gsvs_ring_bo);
4524    if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
4525       ws->buffer_destroy(ws, tess_rings_bo);
4526    if (task_rings_bo && task_rings_bo != queue->task_rings_bo)
4527       ws->buffer_destroy(ws, task_rings_bo);
4528    if (gds_bo && gds_bo != queue->gds_bo)
4529       ws->buffer_destroy(ws, gds_bo);
4530    if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
4531       ws->buffer_destroy(ws, gds_oa_bo);
4532
4533    return vk_error(queue, result);
4534 }
4535
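/* Build (and cache) a small command stream that locks or unlocks the perf
 * counter mutex in the perf counter BO. Locking spins with an ATOMIC_MEM
 * compare-swap; both variants then update the per-pass flags with COPY_DATA.
 */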
4536 static struct radeon_cmdbuf *
4537 radv_create_perf_counter_lock_cs(struct radv_device *device, unsigned pass, bool unlock)
4538 {
4539    struct radeon_cmdbuf **cs_ref = &device->perf_counter_lock_cs[pass * 2 + (unlock ? 1 : 0)];
4540    struct radeon_cmdbuf *cs;
4541
4542    if (*cs_ref)
4543       return *cs_ref;
4544
4545    cs = device->ws->cs_create(device->ws, AMD_IP_GFX);
4546    if (!cs)
4547       return NULL;
4548
4549    ASSERTED unsigned cdw = radeon_check_space(device->ws, cs, 21);
4550
4551    if (!unlock) {
4552       uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET;
4553       radeon_emit(cs, PKT3(PKT3_ATOMIC_MEM, 7, 0));
4554       radeon_emit(cs, ATOMIC_OP(TC_OP_ATOMIC_CMPSWAP_32) | ATOMIC_COMMAND(ATOMIC_COMMAND_LOOP));
4555       radeon_emit(cs, mutex_va);       /* addr lo */
4556       radeon_emit(cs, mutex_va >> 32); /* addr hi */
4557       radeon_emit(cs, 1);              /* data lo */
4558       radeon_emit(cs, 0);              /* data hi */
4559       radeon_emit(cs, 0);              /* compare data lo */
4560       radeon_emit(cs, 0);              /* compare data hi */
4561       radeon_emit(cs, 10);             /* loop interval */
4562    }
4563
4564    uint64_t va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_PASS_OFFSET;
4565    uint64_t unset_va = va + (unlock ? 8 * pass : 0);
4566    uint64_t set_va = va + (unlock ? 0 : 8 * pass);
4567
4568    radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
4569    radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
4570                       COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
4571    radeon_emit(cs, 0); /* immediate */
4572    radeon_emit(cs, 0);
4573    radeon_emit(cs, unset_va);
4574    radeon_emit(cs, unset_va >> 32);
4575
4576    radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
4577    radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
4578                       COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
4579    radeon_emit(cs, 1); /* immediate */
4580    radeon_emit(cs, 0);
4581    radeon_emit(cs, set_va);
4582    radeon_emit(cs, set_va >> 32);
4583
4584    if (unlock) {
4585       uint64_t mutex_va = radv_buffer_get_va(device->perf_counter_bo) + PERF_CTR_BO_LOCK_OFFSET;
4586
4587       radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
4588       radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
4589                          COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM);
4590       radeon_emit(cs, 0); /* immediate */
4591       radeon_emit(cs, 0);
4592       radeon_emit(cs, mutex_va);
4593       radeon_emit(cs, mutex_va >> 32);
4594    }
4595
4596    assert(cs->cdw <= cdw);
4597
4598    VkResult result = device->ws->cs_finalize(cs);
4599    if (result != VK_SUCCESS) {
4600       device->ws->cs_destroy(cs);
4601       return NULL;
4602    }
4603
4604    /* All the casts are there to avoid MSVC errors around pointer truncation in the
4605     * non-taken alternative.
4606     */
4607    if (p_atomic_cmpxchg((uintptr_t*)cs_ref, 0, (uintptr_t)cs) != 0) {
4608       device->ws->cs_destroy(cs);
4609    }
4610
4611    return *cs_ref;
4612 }
4613
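/* Bind (or unbind, when memory is VK_NULL_HANDLE) ranges of a sparse buffer's
 * virtual address space.
 */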
4614 static VkResult
4615 radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind)
4616 {
4617    RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
4618    VkResult result;
4619
4620    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4621       struct radv_device_memory *mem = NULL;
4622
4623       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4624          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4625
4626       result = device->ws->buffer_virtual_bind(device->ws, buffer->bo,
4627                                                bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
4628                                                mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
4629       if (result != VK_SUCCESS)
4630          return result;
4631    }
4632
4633    return VK_SUCCESS;
4634 }
4635
4636 static VkResult
4637 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
4638                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
4639 {
4640    RADV_FROM_HANDLE(radv_image, image, bind->image);
4641    VkResult result;
4642
4643    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4644       struct radv_device_memory *mem = NULL;
4645
4646       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4647          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4648
4649       result = device->ws->buffer_virtual_bind(device->ws, image->bo,
4650                                                bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
4651                                                mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
4652       if (result != VK_SUCCESS)
4653          return result;
4654    }
4655
4656    return VK_SUCCESS;
4657 }
4658
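/* Bind memory to a region of a sparse image. When the bind doesn't cover whole
 * rows of the subresource, the region is bound one tile row at a time, because
 * the bound memory rows have a smaller stride than the image rows.
 */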
4659 static VkResult
4660 radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
4661 {
4662    RADV_FROM_HANDLE(radv_image, image, bind->image);
4663    struct radeon_surf *surface = &image->planes[0].surface;
4664    uint32_t bs = vk_format_get_blocksize(image->vk.format);
4665    VkResult result;
4666
4667    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4668       struct radv_device_memory *mem = NULL;
4669       uint32_t offset, pitch;
4670       uint32_t mem_offset = bind->pBinds[i].memoryOffset;
4671       const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
4672       const uint32_t level = bind->pBinds[i].subresource.mipLevel;
4673
4674       VkExtent3D bind_extent = bind->pBinds[i].extent;
4675       bind_extent.width =
4676          DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk.format));
4677       bind_extent.height =
4678          DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk.format));
4679
4680       VkOffset3D bind_offset = bind->pBinds[i].offset;
4681       bind_offset.x /= vk_format_get_blockwidth(image->vk.format);
4682       bind_offset.y /= vk_format_get_blockheight(image->vk.format);
4683
4684       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4685          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4686
4687       if (device->physical_device->rad_info.gfx_level >= GFX9) {
4688          offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
4689          pitch = surface->u.gfx9.prt_level_pitch[level];
4690       } else {
4691          offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
4692                   surface->u.legacy.level[level].slice_size_dw * 4 * layer;
4693          pitch = surface->u.legacy.level[level].nblk_x;
4694       }
4695
4696       offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs);
4697
4698       uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
4699
4700       bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch;
4701
4702       if (whole_subres) {
4703          uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
4704
4705          uint32_t size = aligned_extent_width * aligned_extent_height * bs;
4706          result = device->ws->buffer_virtual_bind(device->ws, image->bo, offset, size,
4707                                                   mem ? mem->bo : NULL, mem_offset);
4708          if (result != VK_SUCCESS)
4709             return result;
4710       } else {
4711          uint32_t img_increment = pitch * bs;
4712          uint32_t mem_increment = aligned_extent_width * bs;
4713          uint32_t size = mem_increment * surface->prt_tile_height;
4714          for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
4715             result = device->ws->buffer_virtual_bind(
4716                device->ws, image->bo, offset + img_increment * y, size, mem ? mem->bo : NULL,
4717                mem_offset + mem_increment * y);
4718             if (result != VK_SUCCESS)
4719                return result;
4720          }
4721       }
4722    }
4723
4724    return VK_SUCCESS;
4725 }
4726
4727 static VkResult
4728 radv_update_preambles(struct radv_queue_state *queue, struct radv_device *device,
4729                       struct vk_command_buffer *const *cmd_buffers, uint32_t cmd_buffer_count,
4730                       bool *uses_perf_counters)
4731 {
4732    if (queue->qf == RADV_QUEUE_TRANSFER)
4733       return VK_SUCCESS;
4734
4735    /* Figure out the needs of the current submission.
4736     * Start by copying the queue's current info.
4737     * This is done because we only allow two possible behaviours for these buffers:
4738     * - Grow when the newly needed amount is larger than what we had
4739     * - Allocate the max size and reuse it, but don't free it until the queue is destroyed
4740     */
4741    struct radv_queue_ring_info needs = queue->ring_info;
4742    *uses_perf_counters = false;
4743    for (uint32_t j = 0; j < cmd_buffer_count; j++) {
4744       struct radv_cmd_buffer *cmd_buffer = container_of(cmd_buffers[j], struct radv_cmd_buffer, vk);
4745
4746       needs.scratch_size_per_wave =
4747          MAX2(needs.scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
4748       needs.scratch_waves = MAX2(needs.scratch_waves, cmd_buffer->scratch_waves_wanted);
4749       needs.compute_scratch_size_per_wave = MAX2(needs.compute_scratch_size_per_wave,
4750                                                  cmd_buffer->compute_scratch_size_per_wave_needed);
4751       needs.compute_scratch_waves =
4752          MAX2(needs.compute_scratch_waves, cmd_buffer->compute_scratch_waves_wanted);
4753       needs.esgs_ring_size = MAX2(needs.esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
4754       needs.gsvs_ring_size = MAX2(needs.gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
4755       needs.tess_rings |= cmd_buffer->tess_rings_needed;
4756       needs.task_rings |= cmd_buffer->task_rings_needed;
4757       needs.mesh_scratch_ring |= cmd_buffer->mesh_scratch_ring_needed;
4758       needs.gds |= cmd_buffer->gds_needed;
4759       needs.gds_oa |= cmd_buffer->gds_oa_needed;
4760       needs.sample_positions |= cmd_buffer->sample_positions_needed;
4761       *uses_perf_counters |= cmd_buffer->state.uses_perf_counters;
4762    }
4763
4764    /* Sanitize scratch size information. */
4765    needs.scratch_waves = needs.scratch_size_per_wave
4766                             ? MIN2(needs.scratch_waves, UINT32_MAX / needs.scratch_size_per_wave)
4767                             : 0;
4768    needs.compute_scratch_waves =
4769       needs.compute_scratch_size_per_wave
4770          ? MIN2(needs.compute_scratch_waves, UINT32_MAX / needs.compute_scratch_size_per_wave)
4771          : 0;
4772
4773    /* Return early if we already match these needs.
4774     * Note that it's not possible for any of the needed values to be less
4775     * than what the queue already had, because we only ever increase the allocated size.
4776     */
4777    if (queue->initial_full_flush_preamble_cs &&
4778        queue->ring_info.scratch_size_per_wave == needs.scratch_size_per_wave &&
4779        queue->ring_info.scratch_waves == needs.scratch_waves &&
4780        queue->ring_info.compute_scratch_size_per_wave == needs.compute_scratch_size_per_wave &&
4781        queue->ring_info.compute_scratch_waves == needs.compute_scratch_waves &&
4782        queue->ring_info.esgs_ring_size == needs.esgs_ring_size &&
4783        queue->ring_info.gsvs_ring_size == needs.gsvs_ring_size &&
4784        queue->ring_info.tess_rings == needs.tess_rings &&
4785        queue->ring_info.task_rings == needs.task_rings &&
4786        queue->ring_info.mesh_scratch_ring == needs.mesh_scratch_ring &&
4787        queue->ring_info.gds == needs.gds &&
4788        queue->ring_info.gds_oa == needs.gds_oa &&
4789        queue->ring_info.sample_positions == needs.sample_positions)
4790       return VK_SUCCESS;
4791
4792    return radv_update_preamble_cs(queue, device, &needs);
4793 }
4794
4795 struct radv_deferred_queue_submission {
4796    struct radv_queue *queue;
4797    VkCommandBuffer *cmd_buffers;
4798    uint32_t cmd_buffer_count;
4799
4800    /* Sparse bindings that happen on a queue. */
4801    VkSparseBufferMemoryBindInfo *buffer_binds;
4802    uint32_t buffer_bind_count;
4803    VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4804    uint32_t image_opaque_bind_count;
4805    VkSparseImageMemoryBindInfo *image_binds;
4806    uint32_t image_bind_count;
4807
4808    bool flush_caches;
4809    VkPipelineStageFlags2 wait_dst_stage_mask;
4810    struct radv_semaphore_part **wait_semaphores;
4811    uint32_t wait_semaphore_count;
4812    struct radv_semaphore_part **signal_semaphores;
4813    uint32_t signal_semaphore_count;
4814    VkFence fence;
4815
4816    uint64_t *wait_values;
4817    uint64_t *signal_values;
4818
4819    struct radv_semaphore_part *temporary_semaphore_parts;
4820    uint32_t temporary_semaphore_part_count;
4821
4822    struct list_head queue_pending_list;
4823    uint32_t submission_wait_count;
4824
4825    struct list_head processing_list;
4826 };
4827
4828 static VkResult
4829 radv_queue_submit_bind_sparse_memory(struct radv_device *device, struct vk_queue_submit *submission)
4830 {
4831    for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
4832       VkResult result = radv_sparse_buffer_bind_memory(device, submission->buffer_binds + i);
4833       if (result != VK_SUCCESS)
4834          return result;
4835    }
4836
4837    for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
4838       VkResult result =
4839          radv_sparse_image_opaque_bind_memory(device, submission->image_opaque_binds + i);
4840       if (result != VK_SUCCESS)
4841          return result;
4842    }
4843
4844    for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
4845       VkResult result = radv_sparse_image_bind_memory(device, submission->image_binds + i);
4846       if (result != VK_SUCCESS)
4847          return result;
4848    }
4849
4850    return VK_SUCCESS;
4851 }
4852
4853 static VkResult
4854 radv_queue_submit_empty(struct radv_queue *queue, struct vk_queue_submit *submission)
4855 {
4856    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4857    struct radv_winsys_submit_info submit = {
4858       .ip_type = radv_queue_ring(queue),
4859       .queue_index = queue->vk.index_in_family,
4860    };
4861
4862    return queue->device->ws->cs_submit(ctx, 1, &submit, submission->wait_count, submission->waits,
4863                                        submission->signal_count, submission->signals, false);
4864 }
4865
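/* Submit command buffers, splitting them into chunks of at most
 * RADV_MAX_IBS_PER_SUBMIT (or 1 when tracing). When perf counters are in use,
 * the submission is wrapped in the perf counter lock/unlock command streams.
 * Only the first chunk uses the full flush preamble (and only when it has to
 * wait); later chunks use the lighter initial preamble.
 */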
4866 static VkResult
4867 radv_queue_submit_normal(struct radv_queue *queue, struct vk_queue_submit *submission)
4868 {
4869    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
4870    uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
4871    bool can_patch = true;
4872    uint32_t advance;
4873    VkResult result;
4874    bool uses_perf_counters = false;
4875    unsigned cmd_buffer_count = submission->command_buffer_count;
4876
4877    result = radv_update_preambles(&queue->state, queue->device, submission->command_buffers,
4878                                   submission->command_buffer_count, &uses_perf_counters);
4879    if (result != VK_SUCCESS)
4880       return result;
4881
4882    if (queue->device->trace_bo)
4883       simple_mtx_lock(&queue->device->trace_mtx);
4884
4885    if (uses_perf_counters)
4886       cmd_buffer_count += 2;
4887
4888    struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) * cmd_buffer_count);
4889    if (!cs_array) {
4890       result = VK_ERROR_OUT_OF_HOST_MEMORY;
4891       goto fail;
4892    }
4891
4892    for (uint32_t j = 0; j < submission->command_buffer_count; j++) {
4893       struct radv_cmd_buffer *cmd_buffer = (struct radv_cmd_buffer *)submission->command_buffers[j];
4894       assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4895
4896       cs_array[j + (uses_perf_counters ? 1 : 0)] = cmd_buffer->cs;
4897       if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
4898          can_patch = false;
4899
4900       cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
4901    }
4902
4903    if (uses_perf_counters) {
4904       cs_array[0] =
4905          radv_create_perf_counter_lock_cs(queue->device, submission->perf_pass_index, false);
4906       cs_array[cmd_buffer_count - 1] =
4907          radv_create_perf_counter_lock_cs(queue->device, submission->perf_pass_index, true);
4908       can_patch = false;
4909       if (!cs_array[0] || !cs_array[cmd_buffer_count - 1]) {
4910          result = VK_ERROR_OUT_OF_HOST_MEMORY;
4911          goto fail;
4912       }
4913    }
4914
4915    /* For fences on the same queue/VM, amdgpu doesn't wait until all processing is finished
4916     * before starting the next cmdbuffer, so we need to do that wait here. */
4917    bool need_wait = submission->wait_count > 0;
4918
4919    struct radv_winsys_submit_info submit = {
4920       .ip_type = radv_queue_ring(queue),
4921       .queue_index = queue->vk.index_in_family,
4922       .cs_array = cs_array,
4923       .cs_count = 0,
4924       .initial_preamble_cs =
4925          need_wait ? queue->state.initial_full_flush_preamble_cs : queue->state.initial_preamble_cs,
4926       .continue_preamble_cs = queue->state.continue_preamble_cs,
4927    };
4928
4929    for (uint32_t j = 0; j < cmd_buffer_count; j += advance) {
4930       advance = MIN2(max_cs_submission, cmd_buffer_count - j);
4931       bool last_submit = j + advance == cmd_buffer_count;
4932
4933       if (queue->device->trace_bo)
4934          *queue->device->trace_id_ptr = 0;
4935
4936       submit.cs_count = advance;
4937
4938       result = queue->device->ws->cs_submit(
4939          ctx, 1, &submit, j == 0 ? submission->wait_count : 0, submission->waits,
4940          last_submit ? submission->signal_count : 0, submission->signals, can_patch);
4941
4942       if (result != VK_SUCCESS)
4943          goto fail;
4944
4945       if (queue->device->trace_bo) {
4946          radv_check_gpu_hangs(queue, cs_array[j]);
4947       }
4948
4949       if (queue->device->tma_bo) {
4950          radv_check_trap_handler(queue);
4951       }
4952
4953       submit.cs_array += advance;
4954       submit.initial_preamble_cs = queue->state.initial_preamble_cs;
4955    }
4956
4957 fail:
4958    free(cs_array);
4959    if (queue->device->trace_bo)
4960       simple_mtx_unlock(&queue->device->trace_mtx);
4961
4962    return result;
4963 }
4964
4965 static VkResult
4966 radv_queue_submit(struct vk_queue *vqueue, struct vk_queue_submit *submission)
4967 {
4968    struct radv_queue *queue = (struct radv_queue *)vqueue;
4969    VkResult result;
4970
4971    result = radv_queue_submit_bind_sparse_memory(queue->device, submission);
4972    if (result != VK_SUCCESS)
4973       goto fail;
4974
4975    if (!submission->command_buffer_count && !submission->wait_count && !submission->signal_count)
4976       return VK_SUCCESS;
4977
4978    if (!submission->command_buffer_count) {
4979       result = radv_queue_submit_empty(queue, submission);
4980    } else {
4981       result = radv_queue_submit_normal(queue, submission);
4982    }
4983
4984 fail:
4985    if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
4986       /* When something bad happened during the submission, such as
4987        * an out of memory issue, it might be hard to recover from
4988        * this inconsistent state. To avoid this sort of problem, we
4989        * assume that we are in a really bad situation and return
4990        * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
4991        * to submit the same job again to this device.
4992        */
4993       result = vk_device_set_lost(&queue->device->vk, "vkQueueSubmit() failed");
4994    }
4995    return result;
4996 }
4997
4998 bool
4999 radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
5000 {
5001    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
5002    struct radv_winsys_submit_info submit = {
5003       .ip_type = radv_queue_ring(queue),
5004       .queue_index = queue->vk.index_in_family,
5005       .cs_array = &cs,
5006       .cs_count = 1,
5007    };
5008
5009    VkResult result = queue->device->ws->cs_submit(ctx, 1, &submit, 0, NULL, 0, NULL, false);
5010    if (result != VK_SUCCESS)
5011       return false;
5012
5013    return true;
5014 }
5015
5016 VKAPI_ATTR VkResult VKAPI_CALL
5017 radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount,
5018                                           VkExtensionProperties *pProperties)
5019 {
5020    if (pLayerName)
5021       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
5022
5023    return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,
5024                                                      pPropertyCount, pProperties);
5025 }
5026
5027 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
5028 radv_GetInstanceProcAddr(VkInstance _instance, const char *pName)
5029 {
5030    RADV_FROM_HANDLE(radv_instance, instance, _instance);
5031
5032    /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
5033     * when we have to return valid function pointers, NULL, or it's left
5034     * undefined.  See the table for exact details.
5035     */
5036    if (pName == NULL)
5037       return NULL;
5038
5039 #define LOOKUP_RADV_ENTRYPOINT(entrypoint)                                                         \
5040    if (strcmp(pName, "vk" #entrypoint) == 0)                                                       \
5041    return (PFN_vkVoidFunction)radv_##entrypoint
5042
5043    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
5044    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
5045    LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
5046    LOOKUP_RADV_ENTRYPOINT(CreateInstance);
5047
5048    /* GetInstanceProcAddr() can also be called with a NULL instance.
5049     * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
5050     */
5051    LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
5052
5053 #undef LOOKUP_RADV_ENTRYPOINT
5054
5055    if (instance == NULL)
5056       return NULL;
5057
5058    return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName);
5059 }
5060
5061 /* Windows will use a dll definition file to avoid build errors. */
5062 #ifdef _WIN32
5063 #undef PUBLIC
5064 #define PUBLIC
5065 #endif
5066
5067 /* The loader wants us to expose a second GetInstanceProcAddr function
5068  * to work around certain LD_PRELOAD issues seen in apps.
5069  */
5070 PUBLIC
5071 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
5072 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
5073 {
5074    return radv_GetInstanceProcAddr(instance, pName);
5075 }
5076
5077 PUBLIC
5078 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
5079 vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
5080 {
5081    RADV_FROM_HANDLE(radv_instance, instance, _instance);
5082    return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
5083 }
5084
5085 bool
5086 radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
5087 {
5088    /* Only set BO metadata for the first plane */
5089    if (memory->image && memory->image->offset == 0) {
5090       struct radeon_bo_metadata metadata;
5091       radv_init_metadata(device, memory->image, &metadata);
5092       device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
5093    }
5094
5095    return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
5096 }
5097
5098 void
5099 radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
5100                         struct radeon_winsys_bo *bo)
5101 {
5102    memset(mem, 0, sizeof(*mem));
5103    vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);
5104
5105    mem->bo = bo;
5106 }
5107
5108 void
5109 radv_device_memory_finish(struct radv_device_memory *mem)
5110 {
5111    vk_object_base_finish(&mem->base);
5112 }
5113
5114 void
5115 radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5116                  struct radv_device_memory *mem)
5117 {
5118    if (mem == NULL)
5119       return;
5120
5121 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5122    if (mem->android_hardware_buffer)
5123       AHardwareBuffer_release(mem->android_hardware_buffer);
5124 #endif
5125
5126    if (mem->bo) {
5127       if (device->overallocation_disallowed) {
5128          mtx_lock(&device->overallocation_mutex);
5129          device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
5130          mtx_unlock(&device->overallocation_mutex);
5131       }
5132
5133       if (device->use_global_bo_list)
5134          device->ws->buffer_make_resident(device->ws, mem->bo, false);
5135       device->ws->buffer_destroy(device->ws, mem->bo);
5136       mem->bo = NULL;
5137    }
5138
5139    radv_device_memory_finish(mem);
5140    vk_free2(&device->vk.alloc, pAllocator, mem);
5141 }
5142
5143 static VkResult
5144 radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
5145                   const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
5146 {
5147    struct radv_device_memory *mem;
5148    VkResult result;
5149    enum radeon_bo_domain domain;
5150    uint32_t flags = 0;
5151
5152    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
5153
5154    const VkImportMemoryFdInfoKHR *import_info =
5155       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
5156    const VkMemoryDedicatedAllocateInfo *dedicate_info =
5157       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
5158    const VkExportMemoryAllocateInfo *export_info =
5159       vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
5160    const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
5161       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
5162    const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
5163       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);
5164
5165    const struct wsi_memory_allocate_info *wsi_info =
5166       vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
5167
5168    if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
5169        !(export_info && (export_info->handleTypes &
5170                          VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
5171       /* Apparently, this is allowed */
5172       *pMem = VK_NULL_HANDLE;
5173       return VK_SUCCESS;
5174    }
5175
5176    mem =
5177       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5178    if (mem == NULL)
5179       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5180
5181    radv_device_memory_init(mem, device, NULL);
5182
5183    if (wsi_info) {
      if (wsi_info->implicit_sync)
5185          flags |= RADEON_FLAG_IMPLICIT_SYNC;
5186
      /* In the prime case, the linear buffer is allocated in the default
       * heap, which is VRAM. Because of this, when the display is
       * connected to the iGPU and rendering happens on the dGPU, the DDX
       * function amdgpu_present_check_flip() fails and presentation falls
       * back to a blit instead of a flip. Setting RADEON_FLAG_GTT_WC lets
       * the kernel allocate GTT memory on hardware where GTT can be
       * scanned out directly. The wsi_info check ensures the flag is set
       * only for memory allocated by the driver.
       */
5195       flags |= RADEON_FLAG_GTT_WC;
5196    }
5197
5198    if (dedicate_info) {
5199       mem->image = radv_image_from_handle(dedicate_info->image);
5200       mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
5201    } else {
5202       mem->image = NULL;
5203       mem->buffer = NULL;
5204    }
5205
5206    if (wsi_info && wsi_info->implicit_sync && mem->buffer) {
      /* Mark the linear prime buffer (aka the destination of the prime
       * blit) as uncached.
       */
5210       flags |= RADEON_FLAG_VA_UNCACHED;
5211    }
5212
5213    float priority_float = 0.5;
5214    const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
5215       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
5216    if (priority_ext)
5217       priority_float = priority_ext->priority;
5218
5219    uint64_t replay_address = 0;
5220    const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
5221       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
5222    if (replay_info && replay_info->opaqueCaptureAddress)
5223       replay_address = replay_info->opaqueCaptureAddress;
5224
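   /* Map the float priority in [0.0, 1.0] onto the integer application
    * range; the clamp keeps even a priority of 1.0 strictly below
    * RADV_BO_PRIORITY_APPLICATION_MAX. */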
5225    unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
5226                             (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
5227
5228    mem->user_ptr = NULL;
5229
5230 #if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
5231    mem->android_hardware_buffer = NULL;
5232 #endif
5233
5234    if (ahb_import_info) {
5235       result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
5236       if (result != VK_SUCCESS)
5237          goto fail;
5238    } else if (export_info && (export_info->handleTypes &
5239                               VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
5240       result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
5241       if (result != VK_SUCCESS)
5242          goto fail;
5243    } else if (import_info) {
5244       assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
5245              import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
5246       result = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, &mem->bo, NULL);
5247       if (result != VK_SUCCESS) {
5248          goto fail;
5249       } else {
5250          close(import_info->fd);
5251       }
5252
5253       if (mem->image && mem->image->plane_count == 1 &&
5254           !vk_format_is_depth_or_stencil(mem->image->vk.format) && mem->image->info.samples == 1 &&
5255           mem->image->vk.tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
5256          struct radeon_bo_metadata metadata;
5257          device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);
5258
5259          struct radv_image_create_info create_info = {.no_metadata_planes = true,
5260                                                       .bo_metadata = &metadata};
5261
5262          /* This gives a basic ability to import radeonsi images
5263           * that don't have DCC. This is not guaranteed by any
5264           * spec and can be removed after we support modifiers. */
5265          result = radv_image_create_layout(device, create_info, NULL, mem->image);
5266          if (result != VK_SUCCESS) {
5267             device->ws->buffer_destroy(device->ws, mem->bo);
5268             goto fail;
5269          }
5270       }
5271    } else if (host_ptr_info) {
5272       assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
5273       result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
5274                                            pAllocateInfo->allocationSize, priority, &mem->bo);
5275       if (result != VK_SUCCESS) {
5276          goto fail;
5277       } else {
5278          mem->user_ptr = host_ptr_info->pHostPointer;
5279       }
5280    } else {
5281       uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
5282       uint32_t heap_index;
5283
5284       heap_index =
5285          device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]
5286             .heapIndex;
5287       domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
5288       flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];
5289
5290       if (!import_info && (!export_info || !export_info->handleTypes)) {
5291          flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
5292          if (device->use_global_bo_list) {
5293             flags |= RADEON_FLAG_PREFER_LOCAL_BO;
5294          }
5295       }
5296
5297       const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
5298       if (flags_info && flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
5299          flags |= RADEON_FLAG_REPLAYABLE;
5300
5301       if (device->instance->zero_vram)
5302          flags |= RADEON_FLAG_ZERO_VRAM;
5303
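      /* With overallocation disallowed, account for this allocation
       * against the heap size under the lock and fail up front rather
       * than overcommitting; the accounting is rolled back below if the
       * buffer creation itself fails. */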
5304       if (device->overallocation_disallowed) {
5305          uint64_t total_size =
5306             device->physical_device->memory_properties.memoryHeaps[heap_index].size;
5307
5308          mtx_lock(&device->overallocation_mutex);
5309          if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
5310             mtx_unlock(&device->overallocation_mutex);
5311             result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
5312             goto fail;
5313          }
5314          device->allocated_memory_size[heap_index] += alloc_size;
5315          mtx_unlock(&device->overallocation_mutex);
5316       }
5317
5318       result = device->ws->buffer_create(device->ws, alloc_size,
5319                                          device->physical_device->rad_info.max_alignment, domain,
5320                                          flags, priority, replay_address, &mem->bo);
5321
5322       if (result != VK_SUCCESS) {
5323          if (device->overallocation_disallowed) {
5324             mtx_lock(&device->overallocation_mutex);
5325             device->allocated_memory_size[heap_index] -= alloc_size;
5326             mtx_unlock(&device->overallocation_mutex);
5327          }
5328          goto fail;
5329       }
5330
5331       mem->heap_index = heap_index;
5332       mem->alloc_size = alloc_size;
5333    }
5334
5335    if (!wsi_info) {
5336       if (device->use_global_bo_list) {
5337          result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
5338          if (result != VK_SUCCESS)
5339             goto fail;
5340       }
5341    }
5342
5343    *pMem = radv_device_memory_to_handle(mem);
5344
5345    return VK_SUCCESS;
5346
5347 fail:
5348    radv_free_memory(device, pAllocator, mem);
5349
5350    return result;
5351 }
5352
5353 VKAPI_ATTR VkResult VKAPI_CALL
5354 radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo,
5355                     const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
5356 {
5357    RADV_FROM_HANDLE(radv_device, device, _device);
5358    return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
5359 }
5360
5361 VKAPI_ATTR void VKAPI_CALL
5362 radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator)
5363 {
5364    RADV_FROM_HANDLE(radv_device, device, _device);
5365    RADV_FROM_HANDLE(radv_device_memory, mem, _mem);
5366
5367    radv_free_memory(device, pAllocator, mem);
5368 }
5369
5370 VKAPI_ATTR VkResult VKAPI_CALL
5371 radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,
5372                VkMemoryMapFlags flags, void **ppData)
5373 {
5374    RADV_FROM_HANDLE(radv_device, device, _device);
5375    RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5376
5377    if (mem->user_ptr)
5378       *ppData = mem->user_ptr;
5379    else
5380       *ppData = device->ws->buffer_map(mem->bo);
5381
5382    if (*ppData) {
5383       *ppData = (uint8_t *)*ppData + offset;
5384       return VK_SUCCESS;
5385    }
5386
5387    return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
5388 }
5389
5390 VKAPI_ATTR void VKAPI_CALL
5391 radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
5392 {
5393    RADV_FROM_HANDLE(radv_device, device, _device);
5394    RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
5395
5396    if (mem->user_ptr == NULL)
5397       device->ws->buffer_unmap(mem->bo);
5398 }
5399
5400 VKAPI_ATTR VkResult VKAPI_CALL
5401 radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
5402                              const VkMappedMemoryRange *pMemoryRanges)
5403 {
5404    return VK_SUCCESS;
5405 }
5406
5407 VKAPI_ATTR VkResult VKAPI_CALL
5408 radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
5409                                   const VkMappedMemoryRange *pMemoryRanges)
5410 {
5411    return VK_SUCCESS;
5412 }
5413
5414 static void
5415 radv_get_buffer_memory_requirements(struct radv_device *device, VkDeviceSize size,
                                    VkBufferCreateFlags flags, VkBufferUsageFlags usage,
5417                                     VkMemoryRequirements2 *pMemoryRequirements)
5418 {
5419    pMemoryRequirements->memoryRequirements.memoryTypeBits =
5420       (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5421
5422    if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
5423       pMemoryRequirements->memoryRequirements.alignment = 4096;
5424    else
5425       pMemoryRequirements->memoryRequirements.alignment = 16;
5426
   /* Top-level acceleration structures need the bottom 6 bits to store
    * the root IDs of instances. The hardware also requires BVH nodes to
    * be 64-byte aligned.
    */
5431    if (usage & VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR)
5432       pMemoryRequirements->memoryRequirements.alignment =
5433          MAX2(pMemoryRequirements->memoryRequirements.alignment, 64);
5434
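   /* Worked example: a 100-byte buffer without sparse binding gets
    * alignment = 16 and size = align64(100, 16) = 112, while the same
    * buffer created with VK_BUFFER_CREATE_SPARSE_BINDING_BIT gets
    * alignment = 4096 and size = 4096. */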
5435    pMemoryRequirements->memoryRequirements.size =
5436       align64(size, pMemoryRequirements->memoryRequirements.alignment);
5437
5438    vk_foreach_struct(ext, pMemoryRequirements->pNext)
5439    {
5440       switch (ext->sType) {
5441       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5442          VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
5443          req->requiresDedicatedAllocation = false;
5444          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5445          break;
5446       }
5447       default:
5448          break;
5449       }
5450    }
5451 }
5452
5453 VKAPI_ATTR void VKAPI_CALL
5454 radv_GetBufferMemoryRequirements2(VkDevice _device, const VkBufferMemoryRequirementsInfo2 *pInfo,
5455                                   VkMemoryRequirements2 *pMemoryRequirements)
5456 {
5457    RADV_FROM_HANDLE(radv_device, device, _device);
5458    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
5459
5460    radv_get_buffer_memory_requirements(device, buffer->vk.size, buffer->vk.create_flags,
5461                                        buffer->vk.usage, pMemoryRequirements);
5462 }
5463
5464 VKAPI_ATTR void VKAPI_CALL
5465 radv_GetDeviceBufferMemoryRequirements(VkDevice _device,
5466                                        const VkDeviceBufferMemoryRequirements *pInfo,
5467                                        VkMemoryRequirements2 *pMemoryRequirements)
5468 {
5469    RADV_FROM_HANDLE(radv_device, device, _device);
5470
5471    radv_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pInfo->pCreateInfo->flags,
5472                                        pInfo->pCreateInfo->usage, pMemoryRequirements);
5473 }
5474
5475 VKAPI_ATTR void VKAPI_CALL
5476 radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequirementsInfo2 *pInfo,
5477                                  VkMemoryRequirements2 *pMemoryRequirements)
5478 {
5479    RADV_FROM_HANDLE(radv_device, device, _device);
5480    RADV_FROM_HANDLE(radv_image, image, pInfo->image);
5481
5482    pMemoryRequirements->memoryRequirements.memoryTypeBits =
5483       (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5484
5485    pMemoryRequirements->memoryRequirements.size = image->size;
5486    pMemoryRequirements->memoryRequirements.alignment = image->alignment;
5487
5488    vk_foreach_struct(ext, pMemoryRequirements->pNext)
5489    {
5490       switch (ext->sType) {
5491       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5492          VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
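         /* Shareable tiled images are reported as requiring a dedicated
          * allocation, presumably because the export path (see
          * radv_get_memory_fd()) writes layout metadata for the whole BO;
          * linear images can be shared without one. */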
5493          req->requiresDedicatedAllocation =
5494             image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR;
5495          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5496          break;
5497       }
5498       default:
5499          break;
5500       }
5501    }
5502 }
5503
5504 VKAPI_ATTR void VKAPI_CALL
5505 radv_GetDeviceImageMemoryRequirements(VkDevice device,
5506                                       const VkDeviceImageMemoryRequirements *pInfo,
5507                                       VkMemoryRequirements2 *pMemoryRequirements)
5508 {
5509    UNUSED VkResult result;
5510    VkImage image;
5511
   /* Determining the image size/alignment requires creating a surface,
    * which is hard to do without creating the image itself.
    * TODO: Avoid creating an image.
    */
5516    result = radv_CreateImage(device, pInfo->pCreateInfo, NULL, &image);
5517    assert(result == VK_SUCCESS);
5518
5519    VkImageMemoryRequirementsInfo2 info2 = {
5520       .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
5521       .image = image,
5522    };
5523
5524    radv_GetImageMemoryRequirements2(device, &info2, pMemoryRequirements);
5525
5526    radv_DestroyImage(device, image, NULL);
5527 }
5528
5529 VKAPI_ATTR void VKAPI_CALL
5530 radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,
5531                                VkDeviceSize *pCommittedMemoryInBytes)
5532 {
5533    *pCommittedMemoryInBytes = 0;
5534 }
5535
5536 VKAPI_ATTR VkResult VKAPI_CALL
5537 radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,
5538                        const VkBindBufferMemoryInfo *pBindInfos)
5539 {
5540    RADV_FROM_HANDLE(radv_device, device, _device);
5541
5542    for (uint32_t i = 0; i < bindInfoCount; ++i) {
5543       RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5544       RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
5545
5546       if (mem->alloc_size) {
5547          VkBufferMemoryRequirementsInfo2 info = {
5548             .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
5549             .buffer = pBindInfos[i].buffer,
5550          };
5551          VkMemoryRequirements2 reqs = {
5552             .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
5553          };
5554
5555          radv_GetBufferMemoryRequirements2(_device, &info, &reqs);
5556
5557          if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
5558             return vk_errorf(device, VK_ERROR_UNKNOWN,
5559                              "Device memory object too small for the buffer.\n");
5560          }
5561       }
5562
5563       buffer->bo = mem->bo;
5564       buffer->offset = pBindInfos[i].memoryOffset;
5565    }
5566    return VK_SUCCESS;
5567 }
5568
5569 VKAPI_ATTR VkResult VKAPI_CALL
5570 radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount,
5571                       const VkBindImageMemoryInfo *pBindInfos)
5572 {
5573    RADV_FROM_HANDLE(radv_device, device, _device);
5574
5575    for (uint32_t i = 0; i < bindInfoCount; ++i) {
5576       RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5577       RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
5578
5579       if (mem->alloc_size) {
5580          VkImageMemoryRequirementsInfo2 info = {
5581             .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
5582             .image = pBindInfos[i].image,
5583          };
5584          VkMemoryRequirements2 reqs = {
5585             .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
5586          };
5587
5588          radv_GetImageMemoryRequirements2(_device, &info, &reqs);
5589
5590          if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
5591             return vk_errorf(device, VK_ERROR_UNKNOWN,
5592                              "Device memory object too small for the image.\n");
5593          }
5594       }
5595
5596       image->bo = mem->bo;
5597       image->offset = pBindInfos[i].memoryOffset;
5598    }
5599    return VK_SUCCESS;
5600 }
5601
5602 static void
5603 radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5604                    struct radv_event *event)
5605 {
5606    if (event->bo)
5607       device->ws->buffer_destroy(device->ws, event->bo);
5608
5609    vk_object_base_finish(&event->base);
5610    vk_free2(&device->vk.alloc, pAllocator, event);
5611 }
5612
5613 VKAPI_ATTR VkResult VKAPI_CALL
5614 radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
5615                  const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
5616 {
5617    RADV_FROM_HANDLE(radv_device, device, _device);
5618    enum radeon_bo_domain bo_domain;
5619    enum radeon_bo_flag bo_flags;
5620    struct radv_event *event;
5621    VkResult result;
5622
5623    event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
5624                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5625    if (!event)
5626       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5627
5628    vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
5629
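   /* Device-only events are never mapped on the host, so they can live in
    * VRAM without CPU access; host-visible events need a CPU-mappable GTT
    * allocation (mapped below). */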
5630    if (pCreateInfo->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT) {
5631       bo_domain = RADEON_DOMAIN_VRAM;
5632       bo_flags = RADEON_FLAG_NO_CPU_ACCESS;
5633    } else {
5634       bo_domain = RADEON_DOMAIN_GTT;
5635       bo_flags = RADEON_FLAG_CPU_ACCESS;
5636    }
5637
5638    result = device->ws->buffer_create(
5639       device->ws, 8, 8, bo_domain,
5640       RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_NO_INTERPROCESS_SHARING | bo_flags,
5641       RADV_BO_PRIORITY_FENCE, 0, &event->bo);
5642    if (result != VK_SUCCESS) {
5643       radv_destroy_event(device, pAllocator, event);
5644       return vk_error(device, result);
5645    }
5646
5647    if (!(pCreateInfo->flags & VK_EVENT_CREATE_DEVICE_ONLY_BIT)) {
5648       event->map = (uint64_t *)device->ws->buffer_map(event->bo);
5649       if (!event->map) {
5650          radv_destroy_event(device, pAllocator, event);
5651          return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
5652       }
5653    }
5654
5655    *pEvent = radv_event_to_handle(event);
5656
5657    return VK_SUCCESS;
5658 }
5659
5660 VKAPI_ATTR void VKAPI_CALL
5661 radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator)
5662 {
5663    RADV_FROM_HANDLE(radv_device, device, _device);
5664    RADV_FROM_HANDLE(radv_event, event, _event);
5665
5666    if (!event)
5667       return;
5668
5669    radv_destroy_event(device, pAllocator, event);
5670 }
5671
5672 VKAPI_ATTR VkResult VKAPI_CALL
5673 radv_GetEventStatus(VkDevice _device, VkEvent _event)
5674 {
5675    RADV_FROM_HANDLE(radv_device, device, _device);
5676    RADV_FROM_HANDLE(radv_event, event, _event);
5677
5678    if (vk_device_is_lost(&device->vk))
5679       return VK_ERROR_DEVICE_LOST;
5680
5681    if (*event->map == 1)
5682       return VK_EVENT_SET;
5683    return VK_EVENT_RESET;
5684 }
5685
5686 VKAPI_ATTR VkResult VKAPI_CALL
5687 radv_SetEvent(VkDevice _device, VkEvent _event)
5688 {
5689    RADV_FROM_HANDLE(radv_event, event, _event);
5690    *event->map = 1;
5691
5692    return VK_SUCCESS;
5693 }
5694
5695 VKAPI_ATTR VkResult VKAPI_CALL
5696 radv_ResetEvent(VkDevice _device, VkEvent _event)
5697 {
5698    RADV_FROM_HANDLE(radv_event, event, _event);
5699    *event->map = 0;
5700
5701    return VK_SUCCESS;
5702 }
5703
5704 void
5705 radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
5706                  struct radeon_winsys_bo *bo, uint64_t size,
5707                  uint64_t offset)
5708 {
5709    VkBufferCreateInfo createInfo = {
5710       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
5711       .size = size,
5712    };
5713
5714    vk_buffer_init(&device->vk, &buffer->vk, &createInfo);
5715
5716    buffer->bo = bo;
5717    buffer->offset = offset;
5718 }
5719
5720 void
5721 radv_buffer_finish(struct radv_buffer *buffer)
5722 {
5723    vk_buffer_finish(&buffer->vk);
5724 }
5725
5726 static void
5727 radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5728                     struct radv_buffer *buffer)
5729 {
5730    if ((buffer->vk.create_flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
5731       device->ws->buffer_destroy(device->ws, buffer->bo);
5732
5733    radv_buffer_finish(buffer);
5734    vk_free2(&device->vk.alloc, pAllocator, buffer);
5735 }
5736
5737 VKAPI_ATTR VkResult VKAPI_CALL
5738 radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
5739                   const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
5740 {
5741    RADV_FROM_HANDLE(radv_device, device, _device);
5742    struct radv_buffer *buffer;
5743
5744    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
5745
5746    buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
5747                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
5748    if (buffer == NULL)
5749       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
5750
5751    vk_buffer_init(&device->vk, &buffer->vk, pCreateInfo);
5752    buffer->bo = NULL;
5753    buffer->offset = 0;
5754
5755    if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
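      /* Sparse buffers get a virtual BO: only the VA range is reserved
       * here, and memory is bound into it later through the sparse
       * binding path at queue submission (see
       * radv_queue_submit_bind_sparse_memory()). */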
5756       enum radeon_bo_flag flags = RADEON_FLAG_VIRTUAL;
5757       if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
5758          flags |= RADEON_FLAG_REPLAYABLE;
5759
5760       uint64_t replay_address = 0;
5761       const VkBufferOpaqueCaptureAddressCreateInfo *replay_info =
5762          vk_find_struct_const(pCreateInfo->pNext, BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
5763       if (replay_info && replay_info->opaqueCaptureAddress)
5764          replay_address = replay_info->opaqueCaptureAddress;
5765
5766       VkResult result =
5767          device->ws->buffer_create(device->ws, align64(buffer->vk.size, 4096), 4096, 0, flags,
5768                                    RADV_BO_PRIORITY_VIRTUAL, replay_address, &buffer->bo);
5769       if (result != VK_SUCCESS) {
5770          radv_destroy_buffer(device, pAllocator, buffer);
5771          return vk_error(device, result);
5772       }
5773    }
5774
5775    *pBuffer = radv_buffer_to_handle(buffer);
5776
5777    return VK_SUCCESS;
5778 }
5779
5780 VKAPI_ATTR void VKAPI_CALL
5781 radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)
5782 {
5783    RADV_FROM_HANDLE(radv_device, device, _device);
5784    RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
5785
5786    if (!buffer)
5787       return;
5788
5789    radv_destroy_buffer(device, pAllocator, buffer);
5790 }
5791
5792 VKAPI_ATTR VkDeviceAddress VKAPI_CALL
5793 radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
5794 {
5795    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
5796    return radv_buffer_get_va(buffer->bo) + buffer->offset;
5797 }
5798
5799 VKAPI_ATTR uint64_t VKAPI_CALL
5800 radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
5801 {
5802    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
5803    return buffer->bo ? radv_buffer_get_va(buffer->bo) + buffer->offset : 0;
5804 }
5805
5806 VKAPI_ATTR uint64_t VKAPI_CALL
5807 radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
5808                                          const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
5809 {
5810    RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
5811    return radv_buffer_get_va(mem->bo);
5812 }
5813
5814 static inline unsigned
5815 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
5816 {
5817    if (stencil)
5818       return plane->surface.u.legacy.zs.stencil_tiling_index[level];
5819    else
5820       return plane->surface.u.legacy.tiling_index[level];
5821 }
5822
5823 static uint32_t
5824 radv_surface_max_layer_count(struct radv_image_view *iview)
5825 {
5826    return iview->vk.view_type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
5827                                                        : (iview->vk.base_array_layer + iview->vk.layer_count);
5828 }
5829
5830 static unsigned
5831 get_dcc_max_uncompressed_block_size(const struct radv_device *device,
5832                                     const struct radv_image_view *iview)
5833 {
5834    if (device->physical_device->rad_info.gfx_level < GFX10 && iview->image->info.samples > 1) {
5835       if (iview->image->planes[0].surface.bpe == 1)
5836          return V_028C78_MAX_BLOCK_SIZE_64B;
5837       else if (iview->image->planes[0].surface.bpe == 2)
5838          return V_028C78_MAX_BLOCK_SIZE_128B;
5839    }
5840
5841    return V_028C78_MAX_BLOCK_SIZE_256B;
5842 }
5843
5844 static unsigned
5845 get_dcc_min_compressed_block_size(const struct radv_device *device)
5846 {
5847    if (!device->physical_device->rad_info.has_dedicated_vram) {
5848       /* amdvlk: [min-compressed-block-size] should be set to 32 for
5849        * dGPU and 64 for APU because all of our APUs to date use
5850        * DIMMs which have a request granularity size of 64B while all
5851        * other chips have a 32B request size.
5852        */
5853       return V_028C78_MIN_BLOCK_SIZE_64B;
5854    }
5855
5856    return V_028C78_MIN_BLOCK_SIZE_32B;
5857 }
5858
5859 static uint32_t
5860 radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
5861 {
5862    unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
5863    unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
5864    unsigned max_compressed_block_size;
5865    unsigned independent_128b_blocks;
5866    unsigned independent_64b_blocks;
5867
5868    if (!radv_dcc_enabled(iview->image, iview->vk.base_mip_level))
5869       return 0;
5870
   /* For GFX9+, ac_surface computes the values for us (except
    * min_compressed and max_uncompressed). */
5873    if (device->physical_device->rad_info.gfx_level >= GFX9) {
5874       max_compressed_block_size =
5875          iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
5876       independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
5877       independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
5878    } else {
5879       independent_128b_blocks = 0;
5880
5881       if (iview->image->vk.usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
5882                                     VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
5883          /* If this DCC image is potentially going to be used in texture
5884           * fetches, we need some special settings.
5885           */
5886          independent_64b_blocks = 1;
5887          max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
5888       } else {
5889          /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
5890           * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
5891           * big as possible for better compression state.
5892           */
5893          independent_64b_blocks = 0;
5894          max_compressed_block_size = max_uncompressed_block_size;
5895       }
5896    }
5897
5898    uint32_t result = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
5899                      S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
5900                      S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
5901                      S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
5902
5903    if (device->physical_device->rad_info.gfx_level >= GFX11) {
5904       result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) |
5905                 S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
5906                 S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level));
5907    } else {
5908       result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(independent_128b_blocks);
5909    }
5910
5911    return result;
5912 }
5913
5914 void
5915 radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
5916                               struct radv_image_view *iview)
5917 {
5918    const struct util_format_description *desc;
5919    unsigned ntype, format, swap, endian;
5920    unsigned blend_clamp = 0, blend_bypass = 0;
5921    uint64_t va;
5922    const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
5923    const struct radeon_surf *surf = &plane->surface;
5924
5925    desc = vk_format_description(iview->vk.format);
5926
5927    memset(cb, 0, sizeof(*cb));
5928
5929    /* Intensity is implemented as Red, so treat it that way. */
5930    if (device->physical_device->rad_info.gfx_level >= GFX11)
5931       cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1);
5932    else
5933       cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);
5934
5935    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
5936
5937    cb->cb_color_base = va >> 8;
5938
5939    if (device->physical_device->rad_info.gfx_level >= GFX9) {
5940       if (device->physical_device->rad_info.gfx_level >= GFX11) {
5941          cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
5942                                  S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
5943       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
5944          cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
5945                                  S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
5946                                  S_028EE0_CMASK_PIPE_ALIGNED(1) |
5947                                  S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
5948       } else {
5949          struct gfx9_surf_meta_flags meta = {
5950             .rb_aligned = 1,
5951             .pipe_aligned = 1,
5952          };
5953
5954          if (surf->meta_offset)
5955             meta = surf->u.gfx9.color.dcc;
5956
5957          cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
5958                                 S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
5959                                 S_028C74_RB_ALIGNED(meta.rb_aligned) |
5960                                 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
5961          cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
5962       }
5963
5964       cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
5965       cb->cb_color_base |= surf->tile_swizzle;
5966    } else {
5967       const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->vk.base_mip_level];
5968       unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
5969
5970       cb->cb_color_base += level_info->offset_256B;
5971       if (level_info->mode == RADEON_SURF_MODE_2D)
5972          cb->cb_color_base |= surf->tile_swizzle;
5973
5974       pitch_tile_max = level_info->nblk_x / 8 - 1;
5975       slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
5976       tile_mode_index = si_tile_mode_index(plane, iview->vk.base_mip_level, false);
5977
5978       cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
5979       cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
5980       cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;
5981
5982       cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
5983
5984       if (radv_image_has_fmask(iview->image)) {
5985          if (device->physical_device->rad_info.gfx_level >= GFX7)
5986             cb->cb_color_pitch |=
5987                S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
5988          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
5989          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
5990       } else {
5991          /* This must be set for fast clear to work without FMASK. */
5992          if (device->physical_device->rad_info.gfx_level >= GFX7)
5993             cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
5994          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
5995          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
5996       }
5997    }
5998
5999    /* CMASK variables */
6000    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6001    va += surf->cmask_offset;
6002    cb->cb_color_cmask = va >> 8;
6003
6004    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6005    va += surf->meta_offset;
6006
6007    if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) &&
6008        device->physical_device->rad_info.gfx_level <= GFX8)
6009       va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;
6010
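   /* Keep only the tile swizzle bits that fit within the DCC metadata
    * alignment; the >> 8 converts the byte mask into the 256-byte units
    * in which the swizzle and cb_dcc_base are expressed. */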
6011    unsigned dcc_tile_swizzle = surf->tile_swizzle;
6012    dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;
6013
6014    cb->cb_dcc_base = va >> 8;
6015    cb->cb_dcc_base |= dcc_tile_swizzle;
6016
6017    /* GFX10 field has the same base shift as the GFX6 field. */
6018    uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6019    cb->cb_color_view =
6020       S_028C6C_SLICE_START(iview->vk.base_array_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);
6021
6022    if (iview->image->info.samples > 1) {
6023       unsigned log_samples = util_logbase2(iview->image->info.samples);
6024
6025       if (device->physical_device->rad_info.gfx_level >= GFX11)
6026          cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
6027       else
6028          cb->cb_color_attrib |=
6029             S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
6030    }
6031
6032    if (radv_image_has_fmask(iview->image)) {
6033       va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->fmask_offset;
6034       cb->cb_color_fmask = va >> 8;
6035       cb->cb_color_fmask |= surf->fmask_tile_swizzle;
6036    } else {
6037       cb->cb_color_fmask = cb->cb_color_base;
6038    }
6039
6040    ntype = radv_translate_color_numformat(iview->vk.format, desc,
6041                                           vk_format_get_first_non_void_channel(iview->vk.format));
6042    format = radv_translate_colorformat(iview->vk.format);
6043    assert(format != V_028C70_COLOR_INVALID);
6044
6045    swap = radv_translate_colorswap(iview->vk.format, false);
6046    endian = radv_colorformat_endian_swap(format);
6047
6048    /* blend clamp should be set for all NORM/SRGB types */
6049    if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
6050        ntype == V_028C70_NUMBER_SRGB)
6051       blend_clamp = 1;
6052
   /* Set blend bypass according to the docs for SINT/UINT and the
    * 8/24 COLOR variants. */
6055    if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
6056        format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
6057        format == V_028C70_COLOR_X24_8_32_FLOAT) {
6058       blend_clamp = 0;
6059       blend_bypass = 1;
6060    }
6061 #if 0
6062         if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
6063             (format == V_028C70_COLOR_8 ||
6064              format == V_028C70_COLOR_8_8 ||
6065              format == V_028C70_COLOR_8_8_8_8))
6066                 ->color_is_int8 = true;
6067 #endif
6068    cb->cb_color_info =
6069       S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
6070       S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
6071       S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
6072                           ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
6073                           format != V_028C70_COLOR_24_8) |
6074       S_028C70_NUMBER_TYPE(ntype);
6075
6076    if (device->physical_device->rad_info.gfx_level >= GFX11)
6077       cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
6078    else
6079       cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);
6080
6081    if (radv_image_has_fmask(iview->image)) {
6082       cb->cb_color_info |= S_028C70_COMPRESSION(1);
6083       if (device->physical_device->rad_info.gfx_level == GFX6) {
6084          unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
6085          cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
6086       }
6087
6088       if (radv_image_is_tc_compat_cmask(iview->image)) {
6089          /* Allow the texture block to read FMASK directly
6090           * without decompressing it. This bit must be cleared
6091           * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
6092           * otherwise the operation doesn't happen.
6093           */
6094          cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
6095
6096          if (device->physical_device->rad_info.gfx_level == GFX8) {
6097             /* Set CMASK into a tiling format that allows
6098              * the texture block to read it.
6099              */
6100             cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
6101          }
6102       }
6103    }
6104
6105    if (radv_image_has_cmask(iview->image) &&
6106        !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
6107       cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
6108
6109    if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
6110        device->physical_device->rad_info.gfx_level < GFX11)
6111       cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
6112
6113    cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
6114
6115    /* This must be set for fast clear to work without FMASK. */
6116    if (!radv_image_has_fmask(iview->image) && device->physical_device->rad_info.gfx_level == GFX6) {
6117       unsigned bankh = util_logbase2(surf->u.legacy.bankh);
6118       cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
6119    }
6120
6121    if (device->physical_device->rad_info.gfx_level >= GFX9) {
6122       unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D
6123                                ? (iview->extent.depth - 1)
6124                                : (iview->image->info.array_size - 1);
6125       unsigned width =
6126          vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
6127       unsigned height =
6128          vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
6129
6130       if (device->physical_device->rad_info.gfx_level >= GFX10) {
6131          cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->vk.base_mip_level);
6132
6133          cb->cb_color_attrib3 |=
6134             S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
6135             S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 0 : 1);
6136       } else {
6137          cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
6138          cb->cb_color_attrib |=
6139             S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
6140       }
6141
6142       cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
6143                              S_028C68_MAX_MIP(iview->image->info.levels - 1);
6144    }
6145 }
6146
6147 static unsigned
6148 radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)
6149 {
6150    unsigned max_zplanes = 0;
6151
6152    assert(radv_image_is_tc_compat_htile(iview->image));
6153
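   /* Summary of the values computed below:
    *   GFX9+: 4 by default, 2 for multisampled D16, 1 when the two-planes
    *          ITERATE_256 workaround applies, then incremented by one.
    *   GFX8 and older: 1 for D16 (no Z plane compression), otherwise
    *          5 / 3 / 2 depending on the sample count. */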
6154    if (device->physical_device->rad_info.gfx_level >= GFX9) {
6155       /* Default value for 32-bit depth surfaces. */
6156       max_zplanes = 4;
6157
6158       if (iview->vk.format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)
6159          max_zplanes = 2;
6160
6161       /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
6162       if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
6163           radv_image_get_iterate256(device, iview->image) &&
6164           !radv_image_tile_stencil_disabled(device, iview->image) &&
6165           iview->image->info.samples == 4) {
6166          max_zplanes = 1;
6167       }
6168
6169       max_zplanes = max_zplanes + 1;
6170    } else {
6171       if (iview->vk.format == VK_FORMAT_D16_UNORM) {
         /* Do not enable Z plane compression for 16-bit depth
          * surfaces because it isn't supported on GFX8. Only
          * 32-bit depth surfaces are supported by the hardware.
          * This maintains shader compatibility and reduces the
          * number of depth decompressions.
          */
6178          max_zplanes = 1;
6179       } else {
6180          if (iview->image->info.samples <= 1)
6181             max_zplanes = 5;
6182          else if (iview->image->info.samples <= 4)
6183             max_zplanes = 3;
6184          else
6185             max_zplanes = 2;
6186       }
6187    }
6188
6189    return max_zplanes;
6190 }
6191
6192 void
6193 radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
6194                             struct radv_ds_buffer_info *ds)
6195 {
6196    const struct radeon_surf *surf = &image->planes[0].surface;
6197
6198    assert(image->vk.format == VK_FORMAT_D16_UNORM);
6199    memset(ds, 0, sizeof(*ds));
6200
6201    ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
6202
6203    ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) |
6204                    S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
6205                    S_028038_ZRANGE_PRECISION(1) |
6206                    S_028038_TILE_SURFACE_ENABLE(1);
6207    ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);
6208
6209    ds->db_depth_size = S_02801C_X_MAX(image->info.width - 1) |
6210                        S_02801C_Y_MAX(image->info.height - 1);
6211
6212    ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
6213    ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) |
6214                           S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
6215 }
6216
6217 void
6218 radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
6219                            struct radv_image_view *iview)
6220 {
6221    unsigned level = iview->vk.base_mip_level;
6222    unsigned format, stencil_format;
6223    uint64_t va, s_offs, z_offs;
6224    bool stencil_only = iview->image->vk.format == VK_FORMAT_S8_UINT;
6225    const struct radv_image_plane *plane = &iview->image->planes[0];
6226    const struct radeon_surf *surf = &plane->surface;
6227
6228    assert(vk_format_get_plane_count(iview->image->vk.format) == 1);
6229
6230    memset(ds, 0, sizeof(*ds));
6231    if (!device->instance->absolute_depth_bias) {
6232       switch (iview->image->vk.format) {
6233       case VK_FORMAT_D24_UNORM_S8_UINT:
6234       case VK_FORMAT_X8_D24_UNORM_PACK32:
6235          ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
6236          break;
6237       case VK_FORMAT_D16_UNORM:
6238       case VK_FORMAT_D16_UNORM_S8_UINT:
6239          ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
6240          break;
6241       case VK_FORMAT_D32_SFLOAT:
6242       case VK_FORMAT_D32_SFLOAT_S8_UINT:
6243          ds->pa_su_poly_offset_db_fmt_cntl =
6244             S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
6245          break;
6246       default:
6247          break;
6248       }
6249    }
6250
6251    format = radv_translate_dbformat(iview->image->vk.format);
6252    stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
6253
6254    uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6255    ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) |
6256                        S_028008_SLICE_MAX(max_slice);
6257    if (device->physical_device->rad_info.gfx_level >= GFX10) {
6258       ds->db_depth_view |= S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) |
6259                            S_028008_SLICE_MAX_HI(max_slice >> 11);
6260    }
6261
6262    ds->db_htile_data_base = 0;
6263    ds->db_htile_surface = 0;
6264
6265    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6266    s_offs = z_offs = va;
6267
6268    if (device->physical_device->rad_info.gfx_level >= GFX9) {
6269       assert(surf->u.gfx9.surf_offset == 0);
6270       s_offs += surf->u.gfx9.zs.stencil_offset;
6271
6272       ds->db_z_info = S_028038_FORMAT(format) |
6273                       S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
6274                       S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
6275                       S_028038_MAXMIP(iview->image->info.levels - 1) |
6276                       S_028038_ZRANGE_PRECISION(1) |
6277                       S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
6278       ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
6279                             S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
6280                             S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
6281
6282       if (device->physical_device->rad_info.gfx_level == GFX9) {
6283          ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
6284          ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
6285       }
6286
6287       ds->db_depth_view |= S_028008_MIPID(level);
6288       ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
6289                           S_02801C_Y_MAX(iview->image->info.height - 1);
6290
6291       if (radv_htile_enabled(iview->image, level)) {
6292          ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
6293
6294          if (radv_image_is_tc_compat_htile(iview->image)) {
6295             unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
6296
6297             ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
6298
6299             if (device->physical_device->rad_info.gfx_level >= GFX10) {
6300                bool iterate256 = radv_image_get_iterate256(device, iview->image);
6301
6302                ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
6303                ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
6304                ds->db_z_info |= S_028040_ITERATE_256(iterate256);
6305                ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
6306             } else {
6307                ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
6308                ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
6309             }
6310          }
6311
6312          if (radv_image_tile_stencil_disabled(device, iview->image)) {
6313             ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
6314          }
6315
6316          va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
6317          ds->db_htile_data_base = va >> 8;
6318          ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
6319
6320          if (device->physical_device->rad_info.gfx_level == GFX9) {
6321             ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
6322          }
6323
6324          if (radv_image_has_vrs_htile(device, iview->image)) {
6325             ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
6326          }
6327       }
6328    } else {
6329       const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
6330
6331       if (stencil_only)
6332          level_info = &surf->u.legacy.zs.stencil_level[level];
6333
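      /* Legacy level offsets are stored in 256-byte units, hence the
       * scaling here and the matching >> 8 when the read/write bases are
       * programmed at the end of the function. */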
6334       z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;
6335       s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;
6336
6337       ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
6338       ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
6339       ds->db_stencil_info = S_028044_FORMAT(stencil_format);
6340
6341       if (iview->image->info.samples > 1)
6342          ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
6343
6344       if (device->physical_device->rad_info.gfx_level >= GFX7) {
6345          struct radeon_info *info = &device->physical_device->rad_info;
6346          unsigned tiling_index = surf->u.legacy.tiling_index[level];
6347          unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
6348          unsigned macro_index = surf->u.legacy.macro_tile_index;
6349          unsigned tile_mode = info->si_tile_mode_array[tiling_index];
6350          unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
6351          unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
6352
6353          if (stencil_only)
6354             tile_mode = stencil_tile_mode;
6355
6356          ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
6357                               S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
6358                               S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
6359                               S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
6360                               S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
6361                               S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
6362          ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
6363          ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
6364       } else {
6365          unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
6366          ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
6367          tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
6368          ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
6369          if (stencil_only)
6370             ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
6371       }
6372
6373       ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
6374                           S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
6375       ds->db_depth_slice =
6376          S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
6377
6378       if (radv_htile_enabled(iview->image, level)) {
6379          ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
6380
6381          if (radv_image_tile_stencil_disabled(device, iview->image)) {
6382             ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
6383          }
6384
6385          va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
6386          ds->db_htile_data_base = va >> 8;
6387          ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
6388
6389          if (radv_image_is_tc_compat_htile(iview->image)) {
6390             unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
6391
6392             ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
6393             ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
6394          }
6395       }
6396    }
6397
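   /* DB base addresses are programmed in 256-byte units, hence the ">> 8":
    * e.g. a surface at VA 0x1_0000 is programmed as 0x100. */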
6398    ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
6399    ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
6400 }
6401
6402 static unsigned
6403 radv_tex_wrap(VkSamplerAddressMode address_mode)
6404 {
6405    switch (address_mode) {
6406    case VK_SAMPLER_ADDRESS_MODE_REPEAT:
6407       return V_008F30_SQ_TEX_WRAP;
6408    case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
6409       return V_008F30_SQ_TEX_MIRROR;
6410    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
6411       return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
6412    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
6413       return V_008F30_SQ_TEX_CLAMP_BORDER;
6414    case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
6415       return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
6416    default:
6417       unreachable("illegal tex wrap mode");
6418       break;
6419    }
6420 }
6421
6422 static unsigned
6423 radv_tex_compare(VkCompareOp op)
6424 {
6425    switch (op) {
6426    case VK_COMPARE_OP_NEVER:
6427       return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
6428    case VK_COMPARE_OP_LESS:
6429       return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
6430    case VK_COMPARE_OP_EQUAL:
6431       return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
6432    case VK_COMPARE_OP_LESS_OR_EQUAL:
6433       return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
6434    case VK_COMPARE_OP_GREATER:
6435       return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
6436    case VK_COMPARE_OP_NOT_EQUAL:
6437       return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
6438    case VK_COMPARE_OP_GREATER_OR_EQUAL:
6439       return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
6440    case VK_COMPARE_OP_ALWAYS:
6441       return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
6442    default:
6443       unreachable("illegal compare mode");
6444       break;
6445    }
6446 }
6447
6448 static unsigned
6449 radv_tex_filter(VkFilter filter, unsigned max_aniso)
6450 {
6451    switch (filter) {
6452    case VK_FILTER_NEAREST:
6453       return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
6454                             : V_008F38_SQ_TEX_XY_FILTER_POINT);
6455    case VK_FILTER_LINEAR:
6456       return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
6457                             : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
6458    case VK_FILTER_CUBIC_IMG:
6459    default:
6460       fprintf(stderr, "illegal texture filter\n");
6461       return 0;
6462    }
6463 }
6464
6465 static unsigned
6466 radv_tex_mipfilter(VkSamplerMipmapMode mode)
6467 {
6468    switch (mode) {
6469    case VK_SAMPLER_MIPMAP_MODE_NEAREST:
6470       return V_008F38_SQ_TEX_Z_FILTER_POINT;
6471    case VK_SAMPLER_MIPMAP_MODE_LINEAR:
6472       return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
6473    default:
6474       return V_008F38_SQ_TEX_Z_FILTER_NONE;
6475    }
6476 }
6477
6478 static unsigned
6479 radv_tex_bordercolor(VkBorderColor bcolor)
6480 {
6481    switch (bcolor) {
6482    case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
6483    case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
6484       return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
6485    case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
6486    case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
6487       return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
6488    case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
6489    case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
6490       return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
6491    case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
6492    case VK_BORDER_COLOR_INT_CUSTOM_EXT:
6493       return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
6494    default:
6495       break;
6496    }
6497    return 0;
6498 }
6499
6500 static unsigned
6501 radv_tex_aniso_filter(unsigned filter)
6502 {
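   /* Map the anisotropy ratio to the log2-encoded field value:
    * 1x -> 0, 2x -> 1, 4x -> 2, 8x -> 3, 16x (the hardware maximum) -> 4. */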
6503    return MIN2(util_logbase2(filter), 4);
6504 }
6505
6506 static unsigned
6507 radv_tex_filter_mode(VkSamplerReductionMode mode)
6508 {
6509    switch (mode) {
6510    case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
6511       return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
6512    case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
6513       return V_008F30_SQ_IMG_FILTER_MODE_MIN;
6514    case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
6515       return V_008F30_SQ_IMG_FILTER_MODE_MAX;
6516    default:
6517       break;
6518    }
6519    return 0;
6520 }
6521
6522 static uint32_t
6523 radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)
6524 {
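   /* A RADV_TEX_ANISO debug override (if set) takes precedence over the
    * application's sampler settings. */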
6525    if (device->force_aniso >= 0)
6526       return device->force_aniso;
6527
6528    if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)
6529       return (uint32_t)pCreateInfo->maxAnisotropy;
6530
6531    return 0;
6532 }
6533
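/* Linearly scan the global border color buffer for a free slot and upload
 * the color there. Returns RADV_BORDER_COLOR_COUNT when all slots are in
 * use; the caller falls back to a built-in border color in that case. */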
6534 static uint32_t
6535 radv_register_border_color(struct radv_device *device, VkClearColorValue value)
6536 {
6537    uint32_t slot;
6538
6539    mtx_lock(&device->border_color_data.mutex);
6540
6541    for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
6542       if (!device->border_color_data.used[slot]) {
6543          /* Copy the color to GPU-visible memory, converting to little-endian where needed. */
6544          util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
6545                                  sizeof(VkClearColorValue));
6546
6547          device->border_color_data.used[slot] = true;
6548          break;
6549       }
6550    }
6551
6552    mtx_unlock(&device->border_color_data.mutex);
6553
6554    return slot;
6555 }
6556
6557 static void
6558 radv_unregister_border_color(struct radv_device *device, uint32_t slot)
6559 {
6560    mtx_lock(&device->border_color_data.mutex);
6561
6562    device->border_color_data.used[slot] = false;
6563
6564    mtx_unlock(&device->border_color_data.mutex);
6565 }
6566
6567 static void
6568 radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
6569                   const VkSamplerCreateInfo *pCreateInfo)
6570 {
6571    uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
6572    uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
6573    bool compat_mode = device->physical_device->rad_info.gfx_level == GFX8 ||
6574                       device->physical_device->rad_info.gfx_level == GFX9;
6575    unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
6576    unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
6577    bool trunc_coord =
6578       pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
6579    bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
6580                             pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
6581                             pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
6582    VkBorderColor border_color =
6583       uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
6584    uint32_t border_color_ptr;
6585    bool disable_cube_wrap = pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT;
6586
6587    const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
6588       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
6589    if (sampler_reduction)
6590       filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
6591
6592    if (pCreateInfo->compareEnable)
6593       depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
6594
6595    sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
6596
6597    if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
6598        border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
6599       const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
6600          vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
6601
6602       assert(custom_border_color);
6603
6604       sampler->border_color_slot =
6605          radv_register_border_color(device, custom_border_color->customBorderColor);
6606
6607       /* Did we fail to find a slot? */
6608       if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
6609          fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
6610          border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
6611       }
6612    }
6613
6614    /* If we don't have a custom color, set the ptr to 0 */
6615    border_color_ptr =
6616       sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;
6617
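   /* Pack the four sampler state dwords. The LOD fields below are unsigned
    * fixed point with 8 fractional bits (e.g. maxLod = 2.5 encodes as
    * 2.5 * 256 = 640) and the LOD bias is signed, clamped to [-16, 16]. */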
6618    sampler->state[0] =
6619       (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
6620        S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
6621        S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
6622        S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
6623        S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
6624        S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
6625        S_008F30_DISABLE_CUBE_WRAP(disable_cube_wrap) | S_008F30_COMPAT_MODE(compat_mode) |
6626        S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
6627    sampler->state[1] = (S_008F34_MIN_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
6628                         S_008F34_MAX_LOD(radv_float_to_ufixed(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
6629                         S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
6630    sampler->state[2] = (S_008F38_LOD_BIAS(radv_float_to_sfixed(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
6631                         S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
6632                         S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
6633                         S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)));
6634    sampler->state[3] = S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color));
6635
6636    if (device->physical_device->rad_info.gfx_level >= GFX10) {
6637       sampler->state[2] |=
6638          S_008F38_ANISO_OVERRIDE_GFX10(device->instance->disable_aniso_single_level);
6639    } else {
6640       sampler->state[2] |=
6641          S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.gfx_level <= GFX8) |
6642          S_008F38_FILTER_PREC_FIX(1) |
6643          S_008F38_ANISO_OVERRIDE_GFX8(device->instance->disable_aniso_single_level &&
6644                                       device->physical_device->rad_info.gfx_level >= GFX8);
6645    }
6646
6647    if (device->physical_device->rad_info.gfx_level >= GFX11) {
6648       sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX11(border_color_ptr);
6649    } else {
6650       sampler->state[3] |= S_008F3C_BORDER_COLOR_PTR_GFX6(border_color_ptr);
6651    }
6652 }
6653
6654 VKAPI_ATTR VkResult VKAPI_CALL
6655 radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
6656                    const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
6657 {
6658    RADV_FROM_HANDLE(radv_device, device, _device);
6659    struct radv_sampler *sampler;
6660
6661    const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
6662       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
6663
6664    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
6665
6666    sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
6667                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6668    if (!sampler)
6669       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6670
6671    vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);
6672
6673    radv_init_sampler(device, sampler, pCreateInfo);
6674
6675    sampler->ycbcr_sampler =
6676       ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)
6677                        : NULL;
6678    *pSampler = radv_sampler_to_handle(sampler);
6679
6680    return VK_SUCCESS;
6681 }
6682
6683 VKAPI_ATTR void VKAPI_CALL
6684 radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)
6685 {
6686    RADV_FROM_HANDLE(radv_device, device, _device);
6687    RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);
6688
6689    if (!sampler)
6690       return;
6691
6692    if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
6693       radv_unregister_border_color(device, sampler->border_color_slot);
6694
6695    vk_object_base_finish(&sampler->base);
6696    vk_free2(&device->vk.alloc, pAllocator, sampler);
6697 }
6698
6699 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
6700 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
6701 {
6702    /* For the full details on loader interface versioning, see
6703     * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
6704     * What follows is a condensed summary, to help you navigate the large and
6705     * confusing official doc.
6706     *
6707     *   - Loader interface v0 is incompatible with later versions. We don't
6708     *     support it.
6709     *
6710     *   - In loader interface v1:
6711     *       - The first ICD entrypoint called by the loader is
6712     *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
6713     *         entrypoint.
6714     *       - The ICD must statically expose no other Vulkan symbol unless it is
6715     *         linked with -Bsymbolic.
6716     *       - Each dispatchable Vulkan handle created by the ICD must be
6717     *         a pointer to a struct whose first member is VK_LOADER_DATA. The
6718     *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
6719     *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
6720     *         vkDestroySurfaceKHR(). The ICD must be capable of working with
6721     *         such loader-managed surfaces.
6722     *
6723     *    - Loader interface v2 differs from v1 in:
6724     *       - The first ICD entrypoint called by the loader is
6725     *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
6726     *         statically expose this entrypoint.
6727     *
6728     *    - Loader interface v3 differs from v2 in:
6729     *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
6730     *          vkDestroySurfaceKHR(), and any other API that uses VkSurfaceKHR,
6731     *          because the loader no longer does so.
6732     *
6733     *    - Loader interface v4 differs from v3 in:
6734     *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
6735     *
6736     *    - Loader interface v5 differs from v4 in:
6737     *        - The ICD must support Vulkan API version 1.1 and must not return
6738     *          VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
6739     *          Vulkan Loader with interface v4 or smaller is being used and the
6740     *          application provides an API version that is greater than 1.0.
6741     */
6742    *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
6743    return VK_SUCCESS;
6744 }
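/* A minimal sketch of the handshake from the loader's point of view (the
 * code below is illustrative, not actual loader source): the loader passes
 * in the highest version it supports and both sides then proceed with the
 * minimum of the two.
 *
 *    uint32_t version = 6;                                // loader maximum
 *    vk_icdNegotiateLoaderICDInterfaceVersion(&version);  // version is now 5
 */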
6745
6746 VKAPI_ATTR VkResult VKAPI_CALL
6747 radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
6748 {
6749    RADV_FROM_HANDLE(radv_device, device, _device);
6750    RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
6751
6752    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
6753
6754    /* At the moment, we support only the below handle types. */
6755    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
6756           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
6757
6758    bool ret = radv_get_memory_fd(device, memory, pFD);
6759    if (!ret)
6760       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6761    return VK_SUCCESS;
6762 }
6763
6764 static uint32_t
6765 radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
6766                                         enum radeon_bo_domain domains, enum radeon_bo_flag flags,
6767                                         enum radeon_bo_flag ignore_flags)
6768 {
6769    /* Don't count GTT/CPU as relevant:
6770     *
6771     * - We're not fully consistent between the two.
6772     * - Sometimes VRAM gets VRAM|GTT.
6773     */
6774    const enum radeon_bo_domain relevant_domains =
6775       RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
6776    uint32_t bits = 0;
6777    for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
6778       if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
6779          continue;
6780
6781       if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
6782          continue;
6783
6784       bits |= 1u << i;
6785    }
6786
6787    return bits;
6788 }
6789
6790 static uint32_t
6791 radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
6792                                 enum radeon_bo_flag flags)
6793 {
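   /* Initially require only NO_CPU_ACCESS and GTT_WC to match (all other
    * flags are ignored). If that yields no memory types, progressively
    * ignore GTT_WC and then NO_CPU_ACCESS as well, so a buffer imported
    * with unusual flags still maps to at least one type where possible. */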
6794    enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
6795    uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
6796
6797    if (!bits) {
6798       ignore_flags |= RADEON_FLAG_GTT_WC;
6799       bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
6800    }
6801
6802    if (!bits) {
6803       ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
6804       bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
6805    }
6806
6807    return bits;
6808 }

6809 VKAPI_ATTR VkResult VKAPI_CALL
6810 radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
6811                               int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
6812 {
6813    RADV_FROM_HANDLE(radv_device, device, _device);
6814
6815    switch (handleType) {
6816    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
6817       enum radeon_bo_domain domains;
6818       enum radeon_bo_flag flags;
6819       if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
6820          return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
6821
6822       pMemoryFdProperties->memoryTypeBits =
6823          radv_compute_valid_memory_types(device->physical_device, domains, flags);
6824       return VK_SUCCESS;
6825    }
6826    default:
6827       /* The valid usage section for this function says:
6828        *
6829        *    "handleType must not be one of the handle types defined as
6830        *    opaque."
6831        *
6832        * So opaque handle types fall into the default "unsupported" case.
6833        */
6834       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
6835    }
6836 }
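/* An illustrative usage sketch of the dma-buf path above (error handling
 * omitted; "fd" is assumed to come from some other exporter, and the entry
 * point would be fetched via vkGetDeviceProcAddr in real code):
 *
 *    VkMemoryFdPropertiesKHR props = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
 *    };
 *    vkGetMemoryFdPropertiesKHR(dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *                               fd, &props);
 *    // props.memoryTypeBits then constrains VkMemoryAllocateInfo::memoryTypeIndex.
 */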
6837
6838 VKAPI_ATTR void VKAPI_CALL
6839 radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,
6840                                       uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,
6841                                       VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
6842 {
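   /* RADV only advertises single-device groups, so the local and remote
    * device are the same and every peer memory feature can be reported. */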
6843    assert(localDeviceIndex == remoteDeviceIndex);
6844
6845    *pPeerMemoryFeatures =
6846       VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
6847       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
6848 }
6849
6850 static const VkTimeDomainEXT radv_time_domains[] = {
6851    VK_TIME_DOMAIN_DEVICE_EXT,
6852    VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
6853 #ifdef CLOCK_MONOTONIC_RAW
6854    VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
6855 #endif
6856 };
6857
6858 VKAPI_ATTR VkResult VKAPI_CALL
6859 radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
6860                                                   uint32_t *pTimeDomainCount,
6861                                                   VkTimeDomainEXT *pTimeDomains)
6862 {
6863    int d;
6864    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
6865
6866    for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
6867       vk_outarray_append_typed(VkTimeDomainEXT, &out, i)
6868       {
6869          *i = radv_time_domains[d];
6870       }
6871    }
6872
6873    return vk_outarray_status(&out);
6874 }
6875
6876 #ifndef _WIN32
6877 static uint64_t
6878 radv_clock_gettime(clockid_t clock_id)
6879 {
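   /* Read the requested clock, falling back from CLOCK_MONOTONIC_RAW to
    * CLOCK_MONOTONIC when the former is unsupported; returns 0 on error. */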
6880    struct timespec current;
6881    int ret;
6882
6883    ret = clock_gettime(clock_id, &current);
6884 #ifdef CLOCK_MONOTONIC_RAW
6885    if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
6886       ret = clock_gettime(CLOCK_MONOTONIC, &current);
6887 #endif
6888    if (ret < 0)
6889       return 0;
6890
6891    return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
6892 }
6893
6894 VKAPI_ATTR VkResult VKAPI_CALL
6895 radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
6896                                 const VkCalibratedTimestampInfoEXT *pTimestampInfos,
6897                                 uint64_t *pTimestamps, uint64_t *pMaxDeviation)
6898 {
6899    RADV_FROM_HANDLE(radv_device, device, _device);
6900    uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
6901    int d;
6902    uint64_t begin, end;
6903    uint64_t max_clock_period = 0;
6904
6905 #ifdef CLOCK_MONOTONIC_RAW
6906    begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
6907 #else
6908    begin = radv_clock_gettime(CLOCK_MONOTONIC);
6909 #endif
6910
6911    for (d = 0; d < timestampCount; d++) {
6912       switch (pTimestampInfos[d].timeDomain) {
6913       case VK_TIME_DOMAIN_DEVICE_EXT:
6914          pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
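         /* clock_crystal_freq is in kHz, so this works out to the device
          * tick period in nanoseconds. */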
6915          uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
6916          max_clock_period = MAX2(max_clock_period, device_period);
6917          break;
6918       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
6919          pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
6920          max_clock_period = MAX2(max_clock_period, 1);
6921          break;
6922
6923 #ifdef CLOCK_MONOTONIC_RAW
6924       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
6925          pTimestamps[d] = begin;
6926          break;
6927 #endif
6928       default:
6929          pTimestamps[d] = 0;
6930          break;
6931       }
6932    }
6933
6934 #ifdef CLOCK_MONOTONIC_RAW
6935    end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
6936 #else
6937    end = radv_clock_gettime(CLOCK_MONOTONIC);
6938 #endif
6939
6940    /*
6941     * The maximum deviation is the sum of the interval over which we
6942     * perform the sampling and the maximum period of any sampled
6943     * clock. That's because the maximum skew between any two sampled
6944     * clock edges is when the sampled clock with the largest period is
6945     * sampled at the end of that period but right at the beginning of the
6946     * sampling interval and some other clock is sampled right at the
6947     * beginning of its sampling period and right at the end of the
6948     * sampling interval. Let's assume the GPU has the longest clock
6949     * period and that the application is sampling GPU and monotonic:
6950     *
6951     *                               s                 e
6952     *                    w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
6953     *   Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
6954     *
6955     *                               g
6956     *             0         1         2         3
6957     *   GPU       -----_____-----_____-----_____-----_____
6958     *
6959     *                                                m
6960     *                                       x y z 0 1 2 3 4 5 6 7 8 9 a b c
6961     *   Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
6962     *
6963     *   Interval                     <----------------->
6964     *   Deviation           <-------------------------->
6965     *
6966     *           s  = read(raw)       2
6967     *           g  = read(GPU)       1
6968     *           m  = read(monotonic) 2
6969     *           e  = read(raw)       b
6970     *
6971     * We round the sample interval up by one tick to cover sampling error
6972     * in the interval clock.
6973     */
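   /* Worked example, assuming a 100 MHz reference clock (clock_crystal_freq
    * = 100000 kHz): the device tick period is DIV_ROUND_UP(1000000, 100000)
    * = 10 ns, so two CPU reads taken 500 ns apart give
    * *pMaxDeviation = (500 + 1) + 10 = 511 ns. */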
6974
6975    uint64_t sample_interval = end - begin + 1;
6976
6977    *pMaxDeviation = sample_interval + max_clock_period;
6978
6979    return VK_SUCCESS;
6980 }
6981 #endif
6982
6983 VKAPI_ATTR void VKAPI_CALL
6984 radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
6985                                                VkSampleCountFlagBits samples,
6986                                                VkMultisamplePropertiesEXT *pMultisampleProperties)
6987 {
6988    VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
6989                                              VK_SAMPLE_COUNT_8_BIT;
6990
6991    if (samples & supported_samples) {
6992       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
6993    } else {
6994       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
6995    }
6996 }
6997
6998 VKAPI_ATTR VkResult VKAPI_CALL
6999 radv_GetPhysicalDeviceFragmentShadingRatesKHR(
7000    VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
7001    VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
7002 {
7003    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
7004                           pFragmentShadingRateCount);
7005
7006 #define append_rate(w, h, s)                                                                       \
7007    {                                                                                               \
7008       VkPhysicalDeviceFragmentShadingRateKHR rate = {                                              \
7009          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,          \
7010          .sampleCounts = s,                                                                        \
7011          .fragmentSize = {.width = w, .height = h},                                                \
7012       };                                                                                           \
7013       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;         \
7014    }
7015
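   /* Walk fragment sizes from 2x2 down to 1x1, so rates are returned in the
    * order 2x2, 2x1, 1x2, 1x1. Only 1x1 (ordinary per-pixel shading)
    * advertises every sample count. */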
7016    for (uint32_t x = 2; x >= 1; x--) {
7017       for (uint32_t y = 2; y >= 1; y--) {
7018          VkSampleCountFlagBits samples;
7019
7020          if (x == 1 && y == 1) {
7021             samples = ~0;
7022          } else {
7023             samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT |
7024                       VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
7025          }
7026
7027          append_rate(x, y, samples);
7028       }
7029    }
7030 #undef append_rate
7031
7032    return vk_outarray_status(&out);
7033 }
7034
7035 static bool
7036 radv_thread_trace_set_pstate(struct radv_device *device, bool enable)
7037 {
7038    struct radeon_winsys *ws = device->ws;
7039    enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;
7040
7041    if (device->physical_device->rad_info.has_stable_pstate) {
7042       for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
7043          for (unsigned q = 0; q < device->queue_count[i]; q++) {
7044             struct radv_queue *queue = &device->queues[i][q];
7045
7046             if (ws->ctx_set_pstate(queue->hw_ctx, pstate) < 0)
7047                return false;
7048          }
7049       }
7050    }
7051
7052    return true;
7053 }
7054
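/* Performance counters need a stable ("peak") pstate so results are
 * comparable between runs. pstate_cnt refcounts users: the first acquire
 * raises the pstate and the last release restores the default. */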
7055 bool
7056 radv_device_acquire_performance_counters(struct radv_device *device)
7057 {
7058    bool result = true;
7059    simple_mtx_lock(&device->pstate_mtx);
7060
7061    if (device->pstate_cnt == 0) {
7062       result = radv_thread_trace_set_pstate(device, true);
7063       if (result)
7064          ++device->pstate_cnt;
7065    }
7066
7067    simple_mtx_unlock(&device->pstate_mtx);
7068    return result;
7069 }
7070
7071 void
7072 radv_device_release_performance_counters(struct radv_device *device)
7073 {
7074    simple_mtx_lock(&device->pstate_mtx);
7075
7076    if (--device->pstate_cnt == 0)
7077       radv_thread_trace_set_pstate(device, false);
7078
7079    simple_mtx_unlock(&device->pstate_mtx);
7080 }
7081
7082 VKAPI_ATTR VkResult VKAPI_CALL
7083 radv_AcquireProfilingLockKHR(VkDevice _device, const VkAcquireProfilingLockInfoKHR *pInfo)
7084 {
7085    RADV_FROM_HANDLE(radv_device, device, _device);
7086    bool result = radv_device_acquire_performance_counters(device);
7087    return result ? VK_SUCCESS : VK_ERROR_UNKNOWN;
7088 }
7089
7090 VKAPI_ATTR void VKAPI_CALL
7091 radv_ReleaseProfilingLockKHR(VkDevice _device)
7092 {
7093    RADV_FROM_HANDLE(radv_device, device, _device);
7094    radv_device_release_performance_counters(device);
7095 }