/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <string.h>
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include "drm-uapi/drm_fourcc.h"
#include "drm-uapi/drm.h"
#include <xf86drm.h>

#include "anv_private.h"
#include "anv_measure.h"
#include "util/u_debug.h"
#include "util/build_id.h"
#include "util/disk_cache.h"
#include "util/mesa-sha1.h"
#include "util/os_file.h"
#include "util/os_misc.h"
#include "util/u_atomic.h"
#include "util/u_string.h"
#include "util/driconf.h"
#include "git_sha1.h"
#include "vk_util.h"
#include "vk_deferred_operation.h"
#include "vk_drm_syncobj.h"
#include "common/intel_aux_map.h"
#include "common/intel_defines.h"
#include "common/intel_uuid.h"
#include "perf/intel_perf.h"

#include "genxml/gen7_pack.h"
#include "genxml/genX_bits.h"

static const driOptionDescription anv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_KHR_PRESENT_WAIT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
      DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
      DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_ALWAYS_FLUSH_CACHE(false)
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_QUALITY
      DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
   DRI_CONF_SECTION_END
};

/* This is probably far too big but it reflects the max size used for
 * messages in OpenGL's KHR_debug.
 */
#define MAX_DEBUG_MESSAGE_LENGTH    4096

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static void
compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   char str[MAX_DEBUG_MESSAGE_LENGTH];
   struct anv_device *device = (struct anv_device *)data;
   UNUSED struct anv_instance *instance = device->physical->instance;

   va_list args;
   va_start(args, fmt);
   (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
   va_end(args);

   //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
}

static void
compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (INTEL_DEBUG(DEBUG_PERF))
      mesa_logd_v(fmt, args);

   va_end(args);
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define ANV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID
#define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define ANV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
#endif

VkResult anv_EnumerateInstanceVersion(
    uint32_t*                                   pApiVersion)
{
    *pApiVersion = ANV_API_VERSION;
    return VK_SUCCESS;
}

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation                = true,
   .KHR_external_fence_capabilities          = true,
   .KHR_external_memory_capabilities         = true,
   .KHR_external_semaphore_capabilities      = true,
   .KHR_get_physical_device_properties2      = true,
   .EXT_debug_report                         = true,
   .EXT_debug_utils                          = true,

#ifdef ANV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2            = true,
   .KHR_surface                              = true,
   .KHR_surface_protected_capabilities       = true,
   .EXT_swapchain_colorspace                 = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                      = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                          = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                         = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display                 = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                              = true,
   .KHR_get_display_properties2              = true,
   .EXT_direct_mode_display                  = true,
   .EXT_display_surface_counter              = true,
   .EXT_acquire_drm_display                  = true,
#endif
};

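/* Fill in *ext with the set of device extensions advertised for this
 * physical device.  Most entries are unconditionally true; the rest are
 * gated on hardware capabilities (e.g. ray tracing, mesh shading), kernel
 * features (e.g. syncobj CPU wait for external fences), or opt-in
 * environment/driconf switches.
 */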
static void
get_device_extensions(const struct anv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   const bool has_syncobj_wait =
      (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;

   const bool nv_mesh_shading_enabled =
      debug_get_bool_option("ANV_EXPERIMENTAL_NV_MESH_SHADER", false);

   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage                      = true,
      .KHR_16bit_storage                     = true,
      .KHR_acceleration_structure            = ANV_SUPPORT_RT &&
                                               device->info.has_ray_tracing,
      .KHR_bind_memory2                      = true,
      .KHR_buffer_device_address             = true,
      .KHR_copy_commands2                    = true,
      .KHR_create_renderpass2                = true,
      .KHR_dedicated_allocation              = true,
      .KHR_deferred_host_operations          = true,
      .KHR_depth_stencil_resolve             = true,
      .KHR_descriptor_update_template        = true,
      .KHR_device_group                      = true,
      .KHR_draw_indirect_count               = true,
      .KHR_driver_properties                 = true,
      .KHR_dynamic_rendering                 = true,
      .KHR_external_fence                    = has_syncobj_wait,
      .KHR_external_fence_fd                 = has_syncobj_wait,
      .KHR_external_memory                   = true,
      .KHR_external_memory_fd                = true,
      .KHR_external_semaphore                = true,
      .KHR_external_semaphore_fd             = true,
      .KHR_format_feature_flags2             = true,
      .KHR_fragment_shading_rate             = device->info.ver >= 11,
      .KHR_get_memory_requirements2          = true,
      .KHR_image_format_list                 = true,
      .KHR_imageless_framebuffer             = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_incremental_present               = true,
#endif
      .KHR_maintenance1                      = true,
      .KHR_maintenance2                      = true,
      .KHR_maintenance3                      = true,
      .KHR_maintenance4                      = true,
      .KHR_multiview                         = true,
      .KHR_performance_query =
         device->perf &&
         (device->perf->i915_perf_version >= 3 ||
          INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
         device->use_call_secondary,
      .KHR_pipeline_executable_properties    = true,
      .KHR_pipeline_library                  = true,
      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet.  There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now.  If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way. */
      .KHR_present_id =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait =
         driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
         wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor                   = true,
      .KHR_ray_query                         =
         ANV_SUPPORT_RT && device->info.has_ray_tracing,
      .KHR_ray_tracing_pipeline              =
         ANV_SUPPORT_RT && device->info.has_ray_tracing,
      .KHR_relaxed_block_layout              = true,
      .KHR_sampler_mirror_clamp_to_edge      = true,
      .KHR_sampler_ycbcr_conversion          = true,
      .KHR_separate_depth_stencil_layouts    = true,
      .KHR_shader_atomic_int64               = true,
      .KHR_shader_clock                      = true,
      .KHR_shader_draw_parameters            = true,
      .KHR_shader_float16_int8               = true,
      .KHR_shader_float_controls             = true,
      .KHR_shader_integer_dot_product        = true,
      .KHR_shader_non_semantic_info          = true,
      .KHR_shader_subgroup_extended_types    = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation       = true,
      .KHR_spirv_1_4                         = true,
      .KHR_storage_buffer_storage_class      = true,
#ifdef ANV_USE_WSI_PLATFORM
      .KHR_swapchain                         = true,
      .KHR_swapchain_mutable_format          = true,
#endif
      .KHR_synchronization2                  = true,
      .KHR_timeline_semaphore                = true,
      .KHR_uniform_buffer_standard_layout    = true,
      .KHR_variable_pointers                 = true,
      .KHR_vulkan_memory_model               = true,
      .KHR_workgroup_memory_explicit_layout  = true,
      .KHR_zero_initialize_workgroup_memory  = true,
      .EXT_4444_formats                      = true,
      .EXT_border_color_swizzle              = true,
      .EXT_buffer_device_address             = true,
      .EXT_calibrated_timestamps             = device->has_reg_timestamp,
      .EXT_color_write_enable                = true,
      .EXT_conditional_rendering             = true,
      .EXT_conservative_rasterization        = true,
      .EXT_custom_border_color               = true,
      .EXT_depth_clamp_zero_one              = true,
      .EXT_depth_clip_control                = true,
      .EXT_depth_clip_enable                 = true,
      .EXT_descriptor_indexing               = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control                   = true,
#endif
      .EXT_extended_dynamic_state            = true,
      .EXT_extended_dynamic_state2           = true,
      .EXT_extended_dynamic_state3           = true,
      .EXT_external_memory_dma_buf           = true,
      .EXT_external_memory_host              = true,
      .EXT_fragment_shader_interlock         = true,
      .EXT_global_priority                   = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_global_priority_query             = device->max_context_priority >=
                                               VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
      .EXT_host_query_reset                  = true,
      .EXT_image_2d_view_of_3d               = true,
      .EXT_image_robustness                  = true,
      .EXT_image_drm_format_modifier         = true,
      .EXT_image_view_min_lod                = true,
      .EXT_index_type_uint8                  = true,
      .EXT_inline_uniform_block              = true,
      .EXT_line_rasterization                = true,
      /* Enable the extension only if we have support on both local &
       * system memory
       */
      .EXT_memory_budget                     = (!device->info.has_local_mem ||
                                                device->vram_mappable.available > 0) &&
                                               device->sys.available,
      .EXT_mesh_shader                       = device->info.has_mesh_shading,
      .EXT_mutable_descriptor_type           = true,
      .EXT_non_seamless_cube_map             = true,
      .EXT_pci_bus_info                      = true,
      .EXT_physical_device_drm               = true,
      .EXT_pipeline_creation_cache_control   = true,
      .EXT_pipeline_creation_feedback        = true,
      .EXT_post_depth_coverage               = true,
      .EXT_primitives_generated_query        = true,
      .EXT_primitive_topology_list_restart   = true,
      .EXT_private_data                      = true,
      .EXT_provoking_vertex                  = true,
      .EXT_queue_family_foreign              = true,
      .EXT_robustness2                       = true,
      .EXT_sample_locations                  = true,
      .EXT_sampler_filter_minmax             = true,
      .EXT_scalar_block_layout               = true,
      .EXT_separate_stencil_usage            = true,
      .EXT_shader_atomic_float               = true,
      .EXT_shader_atomic_float2              = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier          = true,
      .EXT_shader_stencil_export             = true,
      .EXT_shader_subgroup_ballot            = true,
      .EXT_shader_subgroup_vote              = true,
      .EXT_shader_viewport_index_layer       = true,
      .EXT_subgroup_size_control             = true,
      .EXT_texel_buffer_alignment            = true,
      .EXT_tooling_info                      = true,
      .EXT_transform_feedback                = true,
      .EXT_vertex_attribute_divisor          = true,
      .EXT_ycbcr_image_arrays                = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = true,
      .ANDROID_native_buffer                 = true,
#endif
      .GOOGLE_decorate_string                = true,
      .GOOGLE_hlsl_functionality1            = true,
      .GOOGLE_user_type                      = true,
      .INTEL_performance_query               = device->perf &&
                                               device->perf->i915_perf_version >= 3,
      .INTEL_shader_integer_functions2       = true,
      .EXT_multi_draw                        = true,
      .NV_compute_shader_derivatives         = true,
      .NV_mesh_shader                        = device->info.has_mesh_shading &&
                                               nv_mesh_shading_enabled,
      .VALVE_mutable_descriptor_type         = true,
   };
}

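/* Compute how much of system RAM we advertise as the system memory heap.
 * For example, with 16 GiB of RAM, a large enough GTT, and 48-bit address
 * support, this returns 12 GiB (3/4 of 16 GiB); with 4 GiB of RAM it
 * returns 2 GiB (half).
 */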
static uint64_t
anv_compute_sys_heap_size(struct anv_physical_device *device,
                          uint64_t total_ram)
{
   /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
    * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   /* We also want to leave some padding for things we allocate in the driver,
    * so don't go over 3/4 of the GTT either.
    */
   available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);

   if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
      /* When running with an overridden PCI ID, we may get a GTT size from
       * the kernel that is greater than 2 GiB but the execbuf check for 48bit
       * address support can still fail.  Just clamp the address space size to
       * 2 GiB if we don't have 48-bit support.
       */
      mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
                "no support for 48-bit addresses",
                __FILE__, __LINE__);
      available_ram = 2ull << 30;
   }

   return available_ram;
}

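/* Seed the physical device's memory regions (system RAM plus the mappable
 * and non-mappable VRAM regions, if any) from the sizes and free-space
 * figures intel_device_info already gathered from the kernel.
 */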
static VkResult MUST_CHECK
anv_init_meminfo(struct anv_physical_device *device, int fd)
{
   const struct intel_device_info *devinfo = &device->info;

   device->sys.region.memory_class = devinfo->mem.sram.mem_class;
   device->sys.region.memory_instance = devinfo->mem.sram.mem_instance;
   device->sys.size =
      anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
   device->sys.available = devinfo->mem.sram.mappable.free;

   device->vram_mappable.region.memory_class = devinfo->mem.vram.mem_class;
   device->vram_mappable.region.memory_instance =
      devinfo->mem.vram.mem_instance;
   device->vram_mappable.size = devinfo->mem.vram.mappable.size;
   device->vram_mappable.available = devinfo->mem.vram.mappable.free;

   device->vram_non_mappable.region.memory_class =
      devinfo->mem.vram.mem_class;
   device->vram_non_mappable.region.memory_instance =
      devinfo->mem.vram.mem_instance;
   device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;

   return VK_SUCCESS;
}

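/* Refresh the cached free-space figures for each memory region from the
 * kernel; if the query fails, the previously cached values are kept.
 */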
static void
anv_update_meminfo(struct anv_physical_device *device, int fd)
{
   if (!intel_device_info_update_memory_info(&device->info, fd))
      return;

   const struct intel_device_info *devinfo = &device->info;
   device->sys.available = devinfo->mem.sram.mappable.free;
   device->vram_mappable.available = devinfo->mem.vram.mappable.free;
   device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
}


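/* Build the memory heap/type tables we later report through
 * vkGetPhysicalDeviceMemoryProperties.  The layout depends on whether the
 * device has local memory (discrete) and, for integrated parts, whether
 * the GPU shares an LLC with the CPU.
 */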
static VkResult
anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
{
   VkResult result = anv_init_meminfo(device, fd);
   if (result != VK_SUCCESS)
      return result;

   assert(device->sys.size != 0);

   if (anv_physical_device_has_vram(device)) {
      /* We can create 2 or 3 different heaps when we have local memory
       * support: the first heap has the local memory size, the second has
       * the system memory size, and a third is added only when part of the
       * vram is not mappable to the host.
       */
      device->memory.heap_count = 2;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         /* If there is a vram_non_mappable, use that for the device only
          * heap. Otherwise use the vram_mappable.
          */
         .size = device->vram_non_mappable.size != 0 ?
                 device->vram_non_mappable.size : device->vram_mappable.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = true,
      };
      device->memory.heaps[1] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = 0,
         .is_local_mem = false,
      };
      /* Add an additional smaller vram mappable heap if we can't map all the
       * vram to the host.
       */
      if (device->vram_non_mappable.size > 0) {
         device->memory.heap_count++;
         device->memory.heaps[2] = (struct anv_memory_heap) {
            .size = device->vram_mappable.size,
            .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
            .is_local_mem = true,
         };
      }

      device->memory.type_count = 3;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 1,
      };
      device->memory.types[2] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         /* This memory type either comes from heaps[0] if there is only a
          * mappable vram region, or from heaps[2] if there are both mappable
          * & non-mappable vram regions.
          */
         .heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0,
      };
   } else if (device->info.has_llc) {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };

      /* Big core GPUs share LLC with the CPU and thus one memory type can be
       * both cached and coherent at the same time.
       */
      device->memory.type_count = 1;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
   } else {
      device->memory.heap_count = 1;
      device->memory.heaps[0] = (struct anv_memory_heap) {
         .size = device->sys.size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
         .is_local_mem = false,
      };

      /* The spec requires that we expose a host-visible, coherent memory
       * type, but Atom GPUs don't share LLC.  Thus we offer two memory types
       * to give the application a choice between cached but not coherent,
       * and coherent but uncached (WC though).
       */
      device->memory.type_count = 2;
      device->memory.types[0] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
      device->memory.types[1] = (struct anv_memory_type) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = 0,
      };
   }

   for (unsigned i = 0; i < device->memory.type_count; i++) {
      VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
      if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
          !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
#ifdef SUPPORT_INTEL_INTEGRATED_GPUS
         device->memory.need_clflush = true;
#else
         return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                          "Memory configuration requires flushing, but it's not implemented for this architecture");
#endif
   }

   return VK_SUCCESS;
}

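/* Derive the driver, device, and pipeline-cache UUIDs.  The pipeline cache
 * UUID is a SHA-1 over the driver's build-id, the PCI device ID, and the
 * bindless setting, so a change to any of those invalidates cached
 * pipelines.
 */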
static VkResult
anv_physical_device_init_uuids(struct anv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short.  It needs to be a SHA");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid.  It needs both a driver build and the PCI ID of the device.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
                     sizeof(device->info.pci_device_id));
   _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
                     sizeof(device->always_use_bindless));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
   intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);

   return VK_SUCCESS;
}

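/* Create the on-disk shader cache, keyed by "anv_<pci-id>" and timestamped
 * with the driver build SHA-1 so entries from other driver builds are not
 * reused.
 */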
static void
anv_physical_device_init_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   char renderer[10];
   ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
                               device->info.pci_device_id);
   assert(len == sizeof(renderer) - 2);

   char timestamp[41];
   _mesa_sha1_format(timestamp, device->driver_build_sha1);

   const uint64_t driver_flags =
      brw_get_compiler_config_value(device->compiler);
   device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
#endif
}

static void
anv_physical_device_free_disk_cache(struct anv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   if (device->vk.disk_cache) {
      disk_cache_destroy(device->vk.disk_cache);
      device->vk.disk_cache = NULL;
   }
#else
   assert(device->vk.disk_cache == NULL);
#endif
}

/* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
 * queue overrides.
 *
 * To override the number of queues:
 *  * "gc" is for graphics queues with compute support
 *  * "g" is for graphics queues with no compute support
 *  * "c" is for compute queues with no graphics support
 *
 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
 * with compute-only support.
 *
 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
 * include 1 queue with compute-only support, but it will not change the
 * number of graphics+compute queues.
 *
 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
 * to include 1 queue with compute-only support, and it would override the
 * number of graphics+compute queues to be 0.
 */
static void
anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
{
   int gc_override = -1;
   int g_override = -1;
   int c_override = -1;
   char *env = getenv("ANV_QUEUE_OVERRIDE");

   if (env == NULL)
      return;

   env = strdup(env);
   char *save = NULL;
   char *next = strtok_r(env, ",", &save);
   while (next != NULL) {
      if (strncmp(next, "gc=", 3) == 0) {
         gc_override = strtol(next + 3, NULL, 0);
      } else if (strncmp(next, "g=", 2) == 0) {
         g_override = strtol(next + 2, NULL, 0);
      } else if (strncmp(next, "c=", 2) == 0) {
         c_override = strtol(next + 2, NULL, 0);
      } else {
         mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
      }
      next = strtok_r(NULL, ",", &save);
   }
   free(env);
   if (gc_override >= 0)
      *gc_count = gc_override;
   if (g_override >= 0)
      *g_count = g_override;
   if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
      mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
                "Vulkan specification");
   if (c_override >= 0)
      *c_count = c_override;
}

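/* Enumerate the queue families we advertise.  With kernel engine info we
 * expose up to three families (graphics+compute, graphics-only, and
 * compute-only, each backed by a render or compute engine); without it we
 * fall back to a single graphics+compute family with one queue.
 */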
static void
anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
{
   uint32_t family_count = 0;

   if (pdevice->engine_info) {
      int gc_count =
         intel_engines_count(pdevice->engine_info,
                             INTEL_ENGINE_CLASS_RENDER);
      int g_count = 0;
      int c_count = 0;
      if (debug_get_bool_option("INTEL_COMPUTE_CLASS", false))
         c_count = intel_engines_count(pdevice->engine_info,
                                       INTEL_ENGINE_CLASS_COMPUTE);
      enum intel_engine_class compute_class =
         c_count < 1 ? INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE;

      anv_override_engine_counts(&gc_count, &g_count, &c_count);

      if (gc_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = gc_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (g_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = g_count,
            .engine_class = INTEL_ENGINE_CLASS_RENDER,
         };
      }
      if (c_count > 0) {
         pdevice->queue.families[family_count++] = (struct anv_queue_family) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT,
            .queueCount = c_count,
            .engine_class = compute_class,
         };
      }
      /* Increase count below when other families are added as a reminder to
       * increase the ANV_MAX_QUEUE_FAMILIES value.
       */
      STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
   } else {
      /* Default to a single render queue */
      pdevice->queue.families[family_count++] = (struct anv_queue_family) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT,
         .queueCount = 1,
         .engine_class = INTEL_ENGINE_CLASS_RENDER,
      };
      family_count = 1;
   }
   assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
   pdevice->queue.family_count = family_count;
}

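/* Query the i915 kernel driver for required and optional features.  Missing
 * hard requirements (gem wait, execbuf2, WC mmap on non-LLC parts, softpin,
 * syncobj) fail initialization; the rest only toggle capability bits.
 */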
static VkResult
anv_i915_physical_device_get_parameters(struct anv_physical_device *device)
{
   VkResult result = VK_SUCCESS;
   int val, fd = device->local_fd;

   if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing gem wait");
      return result;
   }

   if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing execbuf2");
      return result;
   }

   if (!device->info.has_llc &&
       (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing wc mmap");
      return result;
   }

   if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing softpin");
      return result;
   }

   if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
      result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing syncobj support");
      return result;
   }

   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
      device->has_exec_async = val;
   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
      device->has_exec_capture = val;

   /* Sorted low to high; walk up until a priority is unsupported. */
   const VkQueueGlobalPriorityKHR priorities[] = {
         VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
         VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
         VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
         VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
   };
   device->max_context_priority = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
   for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
      if (!anv_gem_has_context_priority(fd, priorities[i]))
         break;
      device->max_context_priority = priorities[i];
   }

   if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
      device->has_exec_timeline = val;

   return result;
}

static VkResult
anv_physical_device_get_parameters(struct anv_physical_device *device)
{
   return anv_i915_physical_device_get_parameters(device);
}

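/* Attempt to create an anv_physical_device for one DRM device.  Returns
 * VK_ERROR_INCOMPATIBLE_DRIVER for non-Intel devices and for generations
 * this driver does not handle (< Gen9 is left to hasvk, > Gen12 is not yet
 * supported).
 */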
static VkResult
anv_physical_device_try_create(struct vk_instance *vk_instance,
                               struct _drmDevice *drm_device,
                               struct vk_physical_device **out)
{
   struct anv_instance *instance =
      container_of(vk_instance, struct anv_instance, vk);

   if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
       drm_device->bustype != DRM_BUS_PCI ||
       drm_device->deviceinfo.pci->vendor_id != 0x8086)
      return VK_ERROR_INCOMPATIBLE_DRIVER;

   const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
   const char *path = drm_device->nodes[DRM_NODE_RENDER];
   VkResult result;
   int fd;
   int master_fd = -1;

   brw_process_intel_debug_variable();

   fd = open(path, O_RDWR | O_CLOEXEC);
   if (fd < 0) {
      if (errno == ENOMEM) {
         return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                          "Unable to open device %s: out of memory", path);
      }
      return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                       "Unable to open device %s: %m", path);
   }

   struct intel_device_info devinfo;
   if (!intel_get_device_info_from_fd(fd, &devinfo)) {
      result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
      goto fail_fd;
   }

   if (devinfo.ver > 12) {
      result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Vulkan not yet supported on %s", devinfo.name);
      goto fail_fd;
   } else if (devinfo.ver < 9) {
      /* Silently fail here, hasvk should pick up this device. */
      result = VK_ERROR_INCOMPATIBLE_DRIVER;
      goto fail_fd;
   }

   struct anv_physical_device *device =
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (device == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_alloc;
   }
   device->instance = instance;

   assert(strlen(path) < ARRAY_SIZE(device->path));
   snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);

   device->info = devinfo;

   device->local_fd = fd;
   result = anv_physical_device_get_parameters(device);
   if (result != VK_SUCCESS)
      goto fail_base;

   device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
                                              device->info.aperture_bytes;

   /* We only allow 48-bit addresses with softpin because knowing the actual
    * address is required for the vertex cache flush workaround.
    */
   device->supports_48bit_addresses =
      device->gtt_size > (4ULL << 30 /* GiB */);

   /* We currently only have the right bits for instructions in Gen12+. If the
    * kernel ever starts supporting that feature on previous generations,
    * we'll need to edit genxml before enabling it here.
    */
   device->has_protected_contexts = device->info.ver >= 12 &&
      intel_gem_supports_protected_context(fd);

   result = anv_physical_device_init_heaps(device, fd);
   if (result != VK_SUCCESS)
      goto fail_base;

   if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
      device->has_exec_timeline = false;

   unsigned st_idx = 0;

   device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
   if (!device->has_exec_timeline)
      device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
   device->sync_types[st_idx++] = &device->sync_syncobj_type;

   if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
      device->sync_types[st_idx++] = &anv_bo_sync_type;

   if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
      device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
      device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
   }

   device->sync_types[st_idx++] = NULL;
   assert(st_idx <= ARRAY_SIZE(device->sync_types));
   device->vk.supported_sync_types = device->sync_types;

   device->vk.pipeline_cache_import_ops = anv_cache_import_ops;

   device->always_use_bindless =
      debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);

   device->use_call_secondary =
      !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);

   device->has_implicit_ccs = device->info.has_aux_map ||
                              device->info.verx10 >= 125;

   /* Check if we can read the GPU timestamp register from the CPU */
   uint64_t u64_ignore;
   device->has_reg_timestamp = intel_gem_read_render_timestamp(fd, &u64_ignore);

   device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
      driQueryOptionb(&instance->dri_options, "always_flush_cache");

   device->compiler = brw_compiler_create(NULL, &device->info);
   if (device->compiler == NULL) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_base;
   }
   device->compiler->shader_debug_log = compiler_debug_log;
   device->compiler->shader_perf_log = compiler_perf_log;
   device->compiler->constant_buffer_0_is_relative =
      !device->info.has_context_isolation;
   device->compiler->supports_shader_constants = true;
   device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;

   isl_device_init(&device->isl_dev, &device->info);

   result = anv_physical_device_init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail_compiler;

   anv_physical_device_init_disk_cache(device);

   if (instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         /* fail if we don't have permission to even render on this device */
         if (!intel_gem_can_render_on_fd(master_fd)) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
   device->master_fd = master_fd;

   device->engine_info = intel_engine_get_info(fd);
   anv_physical_device_init_queue_families(device);

   anv_physical_device_init_perf(device, fd);

   get_device_extensions(device, &device->vk.supported_extensions);

   /* Gather major/minor before WSI. */
   struct stat st;

   if (stat(primary_path, &st) == 0) {
      device->has_master = true;
      device->master_major = major(st.st_rdev);
      device->master_minor = minor(st.st_rdev);
   } else {
      device->has_master = false;
      device->master_major = 0;
      device->master_minor = 0;
   }

   if (stat(path, &st) == 0) {
      device->has_local = true;
      device->local_major = major(st.st_rdev);
      device->local_minor = minor(st.st_rdev);
   } else {
      device->has_local = false;
      device->local_major = 0;
      device->local_minor = 0;
   }

   result = anv_init_wsi(device);
   if (result != VK_SUCCESS)
      goto fail_perf;

   anv_measure_device_init(device);

   anv_genX(&device->info, init_physical_device_state)(device);

   *out = &device->vk;

   return VK_SUCCESS;

fail_perf:
   ralloc_free(device->perf);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
fail_compiler:
   ralloc_free(device->compiler);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
anv_physical_device_destroy(struct vk_physical_device *vk_device)
{
   struct anv_physical_device *device =
      container_of(vk_device, struct anv_physical_device, vk);

   anv_finish_wsi(device);
   anv_measure_device_destroy(device);
   free(device->engine_info);
   anv_physical_device_free_disk_cache(device);
   ralloc_free(device->compiler);
   ralloc_free(device->perf);
   close(device->local_fd);
   if (device->master_fd >= 0)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

VkResult anv_EnumerateInstanceExtensionProperties(
    const char*                                 pLayerName,
    uint32_t*                                   pPropertyCount,
    VkExtensionProperties*                      pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &instance_extensions, pPropertyCount, pProperties);
}

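/* Parse drirc config files for the "anv" driver and cache the options the
 * driver consults at runtime on the instance.
 */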
static void
anv_init_dri_options(struct anv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
                      ARRAY_SIZE(anv_dri_options));
   driParseConfigFiles(&instance->dri_options,
                       &instance->available_dri_options, 0, "anv", NULL, NULL,
                       instance->vk.app_info.app_name,
                       instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name,
                       instance->vk.app_info.engine_version);

   instance->assume_full_subgroups =
      driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
   instance->limit_trig_input_range =
      driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
   instance->sample_mask_out_opengl_behaviour =
      driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
   instance->lower_depth_range_rate =
      driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
   instance->fp64_workaround_enabled =
      driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
}

VkResult anv_CreateInstance(
    const VkInstanceCreateInfo*                 pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkInstance*                                 pInstance)
{
   struct anv_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator, sizeof(*instance), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &anv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk, &instance_extensions,
                             &dispatch_table, pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
   instance->vk.physical_devices.destroy = anv_physical_device_destroy;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   anv_init_dri_options(instance);

   intel_driver_ds_init();

   *pInstance = anv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void anv_DestroyInstance(
    VkInstance                                  _instance,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);

   if (!instance)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

void anv_GetPhysicalDeviceFeatures(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceFeatures*                   pFeatures)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);

   /* Just pick one; they're all the same */
   const bool has_astc_ldr =
      isl_format_supports_sampling(&pdevice->info,
                                   ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);

   *pFeatures = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess                       = true,
      .fullDrawIndexUint32                      = true,
      .imageCubeArray                           = true,
      .independentBlend                         = true,
      .geometryShader                           = true,
      .tessellationShader                       = true,
      .sampleRateShading                        = true,
      .dualSrcBlend                             = true,
      .logicOp                                  = true,
      .multiDrawIndirect                        = true,
      .drawIndirectFirstInstance                = true,
      .depthClamp                               = true,
      .depthBiasClamp                           = true,
      .fillModeNonSolid                         = true,
      .depthBounds                              = pdevice->info.ver >= 12,
      .wideLines                                = true,
      .largePoints                              = true,
      .alphaToOne                               = true,
      .multiViewport                            = true,
      .samplerAnisotropy                        = true,
      .textureCompressionETC2                   = true,
      .textureCompressionASTC_LDR               = has_astc_ldr,
      .textureCompressionBC                     = true,
      .occlusionQueryPrecise                    = true,
      .pipelineStatisticsQuery                  = true,
      .fragmentStoresAndAtomics                 = true,
      .shaderTessellationAndGeometryPointSize   = true,
      .shaderImageGatherExtended                = true,
      .shaderStorageImageExtendedFormats        = true,
      .shaderStorageImageMultisample            = false,
      .shaderStorageImageReadWithoutFormat      = false,
      .shaderStorageImageWriteWithoutFormat     = true,
      .shaderUniformBufferArrayDynamicIndexing  = true,
      .shaderSampledImageArrayDynamicIndexing   = true,
      .shaderStorageBufferArrayDynamicIndexing  = true,
      .shaderStorageImageArrayDynamicIndexing   = true,
      .shaderClipDistance                       = true,
      .shaderCullDistance                       = true,
      .shaderFloat64                            = pdevice->info.has_64bit_float,
      .shaderInt64                              = true,
      .shaderInt16                              = true,
      .shaderResourceMinLod                     = true,
      .variableMultisampleRate                  = true,
      .inheritedQueries                         = true,
   };

   /* We can't do image stores in vec4 shaders */
   pFeatures->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];

   struct vk_app_info *app_info = &pdevice->instance->vk.app_info;

   /* The new DOOM and Wolfenstein games require depthBounds without
    * checking for it.  They seem to run fine without it so just claim it's
    * there and accept the consequences.
    */
   if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
      pFeatures->depthBounds = true;
}

1247 static void
1248 anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
1249                                      VkPhysicalDeviceVulkan11Features *f)
1250 {
1251    assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
1252
   f->storageBuffer16BitAccess            = true;
   f->uniformAndStorageBuffer16BitAccess  = true;
   f->storagePushConstant16               = true;
   f->storageInputOutput16                = false;
   f->multiview                           = true;
   f->multiviewGeometryShader             = true;
   f->multiviewTessellationShader         = true;
   f->variablePointersStorageBuffer       = true;
   f->variablePointers                    = true;
   f->protectedMemory                     = false;
   f->samplerYcbcrConversion              = true;
   f->shaderDrawParameters                = true;
}

static void
anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
                                     VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge            = true;
   f->drawIndirectCount                   = true;
   f->storageBuffer8BitAccess             = true;
   f->uniformAndStorageBuffer8BitAccess   = true;
   f->storagePushConstant8                = true;
   f->shaderBufferInt64Atomics            = true;
   f->shaderSharedInt64Atomics            = false;
   f->shaderFloat16                       = true;
   f->shaderInt8                          = true;

   f->descriptorIndexing                                 = true;
   f->shaderInputAttachmentArrayDynamicIndexing          = false;
   f->shaderUniformTexelBufferArrayDynamicIndexing       = true;
   f->shaderStorageTexelBufferArrayDynamicIndexing       = true;
   f->shaderUniformBufferArrayNonUniformIndexing         = false;
   f->shaderSampledImageArrayNonUniformIndexing          = true;
   f->shaderStorageBufferArrayNonUniformIndexing         = true;
   f->shaderStorageImageArrayNonUniformIndexing          = true;
   f->shaderInputAttachmentArrayNonUniformIndexing       = false;
   f->shaderUniformTexelBufferArrayNonUniformIndexing    = true;
   f->shaderStorageTexelBufferArrayNonUniformIndexing    = true;
   f->descriptorBindingUniformBufferUpdateAfterBind      = true;
   f->descriptorBindingSampledImageUpdateAfterBind       = true;
   f->descriptorBindingStorageImageUpdateAfterBind       = true;
   f->descriptorBindingStorageBufferUpdateAfterBind      = true;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   f->descriptorBindingUpdateUnusedWhilePending          = true;
   f->descriptorBindingPartiallyBound                    = true;
   f->descriptorBindingVariableDescriptorCount           = true;
   f->runtimeDescriptorArray                             = true;

   f->samplerFilterMinmax                 = true;
   f->scalarBlockLayout                   = true;
   f->imagelessFramebuffer                = true;
   f->uniformBufferStandardLayout         = true;
   f->shaderSubgroupExtendedTypes         = true;
   f->separateDepthStencilLayouts         = true;
   f->hostQueryReset                      = true;
   f->timelineSemaphore                   = true;
   f->bufferDeviceAddress                 = true;
   f->bufferDeviceAddressCaptureReplay    = true;
   f->bufferDeviceAddressMultiDevice      = false;
   f->vulkanMemoryModel                   = true;
   f->vulkanMemoryModelDeviceScope        = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = true;
   f->shaderOutputViewportIndex           = true;
   f->shaderOutputLayer                   = true;
   f->subgroupBroadcastDynamicId          = true;
}

static void
anv_get_physical_device_features_1_3(struct anv_physical_device *pdevice,
                                     VkPhysicalDeviceVulkan13Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);

   f->robustImageAccess = true;
   f->inlineUniformBlock = true;
   f->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   f->pipelineCreationCacheControl = true;
   f->privateData = true;
   f->shaderDemoteToHelperInvocation = true;
   f->shaderTerminateInvocation = true;
   f->subgroupSizeControl = true;
   f->computeFullSubgroups = true;
   f->synchronization2 = true;
   f->textureCompressionASTC_HDR = false;
   f->shaderZeroInitializeWorkgroupMemory = true;
   f->dynamicRendering = true;
   f->shaderIntegerDotProduct = true;
   f->maintenance4 = true;
}

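/*
 * Illustrative only, not driver code: the helpers above fill the core
 * feature structs that an application queries by chaining them through
 * pNext and calling vkGetPhysicalDeviceFeatures2().  A minimal sketch with
 * hypothetical variable names:
 *
 *    VkPhysicalDeviceVulkan13Features f13 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
 *    };
 *    VkPhysicalDeviceVulkan12Features f12 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
 *       .pNext = &f13,
 *    };
 *    VkPhysicalDeviceFeatures2 features2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
 *       .pNext = &f12,
 *    };
 *    vkGetPhysicalDeviceFeatures2(physical_device, &features2);
 *    if (f12.timelineSemaphore) { ... }
 */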
void anv_GetPhysicalDeviceFeatures2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceFeatures2*                  pFeatures)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   anv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   anv_get_physical_device_features_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Features core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   };
   anv_get_physical_device_features_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pFeatures->pNext) {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features =
            (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
         VkPhysicalDeviceAccelerationStructureFeaturesKHR *features = (void *)ext;
         features->accelerationStructure =
            ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
         features->accelerationStructureCaptureReplay = false; /* TODO */
         features->accelerationStructureIndirectBuild = false; /* TODO */
         features->accelerationStructureHostCommands = false;
         features->descriptorBindingAccelerationStructureUpdateAfterBind =
            ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = (void *)ext;
         features->bufferDeviceAddress = true;
         features->bufferDeviceAddressCaptureReplay = false;
         features->bufferDeviceAddressMultiDevice = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: {
         VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features =
            (VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *)ext;
         features->borderColorSwizzle = true;
         features->borderColorSwizzleFromImage = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
         features->colorWriteEnable = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
         VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
            (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
         features->image2DViewOf3D = true;
         features->sampler2DViewOf3D = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = true;
         features->computeDerivativeGroupLinear = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
            (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
         features->customBorderColors = true;
         features->customBorderColorWithoutFormat = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLAMP_ZERO_ONE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClampZeroOneFeaturesEXT *features =
            (VkPhysicalDeviceDepthClampZeroOneFeaturesEXT *)ext;
         features->depthClampZeroOne = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
         VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
            (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
         features->fragmentShaderSampleInterlock = true;
         features->fragmentShaderPixelInterlock = true;
         features->fragmentShaderShadingRateInterlock = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR: {
         VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *features =
            (VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *)ext;
         features->globalPriorityQuery = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
         VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
            (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
         features->pipelineFragmentShadingRate = true;
         features->primitiveFragmentShadingRate =
            pdevice->info.has_coarse_pixel_primitive_and_cb;
         features->attachmentFragmentShadingRate =
            pdevice->info.has_coarse_pixel_primitive_and_cb;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: {
         VkPhysicalDeviceImageViewMinLodFeaturesEXT *features =
            (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext;
         features->minLod = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         /* Rectangular lines must use the strict algorithm, which is not
          * supported for wide lines prior to ICL.  See rasterization_mode for
          * details and how the HW states are programmed.
          */
         features->rectangularLines = pdevice->info.ver >= 10;
         features->bresenhamLines = true;
         /* Support for Smooth lines with MSAA was removed on gfx11.  From the
          * BSpec section "Multisample ModesState" table for "AA Line Support
          * Requirements":
          *
          *    GFX10:BUG:########       NUM_MULTISAMPLES == 1
          *
          * Fortunately, this isn't a case most people care about.
          */
         features->smoothLines = pdevice->info.ver < 10;
         features->stippledRectangularLines = false;
         features->stippledBresenhamLines = true;
         features->stippledSmoothLines = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
         VkPhysicalDeviceMeshShaderFeaturesNV *features =
            (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
         features->taskShader = pdevice->vk.supported_extensions.NV_mesh_shader;
         features->meshShader = pdevice->vk.supported_extensions.NV_mesh_shader;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT: {
         VkPhysicalDeviceMeshShaderFeaturesEXT *features =
            (VkPhysicalDeviceMeshShaderFeaturesEXT *)ext;
         features->meshShader = pdevice->vk.supported_extensions.EXT_mesh_shader;
         features->taskShader = pdevice->vk.supported_extensions.EXT_mesh_shader;
         features->multiviewMeshShader = false;
         features->primitiveFragmentShadingRateMeshShader = features->meshShader;
         features->meshShaderQueries = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT: {
         VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT *features =
            (VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT *)ext;
         features->mutableDescriptorType = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         feature->performanceCounterQueryPools = true;
         /* HW only supports a single configuration at a time. */
         feature->performanceCounterMultipleQueryPools = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
         VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
            (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
         features->primitivesGeneratedQuery = true;
         features->primitivesGeneratedQueryWithRasterizerDiscard = false;
         features->primitivesGeneratedQueryWithNonZeroStreams = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
         VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext;
         features->rayQuery = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR: {
         VkPhysicalDeviceRayTracingPipelineFeaturesKHR *features = (void *)ext;
         features->rayTracingPipeline = pdevice->info.has_ray_tracing;
         features->rayTracingPipelineShaderGroupHandleCaptureReplay = false;
         features->rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false;
         features->rayTracingPipelineTraceRaysIndirect = true;
         features->rayTraversalPrimitiveCulling = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (void *)ext;
         features->shaderBufferFloat32Atomics =    true;
         features->shaderBufferFloat32AtomicAdd =  pdevice->info.has_lsc;
         features->shaderBufferFloat64Atomics =
            pdevice->info.has_64bit_float && pdevice->info.has_lsc;
         features->shaderBufferFloat64AtomicAdd =  false;
         features->shaderSharedFloat32Atomics =    true;
         features->shaderSharedFloat32AtomicAdd =  false;
         features->shaderSharedFloat64Atomics =    false;
         features->shaderSharedFloat64AtomicAdd =  false;
         features->shaderImageFloat32Atomics =     true;
         features->shaderImageFloat32AtomicAdd =   false;
         features->sparseImageFloat32Atomics =     false;
         features->sparseImageFloat32AtomicAdd =   false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (void *)ext;
         features->shaderBufferFloat16Atomics      = pdevice->info.has_lsc;
         features->shaderBufferFloat16AtomicAdd    = false;
         features->shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc;
         features->shaderBufferFloat32AtomicMinMax = true;
         features->shaderBufferFloat64AtomicMinMax =
            pdevice->info.has_64bit_float && pdevice->info.has_lsc;
         features->shaderSharedFloat16Atomics      = pdevice->info.has_lsc;
         features->shaderSharedFloat16AtomicAdd    = false;
         features->shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc;
         features->shaderSharedFloat32AtomicMinMax = true;
         features->shaderSharedFloat64AtomicMinMax = false;
         features->shaderImageFloat32AtomicMinMax  = false;
         features->sparseImageFloat32AtomicMinMax  = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
         VkPhysicalDeviceShaderClockFeaturesKHR *features =
            (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
         features->shaderSubgroupClock = true;
         features->shaderDeviceClock = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
         VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
            (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
         features->shaderIntegerFunctions2 = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
            (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
         features->shaderModuleIdentifier = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
         VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
            (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
         features->shaderSubgroupUniformControlFlow = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = true;
         features->geometryStreams = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
         VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
            (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
         features->workgroupMemoryExplicitLayout = true;
         features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
         features->workgroupMemoryExplicitLayout8BitAccess = true;
         features->workgroupMemoryExplicitLayout16BitAccess = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
         features->extendedDynamicState = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
         features->extendedDynamicState2 = true;
         features->extendedDynamicState2LogicOp = true;
         features->extendedDynamicState2PatchControlPoints = false;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState3FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState3FeaturesEXT *)ext;
         features->extendedDynamicState3PolygonMode = true;
         features->extendedDynamicState3TessellationDomainOrigin = true;
         features->extendedDynamicState3RasterizationStream = true;
         features->extendedDynamicState3LineStippleEnable = true;
         features->extendedDynamicState3LineRasterizationMode = true;
         features->extendedDynamicState3LogicOpEnable = true;
         features->extendedDynamicState3AlphaToOneEnable = true;
         features->extendedDynamicState3DepthClipEnable = true;
         features->extendedDynamicState3DepthClampEnable = true;
         features->extendedDynamicState3DepthClipNegativeOneToOne = true;
         features->extendedDynamicState3ProvokingVertexMode = true;
         features->extendedDynamicState3ColorBlendEnable = true;
         features->extendedDynamicState3ColorWriteMask = true;
         features->extendedDynamicState3ColorBlendEquation = true;
         features->extendedDynamicState3SampleLocationsEnable = true;
         features->extendedDynamicState3SampleMask = true;

         features->extendedDynamicState3RasterizationSamples = false;
         features->extendedDynamicState3AlphaToCoverageEnable = false;
         features->extendedDynamicState3ConservativeRasterizationMode = false;
         features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
         features->extendedDynamicState3ViewportWScalingEnable = false;
         features->extendedDynamicState3ViewportSwizzle = false;
         features->extendedDynamicState3ShadingRateImageEnable = false;
         features->extendedDynamicState3CoverageToColorEnable = false;
         features->extendedDynamicState3CoverageToColorLocation = false;
         features->extendedDynamicState3CoverageModulationMode = false;
         features->extendedDynamicState3CoverageModulationTableEnable = false;
         features->extendedDynamicState3CoverageModulationTable = false;
         features->extendedDynamicState3CoverageReductionMode = false;
         features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
         features->extendedDynamicState3ColorBlendAdvanced = false;

         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
         VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
         features->multiDraw = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_NON_SEAMLESS_CUBE_MAP_FEATURES_EXT: {
         VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *features =
            (VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *)ext;
         features->nonSeamlessCubeMap = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
         VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
            (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
         features->primitiveTopologyListRestart = true;
         features->primitiveTopologyPatchListRestart = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
         features->depthClipControl = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_ID_FEATURES_KHR: {
         VkPhysicalDevicePresentIdFeaturesKHR *features =
            (VkPhysicalDevicePresentIdFeaturesKHR *) ext;
         features->presentId = pdevice->vk.supported_extensions.KHR_present_id;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_WAIT_FEATURES_KHR: {
         VkPhysicalDevicePresentWaitFeaturesKHR *features =
            (VkPhysicalDevicePresentWaitFeaturesKHR *) ext;
         features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;
         break;
      }

      default:
         anv_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

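/*
 * Illustrative only, not driver code: the feature structs filled in above
 * come back to us through vkCreateDevice().  An application chains the same
 * structs, with any unwanted features reset to VK_FALSE, into
 * VkDeviceCreateInfo::pNext; pEnabledFeatures must then be NULL.  A hedged
 * sketch reusing the hypothetical `features2` chain from the example above:
 *
 *    VkDeviceCreateInfo create_info = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO,
 *       .pNext = &features2,
 *       .queueCreateInfoCount = 1,
 *       .pQueueCreateInfos = &queue_info,   // hypothetical queue setup
 *       .pEnabledFeatures = NULL,           // features come from the chain
 *    };
 *    VkDevice device;
 *    vkCreateDevice(physical_device, &create_info, NULL, &device);
 */
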
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS   64

#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS       256

#define MAX_CUSTOM_BORDER_COLORS                   4096

void anv_GetPhysicalDeviceProperties(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties*                 pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   const struct intel_device_info *devinfo = &pdevice->info;

   const uint32_t max_ssbos = UINT16_MAX;
   const uint32_t max_textures = UINT16_MAX;
   const uint32_t max_samplers = UINT16_MAX;
   const uint32_t max_images = UINT16_MAX;

   /* Claim a high per-stage limit since we have bindless. */
   const uint32_t max_per_stage = UINT32_MAX;

   const uint32_t max_workgroup_size =
      MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D                      = (1 << 14),
      .maxImageDimension2D                      = (1 << 14),
      .maxImageDimension3D                      = (1 << 11),
      .maxImageDimensionCube                    = (1 << 14),
      .maxImageArrayLayers                      = (1 << 11),
      .maxTexelBufferElements                   = 128 * 1024 * 1024,
      .maxUniformBufferRange                    = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
      .maxStorageBufferRange                    = pdevice->isl_dev.max_buffer_size,
      .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount                 = UINT32_MAX,
      .maxSamplerAllocationCount                = 64 * 1024,
      .bufferImageGranularity                   = 1,
      .sparseAddressSpaceSize                   = 0,
      .maxBoundDescriptorSets                   = MAX_SETS,
      .maxPerStageDescriptorSamplers            = max_samplers,
      .maxPerStageDescriptorUniformBuffers      = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers      = max_ssbos,
      .maxPerStageDescriptorSampledImages       = max_textures,
      .maxPerStageDescriptorStorageImages       = max_images,
      .maxPerStageDescriptorInputAttachments    = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
      .maxPerStageResources                     = max_per_stage,
      .maxDescriptorSetSamplers                 = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
      .maxDescriptorSetUniformBuffers           = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,           /* number of stages * maxPerStageDescriptorUniformBuffers */
      .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers           = 6 * max_ssbos,    /* number of stages * maxPerStageDescriptorStorageBuffers */
      .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages            = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
      .maxDescriptorSetStorageImages            = 6 * max_images,   /* number of stages * maxPerStageDescriptorStorageImages */
      .maxDescriptorSetInputAttachments         = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
      .maxVertexInputAttributes                 = MAX_VES,
      .maxVertexInputBindings                   = MAX_VBS,
      /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
       *
       * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
       */
      .maxVertexInputAttributeOffset            = 2047,
      /* Skylake PRMs: Volume 2d: Command Reference: Structures:
       *
       * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
       */
      .maxVertexInputBindingStride              = 4095,
      .maxVertexOutputComponents                = 128,
      .maxTessellationGenerationLevel           = 64,
      .maxTessellationPatchSize                 = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations             = 32,
      .maxGeometryInputComponents               = 128,
      .maxGeometryOutputComponents              = 128,
      .maxGeometryOutputVertices                = 256,
      .maxGeometryTotalOutputComponents         = 1024,
      .maxFragmentInputComponents               = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
      .maxFragmentOutputAttachments             = 8,
      .maxFragmentDualSrcAttachments            = 1,
      .maxFragmentCombinedOutputResources       = MAX_RTS + max_ssbos + max_images,
      .maxComputeSharedMemorySize               = 64 * 1024,
      .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations           = max_workgroup_size,
      .maxComputeWorkGroupSize = {
         max_workgroup_size,
         max_workgroup_size,
         max_workgroup_size,
      },
      .subPixelPrecisionBits                    = 8,
      .subTexelPrecisionBits                    = 8,
      .mipmapPrecisionBits                      = 8,
      .maxDrawIndexedIndexValue                 = UINT32_MAX,
      .maxDrawIndirectCount                     = UINT32_MAX,
      .maxSamplerLodBias                        = 16,
      .maxSamplerAnisotropy                     = 16,
      .maxViewports                             = MAX_VIEWPORTS,
      .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
      .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits                     = 13, /* We take a float? */
      .minMemoryMapAlignment                    = 4096, /* A page */
      /* The dataport requires texel alignment so we need to assume a worst
       * case of R32G32B32A32 which is 16 bytes.
       */
      .minTexelBufferOffsetAlignment            = 16,
      .minUniformBufferOffsetAlignment          = ANV_UBO_ALIGNMENT,
      .minStorageBufferOffsetAlignment          = ANV_SSBO_ALIGNMENT,
      .minTexelOffset                           = -8,
      .maxTexelOffset                           = 7,
      .minTexelGatherOffset                     = -32,
      .maxTexelGatherOffset                     = 31,
      .minInterpolationOffset                   = -0.5,
      .maxInterpolationOffset                   = 0.4375,
      .subPixelInterpolationOffsetBits          = 4,
      .maxFramebufferWidth                      = (1 << 14),
      .maxFramebufferHeight                     = (1 << 14),
      .maxFramebufferLayers                     = (1 << 11),
      .framebufferColorSampleCounts             = sample_counts,
      .framebufferDepthSampleCounts             = sample_counts,
      .framebufferStencilSampleCounts           = sample_counts,
      .framebufferNoAttachmentsSampleCounts     = sample_counts,
      .maxColorAttachments                      = MAX_RTS,
      .sampledImageColorSampleCounts            = sample_counts,
      .sampledImageIntegerSampleCounts          = sample_counts,
      .sampledImageDepthSampleCounts            = sample_counts,
      .sampledImageStencilSampleCounts          = sample_counts,
      .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords                       = 1,
      .timestampComputeAndGraphics              = true,
      .timestampPeriod                          = 1000000000.0 / devinfo->timestamp_frequency,
      .maxClipDistances                         = 8,
      .maxCullDistances                         = 8,
      .maxCombinedClipAndCullDistances          = 8,
      .discreteQueuePriorities                  = 2,
      .pointSizeRange                           = { 0.125, 255.875 },
      /* While SKL and up support much wider lines than we are setting here,
       * in practice we run into conformance issues if we go past this limit.
       * Since the Windows driver does the same, it's probably fair to assume
       * that no one needs more than this.
       */
      .lineWidthRange                           = { 0.0, 8.0 },
      .pointSizeGranularity                     = (1.0 / 8.0),
      .lineWidthGranularity                     = (1.0 / 128.0),
      .strictLines                              = false,
      .standardSampleLocations                  = true,
      .optimalBufferCopyOffsetAlignment         = 128,
      .optimalBufferCopyRowPitchAlignment       = 128,
      .nonCoherentAtomSize                      = 64,
   };

   *pProperties = (VkPhysicalDeviceProperties) {
      .apiVersion = ANV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x8086,
      .deviceID = pdevice->info.pci_device_id,
      .deviceType = pdevice->info.has_local_mem ?
                    VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
                    VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
   };

   snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
            "%s", pdevice->info.name);
   memcpy(pProperties->pipelineCacheUUID,
          pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
}
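
/*
 * Worked example (illustrative only): timestampPeriod above is expressed in
 * nanoseconds per GPU timestamp tick, so a timestamp-query delta converts to
 * wall-clock time as
 *
 *    double ns = (double)(end_ticks - begin_ticks) * limits.timestampPeriod;
 *
 * e.g. a hypothetical 12.5 MHz timestamp_frequency would give
 * 1e9 / 12.5e6 = 80 ns per tick.
 */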

static void
anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = BRW_SUBGROUP_SIZE;
   VkShaderStageFlags scalar_stages = 0;
   for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
      if (pdevice->compiler->scalar_stage[stage])
         scalar_stages |= mesa_to_vk_shader_stage(stage);
   }
   if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
      scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
                       VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
                       VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
                       VK_SHADER_STAGE_MISS_BIT_KHR |
                       VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
                       VK_SHADER_STAGE_CALLABLE_BIT_KHR;
   }
   if (pdevice->vk.supported_extensions.NV_mesh_shader ||
       pdevice->vk.supported_extensions.EXT_mesh_shader) {
      scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
                       VK_SHADER_STAGE_MESH_BIT_EXT;
   }
   p->subgroupSupportedStages = scalar_stages;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                    VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
   p->subgroupQuadOperationsInAllStages = true;

   p->pointClippingBehavior      = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
   p->maxMultiviewViewCount      = 16;
   p->maxMultiviewInstanceIndex  = UINT32_MAX / 16;
   p->protectedNoFault           = false;
   /* This value doesn't matter for us today as our per-stage descriptors are
    * the real limit.
    */
   p->maxPerSetDescriptors       = 1024;
   p->maxMemoryAllocationSize    = MAX_MEMORY_ALLOCATION_SIZE;
}
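
/*
 * Illustrative only: applications read the values above the same way they
 * query features, by chaining the core property structs.  Hedged sketch:
 *
 *    VkPhysicalDeviceVulkan11Properties p11 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
 *    };
 *    VkPhysicalDeviceProperties2 props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
 *       .pNext = &p11,
 *    };
 *    vkGetPhysicalDeviceProperties2(physical_device, &props2);
 *    // p11.subgroupSize is BRW_SUBGROUP_SIZE (32) on this driver.
 */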

static void
anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "Intel open-source Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);

   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 3,
      .subminor = 0,
      .patch = 0,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;

   /* Broadwell does not support HF denorms and there are restrictions on
    * other gens. According to Kabylake's PRM:
    *
    * "math - Extended Math Function
    * [...]
    * Restriction : Half-float denorms are always retained."
    */
   p->shaderDenormFlushToZeroFloat16         = false;
   p->shaderDenormPreserveFloat16            = pdevice->info.ver > 8;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = true;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = true;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = true;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = true;
   p->shaderDenormPreserveFloat64            = true;
   p->shaderRoundingModeRTEFloat64           = true;
   p->shaderRoundingModeRTZFloat64           = true;
   p->shaderSignedZeroInfNanPreserveFloat64  = true;

   /* It's a bit hard to exactly map our implementation to the limits
    * described by Vulkan.  The bindless surface handle in the extended
    * message descriptors is 20 bits and it's an index into the table of
    * RENDER_SURFACE_STATE structs that starts at bindless surface base
    * address.  This means that we can have at most 1M surface states
    * allocated at any given time.  Since most image views take two
    * descriptors, this means we have a limit of about 500K image views.
    *
    * However, since we allocate surface states at vkCreateImageView time,
    * this means our limit is actually something on the order of 500K image
    * views allocated at any time.  The actual limit described by Vulkan, on
    * the other hand, is a limit of how many you can have in a descriptor set.
    * Assuming anyone using 1M descriptors will be using the same image view
    * twice a bunch of times (or a bunch of null descriptors), we can safely
    * advertise a larger limit here.
    */
   const unsigned max_bindless_views = 1 << 20;
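   /* Spelling out the arithmetic from the comment above (illustrative only):
    * 1 << 20 = 1,048,576 bindless surface states, and at two descriptors per
    * image view that is ~524K views, hence the "about 500K" figure.
    */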
   p->maxUpdateAfterBindDescriptorsInAllPools            = max_bindless_views;
   p->shaderUniformBufferArrayNonUniformIndexingNative   = false;
   p->shaderSampledImageArrayNonUniformIndexingNative    = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = true;
   p->quadDivergentImplicitLod                           = false;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_bindless_views;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
   p->maxPerStageUpdateAfterBindResources                = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = UINT32_MAX;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_bindless_views;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_AVERAGE_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
                                      VK_RESOLVE_MODE_MIN_BIT |
                                      VK_RESOLVE_MODE_MAX_BIT;
   p->independentResolveNone  = true;
   p->independentResolve      = true;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts =
      isl_device_get_sample_counts(&pdevice->isl_dev);
}
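
/*
 * Illustrative only: the *UpdateAfterBind* limits above apply to descriptor
 * set layouts that opt in to update-after-bind.  A hedged sketch of the
 * application side (`binding` is a hypothetical VkDescriptorSetLayoutBinding):
 *
 *    VkDescriptorBindingFlags flags =
 *       VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT |
 *       VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT;
 *    VkDescriptorSetLayoutBindingFlagsCreateInfo binding_flags = {
 *       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO,
 *       .bindingCount = 1,
 *       .pBindingFlags = &flags,
 *    };
 *    VkDescriptorSetLayoutCreateInfo layout_info = {
 *       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
 *       .pNext = &binding_flags,
 *       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT,
 *       .bindingCount = 1,
 *       .pBindings = &binding,
 *    };
 */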

static void
anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
                                       VkPhysicalDeviceVulkan13Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);

   p->minSubgroupSize = 8;
   p->maxSubgroupSize = 32;
   p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
                                   VK_SHADER_STAGE_TASK_BIT_EXT |
                                   VK_SHADER_STAGE_MESH_BIT_EXT;

   p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
   p->maxPerStageDescriptorInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
      MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
   p->maxInlineUniformTotalSize = UINT16_MAX;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
    * Base Address:
    *
    *    "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
    *    specifies the base address of the first element of the surface,
    *    computed in software by adding the surface base address to the
    *    byte offset of the element in the buffer. The base address must
    *    be aligned to element size."
    *
    * The typed dataport messages require that things be texel aligned.
    * Otherwise, we may just load/store the wrong data or, in the worst
    * case, there may be hangs.
    */
   p->storageTexelBufferOffsetAlignmentBytes = 16;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;

   /* The sampler, however, is much more forgiving and it can handle
    * arbitrary byte alignment for linear and buffer surfaces.  It's
    * hard to find a good PRM citation for this but years of empirical
    * experience demonstrate that this is true.
    */
   p->uniformTexelBufferOffsetAlignmentBytes = 1;
   p->uniformTexelBufferOffsetSingleTexelAlignment = false;

   p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
}
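
/*
 * Illustrative only: requiredSubgroupSizeStages above means compute, task
 * and mesh shaders may pin a subgroup size anywhere in
 * [minSubgroupSize, maxSubgroupSize], i.e. 8..32 here.  Hedged sketch:
 *
 *    VkPipelineShaderStageRequiredSubgroupSizeCreateInfo req = {
 *       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO,
 *       .requiredSubgroupSize = 16,
 *    };
 *    // Chain `req` into VkPipelineShaderStageCreateInfo::pNext before
 *    // vkCreateComputePipelines().
 */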

void anv_GetPhysicalDeviceProperties2(
    VkPhysicalDevice                            physicalDevice,
    VkPhysicalDeviceProperties2*                pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);

   anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   anv_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   anv_get_physical_device_properties_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Properties core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
   };
   anv_get_physical_device_properties_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pProperties->pNext) {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
         VkPhysicalDeviceAccelerationStructurePropertiesKHR *props = (void *)ext;
         props->maxGeometryCount = (1u << 24) - 1;
         props->maxInstanceCount = (1u << 24) - 1;
         props->maxPrimitiveCount = (1u << 29) - 1;
         props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
         props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
         props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
         props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
         props->minAccelerationStructureScratchOffsetAlignment = 64;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
         /* TODO: Real limits */
         VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
            (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
         /* There's nothing in the public docs about this value as far as I
          * can tell.  However, this is the value the Windows driver reports
          * and there's a comment on a rejected HW feature in the internal
          * docs that says:
          *
          *    "This is similar to conservative rasterization, except the
          *    primitive area is not extended by 1/512 and..."
          *
          * That's a bit of an obtuse reference but it's the best we've got
          * for now.
          */
         properties->primitiveOverestimationSize = 1.0f / 512.0f;
         properties->maxExtraPrimitiveOverestimationSize = 0.0f;
         properties->extraPrimitiveOverestimationSizeGranularity = 0.0f;
         properties->primitiveUnderestimation = false;
         properties->conservativePointAndLineRasterization = false;
         properties->degenerateTrianglesRasterized = true;
         properties->degenerateLinesRasterized = false;
         properties->fullyCoveredFragmentShaderInputVariable = false;
         properties->conservativeRasterizationPostDepthCoverage = true;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
            (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
         properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
         VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
            (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
         props->primitiveFragmentShadingRateWithMultipleViewports =
            pdevice->info.has_coarse_pixel_primitive_and_cb;
         props->layeredShadingRateAttachments = pdevice->info.has_coarse_pixel_primitive_and_cb;
         props->fragmentShadingRateNonTrivialCombinerOps =
            pdevice->info.has_coarse_pixel_primitive_and_cb;
         props->maxFragmentSize = (VkExtent2D) { 4, 4 };
         props->maxFragmentSizeAspectRatio =
            pdevice->info.has_coarse_pixel_primitive_and_cb ?
            2 : 4;
         props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
            (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
         props->maxFragmentShadingRateRasterizationSamples =
            pdevice->info.has_coarse_pixel_primitive_and_cb ?
            VK_SAMPLE_COUNT_4_BIT :  VK_SAMPLE_COUNT_16_BIT;
         props->fragmentShadingRateWithShaderDepthStencilWrites = false;
         props->fragmentShadingRateWithSampleMask = true;
         props->fragmentShadingRateWithShaderSampleMask = false;
         props->fragmentShadingRateWithConservativeRasterization = true;
         props->fragmentShadingRateWithFragmentShaderInterlock = true;
         props->fragmentShadingRateWithCustomSampleLocations = true;

         /* Fixed in DG2_G10_C0 and DG2_G11_B0. Consider any other SKU as
          * having the fix.
          */
2356          props->fragmentShadingRateStrictMultiplyCombiner =
2357             pdevice->info.platform == INTEL_PLATFORM_DG2_G10 ?
2358             pdevice->info.revision >= 8 :
2359             pdevice->info.platform == INTEL_PLATFORM_DG2_G11 ?
2360             pdevice->info.revision >= 4 : true;
2361
2362          if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
2363             props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
2364             props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
2365             props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
2366          } else {
2367             /* Those must be 0 if attachmentFragmentShadingRate is not
2368              * supported.
2369              */
2370             props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2371             props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2372             props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
2373          }
2374          break;
2375       }
2376
2377       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
2378          VkPhysicalDeviceDrmPropertiesEXT *props =
2379             (VkPhysicalDeviceDrmPropertiesEXT *)ext;
2380
2381          props->hasPrimary = pdevice->has_master;
2382          props->primaryMajor = pdevice->master_major;
2383          props->primaryMinor = pdevice->master_minor;
2384
2385          props->hasRender = pdevice->has_local;
2386          props->renderMajor = pdevice->local_major;
2387          props->renderMinor = pdevice->local_minor;
2388
2389          break;
2390       }
2391
2392       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_PROPERTIES_EXT: {
2393          VkPhysicalDeviceExtendedDynamicState3PropertiesEXT *props =
2394             (VkPhysicalDeviceExtendedDynamicState3PropertiesEXT *) ext;
2395          props->dynamicPrimitiveTopologyUnrestricted = true;
2396          break;
2397       }
2398
2399       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
2400          VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props =
2401             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
2402          /* Userptr needs page-aligned memory. */
2403          props->minImportedHostPointerAlignment = 4096;
2404          break;
2405       }
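      /* A minimal client-side sketch (not driver code, illustrative only) of
       * how this limit is consumed: a host allocation imported through
       * VK_EXT_external_memory_host must honor
       * minImportedHostPointerAlignment. "size" and
       * "host_visible_type_index" are placeholders; the application derives
       * the memory type from vkGetMemoryHostPointerPropertiesEXT():
       *
       *    void *host_mem = NULL;
       *    posix_memalign(&host_mem, 4096, size);
       *    VkImportMemoryHostPointerInfoEXT import = {
       *       .sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT,
       *       .handleType =
       *          VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
       *       .pHostPointer = host_mem,
       *    };
       *    VkMemoryAllocateInfo alloc_info = {
       *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
       *       .pNext = &import,
       *       .allocationSize = size,
       *       .memoryTypeIndex = host_visible_type_index,
       *    };
       */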
2406
2407       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2408          VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2409             (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2410          /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
2411           * Sampling Rules - Legacy Mode", it says the following:
2412           *
2413           *    "Note that the device divides a pixel into a 16x16 array of
2414           *    subpixels, referenced by their upper left corners."
2415           *
2416           * This is the only known reference in the PRMs to the subpixel
2417           * precision of line rasterization and a "16x16 array of subpixels"
2418           * implies 4 subpixel precision bits.  Empirical testing has shown
2419           * that 4 subpixel precision bits apply to all line rasterization
2420           * types.
2421           */
2422          props->lineSubPixelPrecisionBits = 4;
2423          break;
2424       }
2425
2426       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
2427          VkPhysicalDeviceMaintenance4Properties *properties =
2428             (VkPhysicalDeviceMaintenance4Properties *)ext;
2429          properties->maxBufferSize = pdevice->isl_dev.max_buffer_size;
2430          break;
2431       }
2432
2433       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
2434          VkPhysicalDeviceMeshShaderPropertiesNV *props =
2435             (VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
2436
2437          /* Bounded by the maximum representable size in
2438           * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize.  Same for Task.
2439           */
2440          const uint32_t max_slm_size = 64 * 1024;
2441
2442          /* Bounded by the maximum representable size in
2443           * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum.  Same for Task.
2444           */
2445          const uint32_t max_workgroup_size = 1 << 10;
2446
2447          /* Bounded by the maximum representable count in
2448           * 3DSTATE_MESH_SHADER_BODY::MaximumPrimitiveCount.
2449           */
2450          const uint32_t max_primitives = 1024;
2451
2452          /* TODO(mesh): Multiview. */
2453          const uint32_t max_view_count = 1;
2454
2455          props->maxDrawMeshTasksCount = UINT32_MAX;
2456
2457          /* TODO(mesh): Implement workgroup Y and Z sizes larger than one by
2458           * mapping them to/from the single value that HW provides us
2459           * (currently used for X).
2460           */
2461
2462          props->maxTaskWorkGroupInvocations = max_workgroup_size;
2463          props->maxTaskWorkGroupSize[0] = max_workgroup_size;
2464          props->maxTaskWorkGroupSize[1] = 1;
2465          props->maxTaskWorkGroupSize[2] = 1;
2466          props->maxTaskTotalMemorySize = max_slm_size;
2467          props->maxTaskOutputCount = UINT16_MAX;
2468
2469          props->maxMeshWorkGroupInvocations = max_workgroup_size;
2470          props->maxMeshWorkGroupSize[0] = max_workgroup_size;
2471          props->maxMeshWorkGroupSize[1] = 1;
2472          props->maxMeshWorkGroupSize[2] = 1;
2473          props->maxMeshTotalMemorySize = max_slm_size / max_view_count;
2474          props->maxMeshOutputPrimitives = max_primitives / max_view_count;
2475          props->maxMeshMultiviewViewCount = max_view_count;
2476
2477          /* Depends on what indices can be represented with IndexFormat.  For
2478           * now we always use U32, so bound to the maximum unique vertices we
2479           * need for the maximum primitives.
2480           *
2481           * TODO(mesh): Revisit this if we drop "U32" IndexFormat when adding
2482           * support for others.
2483           */
2484          props->maxMeshOutputVertices = 3 * props->maxMeshOutputPrimitives;
2485
2487          props->meshOutputPerVertexGranularity = 32;
2488          props->meshOutputPerPrimitiveGranularity = 32;
2489
2490          break;
2491       }
2492
2493       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT: {
2494          VkPhysicalDeviceMeshShaderPropertiesEXT *properties =
2495             (VkPhysicalDeviceMeshShaderPropertiesEXT *)ext;
2496
2497          /* Bounded by the maximum representable size in
2498           * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize.  Same for Task.
2499           */
2500          const uint32_t max_slm_size = 64 * 1024;
2501
2502          /* Bounded by the maximum representable size in
2503           * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum.  Same for Task.
2504           */
2505          const uint32_t max_workgroup_size = 1 << 10;
2506
2507          /* 3DMESH_3D limitation. */
2508          const uint32_t max_threadgroup_count = 1 << 22;
2509
2510          /* 3DMESH_3D limitation. */
2511          const uint32_t max_threadgroup_xyz = 65535;
2512
2513          const uint32_t max_urb_size = 64 * 1024;
2514
2515          properties->maxTaskWorkGroupTotalCount = max_threadgroup_count;
2516          properties->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
2517          properties->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
2518          properties->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;
2519
2520          properties->maxTaskWorkGroupInvocations = max_workgroup_size;
2521          properties->maxTaskWorkGroupSize[0] = max_workgroup_size;
2522          properties->maxTaskWorkGroupSize[1] = max_workgroup_size;
2523          properties->maxTaskWorkGroupSize[2] = max_workgroup_size;
2524
2525          /* TUE header with padding */
2526          const uint32_t task_payload_reserved = 32;
2527
2528          properties->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
2529          properties->maxTaskSharedMemorySize = max_slm_size;
2530          properties->maxTaskPayloadAndSharedMemorySize =
2531                properties->maxTaskPayloadSize +
2532                properties->maxTaskSharedMemorySize;
2533
2534          properties->maxMeshWorkGroupTotalCount = max_threadgroup_count;
2535          properties->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
2536          properties->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
2537          properties->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;
2538
2539          properties->maxMeshWorkGroupInvocations = max_workgroup_size;
2540          properties->maxMeshWorkGroupSize[0] = max_workgroup_size;
2541          properties->maxMeshWorkGroupSize[1] = max_workgroup_size;
2542          properties->maxMeshWorkGroupSize[2] = max_workgroup_size;
2543
2544          properties->maxMeshSharedMemorySize = max_slm_size;
2545          properties->maxMeshPayloadAndSharedMemorySize =
2546                properties->maxTaskPayloadSize +
2547                properties->maxMeshSharedMemorySize;
2548
2549          /* Unfortunately the spec's formula for the max output size doesn't match our hardware
2550           * (because some per-primitive and per-vertex attributes have alignment restrictions),
2551           * so we have to advertise the minimum values mandated by the spec to avoid overflowing it.
2552           */
2553          properties->maxMeshOutputPrimitives = 256;
2554          properties->maxMeshOutputVertices = 256;
2555
2556          /* NumPrim + Primitive Data List */
2557          const uint32_t max_indices_memory =
2558                ALIGN(sizeof(uint32_t) +
2559                      sizeof(uint32_t) * properties->maxMeshOutputVertices, 32);
2560
2561          properties->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
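
         /* Worked example of the clamp above: with maxMeshOutputVertices =
          * 256, max_indices_memory = ALIGN(4 + 4 * 256, 32) = 1056 bytes,
          * so max_urb_size - max_indices_memory = 64480 and MIN2() clamps
          * the advertised value to 32768.
          */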
2562
2563          properties->maxMeshPayloadAndOutputMemorySize =
2564                properties->maxTaskPayloadSize +
2565                properties->maxMeshOutputMemorySize;
2566
2567          properties->maxMeshOutputComponents = 128;
2568
2569          /* RTAIndex is 11-bits wide */
2570          properties->maxMeshOutputLayers = 1 << 11;
2571
2572          properties->maxMeshMultiviewViewCount = 1;
2573
2574          /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
2575          properties->meshOutputPerVertexGranularity = 8;
2576          /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
2577          properties->meshOutputPerPrimitiveGranularity = 8;
2578
2579          /* SIMD16 */
2580          properties->maxPreferredTaskWorkGroupInvocations = 16;
2581          properties->maxPreferredMeshWorkGroupInvocations = 16;
2582
2583          properties->prefersLocalInvocationVertexOutput = false;
2584          properties->prefersLocalInvocationPrimitiveOutput = false;
2585          properties->prefersCompactVertexOutput = false;
2586          properties->prefersCompactPrimitiveOutput = false;
2587
2588          /* Spec minimum values */
2589          assert(properties->maxTaskWorkGroupTotalCount >= (1U << 22));
2590          assert(properties->maxTaskWorkGroupCount[0] >= 65535);
2591          assert(properties->maxTaskWorkGroupCount[1] >= 65535);
2592          assert(properties->maxTaskWorkGroupCount[2] >= 65535);
2593
2594          assert(properties->maxTaskWorkGroupInvocations >= 128);
2595          assert(properties->maxTaskWorkGroupSize[0] >= 128);
2596          assert(properties->maxTaskWorkGroupSize[1] >= 128);
2597          assert(properties->maxTaskWorkGroupSize[2] >= 128);
2598
2599          assert(properties->maxTaskPayloadSize >= 16384);
2600          assert(properties->maxTaskSharedMemorySize >= 32768);
2601          assert(properties->maxTaskPayloadAndSharedMemorySize >= 32768);
2602
2604          assert(properties->maxMeshWorkGroupTotalCount >= (1U << 22));
2605          assert(properties->maxMeshWorkGroupCount[0] >= 65535);
2606          assert(properties->maxMeshWorkGroupCount[1] >= 65535);
2607          assert(properties->maxMeshWorkGroupCount[2] >= 65535);
2608
2609          assert(properties->maxMeshWorkGroupInvocations >= 128);
2610          assert(properties->maxMeshWorkGroupSize[0] >= 128);
2611          assert(properties->maxMeshWorkGroupSize[1] >= 128);
2612          assert(properties->maxMeshWorkGroupSize[2] >= 128);
2613
2614          assert(properties->maxMeshSharedMemorySize >= 28672);
2615          assert(properties->maxMeshPayloadAndSharedMemorySize >= 28672);
2616          assert(properties->maxMeshOutputMemorySize >= 32768);
2617          assert(properties->maxMeshPayloadAndOutputMemorySize >= 48128);
2618
2619          assert(properties->maxMeshOutputComponents >= 128);
2620
2621          assert(properties->maxMeshOutputVertices >= 256);
2622          assert(properties->maxMeshOutputPrimitives >= 256);
2623          assert(properties->maxMeshOutputLayers >= 8);
2624          assert(properties->maxMeshMultiviewViewCount >= 1);
2625
2626          break;
2627       }
2628
2629       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
2630          VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
2631             (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
2632          properties->pciDomain = pdevice->info.pci_domain;
2633          properties->pciBus = pdevice->info.pci_bus;
2634          properties->pciDevice = pdevice->info.pci_dev;
2635          properties->pciFunction = pdevice->info.pci_func;
2636          break;
2637       }
2638
2639       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
2640          VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
2641             (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
2642          /* We could support this by spawning a shader to do the equation
2643           * normalization.
2644           */
2645          properties->allowCommandBufferQueryCopies = false;
2646          break;
2647       }
2648
2649 #pragma GCC diagnostic push
2650 #pragma GCC diagnostic ignored "-Wswitch"
2651       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
2652          VkPhysicalDevicePresentationPropertiesANDROID *props =
2653             (VkPhysicalDevicePresentationPropertiesANDROID *)ext;
2654          props->sharedImage = VK_FALSE;
2655          break;
2656       }
2657 #pragma GCC diagnostic pop
2658
2659       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
2660          VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
2661             (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
2662          properties->provokingVertexModePerPipeline = true;
2663          properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
2664          break;
2665       }
2666
2667       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
2668          VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
2669             (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
2670          properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
2671          break;
2672       }
2673
2674       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR: {
2675          VkPhysicalDeviceRayTracingPipelinePropertiesKHR *props = (void *)ext;
2676          /* TODO */
2677          props->shaderGroupHandleSize = 32;
2678          props->maxRayRecursionDepth = 31;
2679          /* MemRay::hitGroupSRStride is 16 bits */
2680          props->maxShaderGroupStride = UINT16_MAX;
2681          /* MemRay::hitGroupSRBasePtr requires 16B alignment */
2682          props->shaderGroupBaseAlignment = 16;
2683          props->shaderGroupHandleAlignment = 16;
2684          props->shaderGroupHandleCaptureReplaySize = 32;
2685          props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
2686          props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
2687          break;
2688       }
2689
2690       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2691          VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext;
2692          properties->robustStorageBufferAccessSizeAlignment =
2693             ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
2694          properties->robustUniformBufferAccessSizeAlignment =
2695             ANV_UBO_ALIGNMENT;
2696          break;
2697       }
2698
2699       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
2700          VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
2701             (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
2702
2703          props->sampleLocationSampleCounts =
2704             isl_device_get_sample_counts(&pdevice->isl_dev);
2705
2706          /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
2707          props->maxSampleLocationGridSize.width = 1;
2708          props->maxSampleLocationGridSize.height = 1;
2709
2710          props->sampleLocationCoordinateRange[0] = 0;
2711          props->sampleLocationCoordinateRange[1] = 0.9375;
2712          props->sampleLocationSubPixelBits = 4;
2713
2714          props->variableSampleLocations = true;
2715          break;
2716       }
2717
2718       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
2719          VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
2720             (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
2721          STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
2722                        sizeof(props->shaderModuleIdentifierAlgorithmUUID));
2723          memcpy(props->shaderModuleIdentifierAlgorithmUUID,
2724                 vk_shaderModuleIdentifierAlgorithmUUID,
2725                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
2726          break;
2727       }
2728
2729       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2730          VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
2731             (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2732
2733          props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
2734          props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
2735          props->maxTransformFeedbackBufferSize = (1ull << 32);
2736          props->maxTransformFeedbackStreamDataSize = 128 * 4;
2737          props->maxTransformFeedbackBufferDataSize = 128 * 4;
2738          props->maxTransformFeedbackBufferDataStride = 2048;
2739          props->transformFeedbackQueries = true;
2740          props->transformFeedbackStreamsLinesTriangles = false;
2741          props->transformFeedbackRasterizationStreamSelect = false;
2742          props->transformFeedbackDraw = true;
2743          break;
2744       }
2745
2746       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2747          VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
2748             (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2749          /* We have to restrict this a bit for multiview */
2750          props->maxVertexAttribDivisor = UINT32_MAX / 16;
2751          break;
2752       }
2753
2754       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2755          VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2756          props->maxMultiDrawCount = 2048;
2757          break;
2758       }
2759
2760       default:
2761          anv_debug_ignored_stype(ext->sType);
2762          break;
2763       }
2764    }
2765 }
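
/* A minimal client-side sketch (not driver code) of how the pNext chain
 * walked above gets populated by an application:
 *
 *    VkPhysicalDeviceMeshShaderPropertiesEXT mesh_props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceProperties2 props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
 *       .pNext = &mesh_props,
 *    };
 *    vkGetPhysicalDeviceProperties2(physical_device, &props2);
 */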
2766
2767 static const VkQueueFamilyProperties
2768 anv_queue_family_properties_template = {
2769    .timestampValidBits = 36, /* XXX: Real value here */
2770    .minImageTransferGranularity = { 1, 1, 1 },
2771 };
2772
2773 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2774     VkPhysicalDevice                            physicalDevice,
2775     uint32_t*                                   pQueueFamilyPropertyCount,
2776     VkQueueFamilyProperties2*                   pQueueFamilyProperties)
2777 {
2778    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2779    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2780                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
2781
2782    for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2783       struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2784       vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2785          p->queueFamilyProperties = anv_queue_family_properties_template;
2786          p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2787          p->queueFamilyProperties.queueCount = queue_family->queueCount;
2788
2789          vk_foreach_struct(ext, p->pNext) {
2790             switch (ext->sType) {
2791             case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2792                VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2793                   (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2794
2795                /* Deliberately sorted low to high */
2796                VkQueueGlobalPriorityKHR all_priorities[] = {
2797                   VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2798                   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2799                   VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2800                   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2801                };
2802
2803                uint32_t count = 0;
2804                for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2805                   if (all_priorities[i] > pdevice->max_context_priority)
2806                      break;
2807
2808                   properties->priorities[count++] = all_priorities[i];
2809                }
2810                properties->priorityCount = count;
2811                break;
2812             }
2813
2814             default:
2815                anv_debug_ignored_stype(ext->sType);
2816             }
2817          }
2818       }
2819    }
2820 }
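
/* Client-side sketch (not driver code) of the standard two-call idiom served
 * above, with the global-priority properties chained in:
 *
 *    uint32_t count = 0;
 *    vkGetPhysicalDeviceQueueFamilyProperties2(pdev, &count, NULL);
 *    VkQueueFamilyGlobalPriorityPropertiesKHR prio[count];
 *    VkQueueFamilyProperties2 props[count];
 *    for (uint32_t i = 0; i < count; i++) {
 *       prio[i] = (VkQueueFamilyGlobalPriorityPropertiesKHR) {
 *          .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR,
 *       };
 *       props[i] = (VkQueueFamilyProperties2) {
 *          .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
 *          .pNext = &prio[i],
 *       };
 *    }
 *    vkGetPhysicalDeviceQueueFamilyProperties2(pdev, &count, props);
 */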
2821
2822 void anv_GetPhysicalDeviceMemoryProperties(
2823     VkPhysicalDevice                            physicalDevice,
2824     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
2825 {
2826    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2827
2828    pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2829    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2830       pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2831          .propertyFlags = physical_device->memory.types[i].propertyFlags,
2832          .heapIndex     = physical_device->memory.types[i].heapIndex,
2833       };
2834    }
2835
2836    pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2837    for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2838       pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2839          .size    = physical_device->memory.heaps[i].size,
2840          .flags   = physical_device->memory.heaps[i].flags,
2841       };
2842    }
2843 }
2844
2845 static void
2846 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2847                       VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2848 {
2849    ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2850
2851    if (!device->vk.supported_extensions.EXT_memory_budget)
2852       return;
2853
2854    anv_update_meminfo(device, device->local_fd);
2855
2856    VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
2857    for (size_t i = 0; i < device->memory.heap_count; i++) {
2858       if (device->memory.heaps[i].is_local_mem) {
2859          total_vram_heaps_size += device->memory.heaps[i].size;
2860       } else {
2861          total_sys_heaps_size += device->memory.heaps[i].size;
2862       }
2863    }
2864
2865    for (size_t i = 0; i < device->memory.heap_count; i++) {
2866       VkDeviceSize heap_size = device->memory.heaps[i].size;
2867       VkDeviceSize heap_used = device->memory.heaps[i].used;
2868       VkDeviceSize heap_budget, total_heaps_size;
2869       uint64_t mem_available = 0;
2870
2871       if (device->memory.heaps[i].is_local_mem) {
2872          total_heaps_size = total_vram_heaps_size;
2873          if (device->vram_non_mappable.size > 0 && i == 0) {
2874             mem_available = device->vram_non_mappable.available;
2875          } else {
2876             mem_available = device->vram_mappable.available;
2877          }
2878       } else {
2879          total_heaps_size = total_sys_heaps_size;
2880          mem_available = device->sys.available;
2881       }
2882
2883       double heap_proportion = (double) heap_size / total_heaps_size;
2884       VkDeviceSize available_prop = mem_available * heap_proportion;
2885
2886       /*
2887        * Let's not incite the app to starve the system: report at most 90% of
2888        * the available heap memory.
2889        */
2890       uint64_t heap_available = available_prop * 9 / 10;
2891       heap_budget = MIN2(heap_size, heap_used + heap_available);
2892
2893       /*
2894        * Round down to the nearest MB
2895        */
2896       heap_budget &= ~((1ull << 20) - 1);
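
      /* Worked example with illustrative numbers: a single 8 GiB heap with
       * 1 GiB used and 6 GiB reported available gives heap_available =
       * 6 GiB * 9/10 = 5.4 GiB, so heap_budget = MIN2(8 GiB, 1 + 5.4 GiB) =
       * 6.4 GiB, which is then rounded down to a whole MiB.
       */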
2897
2898       /*
2899        * The heapBudget value must be non-zero for array elements less than
2900        * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2901        * value must be less than or equal to VkMemoryHeap::size for each heap.
2902        */
2903       assert(0 < heap_budget && heap_budget <= heap_size);
2904
2905       memoryBudget->heapUsage[i] = heap_used;
2906       memoryBudget->heapBudget[i] = heap_budget;
2907    }
2908
2909    /* The heapBudget and heapUsage values must be zero for array elements
2910     * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2911     */
2912    for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2913       memoryBudget->heapBudget[i] = 0;
2914       memoryBudget->heapUsage[i] = 0;
2915    }
2916 }
2917
2918 void anv_GetPhysicalDeviceMemoryProperties2(
2919     VkPhysicalDevice                            physicalDevice,
2920     VkPhysicalDeviceMemoryProperties2*          pMemoryProperties)
2921 {
2922    anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2923                                          &pMemoryProperties->memoryProperties);
2924
2925    vk_foreach_struct(ext, pMemoryProperties->pNext) {
2926       switch (ext->sType) {
2927       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2928          anv_get_memory_budget(physicalDevice, (void*)ext);
2929          break;
2930       default:
2931          anv_debug_ignored_stype(ext->sType);
2932          break;
2933       }
2934    }
2935 }
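
/* Client-side sketch (not driver code) of querying the budget filled in by
 * anv_get_memory_budget() above:
 *
 *    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceMemoryProperties2 props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
 *       .pNext = &budget,
 *    };
 *    vkGetPhysicalDeviceMemoryProperties2(pdev, &props);
 */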
2936
2937 void
2938 anv_GetDeviceGroupPeerMemoryFeatures(
2939     VkDevice                                    device,
2940     uint32_t                                    heapIndex,
2941     uint32_t                                    localDeviceIndex,
2942     uint32_t                                    remoteDeviceIndex,
2943     VkPeerMemoryFeatureFlags*                   pPeerMemoryFeatures)
2944 {
2945    assert(localDeviceIndex == 0 && remoteDeviceIndex == 0);
2946    *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2947                           VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2948                           VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2949                           VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2950 }
2951
2952 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2953     VkInstance                                  _instance,
2954     const char*                                 pName)
2955 {
2956    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2957    return vk_instance_get_proc_addr(&instance->vk,
2958                                     &anv_instance_entrypoints,
2959                                     pName);
2960 }
2961
2962 /* With version 1+ of the loader interface the ICD should expose
2963  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2964  */
2965 PUBLIC
2966 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2967     VkInstance                                  instance,
2968     const char*                                 pName);
2969
2970 PUBLIC
2971 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2972     VkInstance                                  instance,
2973     const char*                                 pName)
2974 {
2975    return anv_GetInstanceProcAddr(instance, pName);
2976 }
2977
2978 /* With version 4+ of the loader interface the ICD should expose
2979  * vk_icdGetPhysicalDeviceProcAddr()
2980  */
2981 PUBLIC
2982 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
2983     VkInstance  _instance,
2984     const char* pName);
2985
2986 PFN_vkVoidFunction vk_icdGetPhysicalDeviceProcAddr(
2987     VkInstance  _instance,
2988     const char* pName)
2989 {
2990    ANV_FROM_HANDLE(anv_instance, instance, _instance);
2991    return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2992 }
2993
2994 static struct anv_state
2995 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2996 {
2997    struct anv_state state;
2998
2999    state = anv_state_pool_alloc(pool, size, align);
3000    memcpy(state.map, p, size);
3001
3002    return state;
3003 }
3004
3005 static void
3006 anv_device_init_border_colors(struct anv_device *device)
3007 {
3008    if (device->info->platform == INTEL_PLATFORM_HSW) {
3009       static const struct hsw_border_color border_colors[] = {
3010          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
3011          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
3012          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
3013          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
3014          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
3015          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
3016       };
3017
3018       device->border_colors =
3019          anv_state_pool_emit_data(&device->dynamic_state_pool,
3020                                   sizeof(border_colors), 512, border_colors);
3021    } else {
3022       static const struct gfx8_border_color border_colors[] = {
3023          [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
3024          [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
3025          [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
3026          [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
3027          [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
3028          [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
3029       };
3030
3031       device->border_colors =
3032          anv_state_pool_emit_data(&device->dynamic_state_pool,
3033                                   sizeof(border_colors), 64, border_colors);
3034    }
3035 }
3036
3037 static VkResult
3038 anv_device_init_trivial_batch(struct anv_device *device)
3039 {
3040    VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
3041                                          ANV_BO_ALLOC_MAPPED,
3042                                          0 /* explicit_address */,
3043                                          &device->trivial_batch_bo);
3044    if (result != VK_SUCCESS)
3045       return result;
3046
3047    struct anv_batch batch = {
3048       .start = device->trivial_batch_bo->map,
3049       .next = device->trivial_batch_bo->map,
3050       .end = device->trivial_batch_bo->map + 4096,
3051    };
3052
3053    anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
3054    anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
3055
3056 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3057    if (device->physical->memory.need_clflush)
3058       intel_clflush_range(batch.start, batch.next - batch.start);
3059 #endif
3060
3061    return VK_SUCCESS;
3062 }
3063
3064 static bool
3065 get_bo_from_pool(struct intel_batch_decode_bo *ret,
3066                  struct anv_block_pool *pool,
3067                  uint64_t address)
3068 {
3069    anv_block_pool_foreach_bo(bo, pool) {
3070       uint64_t bo_address = intel_48b_address(bo->offset);
3071       if (address >= bo_address && address < (bo_address + bo->size)) {
3072          *ret = (struct intel_batch_decode_bo) {
3073             .addr = bo_address,
3074             .size = bo->size,
3075             .map = bo->map,
3076          };
3077          return true;
3078       }
3079    }
3080    return false;
3081 }
3082
3083 /* Finding a buffer for batch decoding */
3084 static struct intel_batch_decode_bo
3085 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
3086 {
3087    struct anv_device *device = v_batch;
3088    struct intel_batch_decode_bo ret_bo = {};
3089
3090    assert(ppgtt);
3091
3092    if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
3093       return ret_bo;
3094    if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
3095       return ret_bo;
3096    if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
3097       return ret_bo;
3098    if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
3099       return ret_bo;
3100    if (get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
3101       return ret_bo;
3102    if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
3103       return ret_bo;
3104
3105    if (!device->cmd_buffer_being_decoded)
3106       return (struct intel_batch_decode_bo) { };
3107
3108    struct anv_batch_bo **bo;
3109
3110    u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
3111       /* The decoder zeroes out the top 16 bits, so we need to as well */
3112       uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
3113
3114       if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
3115          return (struct intel_batch_decode_bo) {
3116             .addr = bo_address,
3117             .size = (*bo)->bo->size,
3118             .map = (*bo)->bo->map,
3119          };
3120       }
3121    }
3122
3123    return (struct intel_batch_decode_bo) { };
3124 }
3125
3126 struct intel_aux_map_buffer {
3127    struct intel_buffer base;
3128    struct anv_state state;
3129 };
3130
3131 static struct intel_buffer *
3132 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
3133 {
3134    struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
3135    if (!buf)
3136       return NULL;
3137
3138    struct anv_device *device = (struct anv_device*)driver_ctx;
3139    assert(device->physical->supports_48bit_addresses);
3140
3141    struct anv_state_pool *pool = &device->dynamic_state_pool;
3142    buf->state = anv_state_pool_alloc(pool, size, size);
3143
3144    buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
3145    buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
3146    buf->base.map = buf->state.map;
3147    buf->base.driver_bo = &buf->state;
3148    return &buf->base;
3149 }
3150
3151 static void
3152 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
3153 {
3154    struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
3155    struct anv_device *device = (struct anv_device*)driver_ctx;
3156    struct anv_state_pool *pool = &device->dynamic_state_pool;
3157    anv_state_pool_free(pool, buf->state);
3158    free(buf);
3159 }
3160
3161 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
3162    .alloc = intel_aux_map_buffer_alloc,
3163    .free = intel_aux_map_buffer_free,
3164 };
3165
3166 static VkResult anv_device_check_status(struct vk_device *vk_device);
3167
3168 static VkResult
3169 anv_device_setup_context(struct anv_device *device,
3170                          const VkDeviceCreateInfo *pCreateInfo,
3171                          const uint32_t num_queues)
3172 {
3173    struct anv_physical_device *physical_device = device->physical;
3174    VkResult result = VK_SUCCESS;
3175
3176    if (device->physical->engine_info) {
3177       /* The kernel API supports at most 64 engines */
3178       assert(num_queues <= 64);
3179       enum intel_engine_class engine_classes[64];
3180       int engine_count = 0;
3181       for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3182          const VkDeviceQueueCreateInfo *queueCreateInfo =
3183             &pCreateInfo->pQueueCreateInfos[i];
3184
3185          assert(queueCreateInfo->queueFamilyIndex <
3186                 physical_device->queue.family_count);
3187          struct anv_queue_family *queue_family =
3188             &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
3189
3190          for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
3191             engine_classes[engine_count++] = queue_family->engine_class;
3192       }
3193       if (!intel_gem_create_context_engines(device->fd,
3194                                             physical_device->engine_info,
3195                                             engine_count, engine_classes,
3196                                             (uint32_t *)&device->context_id))
3197          result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
3198                             "kernel context creation failed");
3199    } else {
3200       assert(num_queues == 1);
3201       if (!intel_gem_create_context(device->fd, &device->context_id))
3202          result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3203    }
3204
3205    if (result != VK_SUCCESS)
3206       return result;
3207
3208    /* Here we tell the kernel not to attempt to recover our context but
3209     * immediately (on the next batchbuffer submission) report that the
3210     * context is lost, and we will do the recovery ourselves.  In the case
3211     * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
3212     * the client clean up the pieces.
3213     */
3214    anv_gem_set_context_param(device->fd, device->context_id,
3215                              I915_CONTEXT_PARAM_RECOVERABLE, false);
3216
3217    /* Check if client specified queue priority. */
3218    const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
3219       vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
3220                            DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
3221
3222    VkQueueGlobalPriorityKHR priority =
3223       queue_priority ? queue_priority->globalPriority :
3224          VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
3225
3226    /* As per spec, the driver implementation may deny requests to acquire
3227     * a priority above the default priority (MEDIUM) if the caller does not
3228     * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
3229     * is returned.
3230     */
3231    if (physical_device->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
3232       int err = anv_gem_set_context_param(device->fd, device->context_id,
3233                                           I915_CONTEXT_PARAM_PRIORITY,
3234                                           priority);
3235       if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
3236          result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
3237          goto fail_context;
3238       }
3239    }
3240
3241    return result;
3242
3243 fail_context:
3244    intel_gem_destroy_context(device->fd, device->context_id);
3245    return result;
3246 }
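
/* Client-side sketch (not driver code) of how an application requests the
 * global priority consumed above, by chaining into VkDeviceQueueCreateInfo:
 *
 *    VkDeviceQueueGlobalPriorityCreateInfoKHR prio = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR,
 *       .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
 *    };
 *    VkDeviceQueueCreateInfo queue_info = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
 *       .pNext = &prio,
 *       .queueFamilyIndex = 0,
 *       .queueCount = 1,
 *       .pQueuePriorities = (float[]) { 1.0f },
 *    };
 */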
3247
3248 VkResult anv_CreateDevice(
3249     VkPhysicalDevice                            physicalDevice,
3250     const VkDeviceCreateInfo*                   pCreateInfo,
3251     const VkAllocationCallbacks*                pAllocator,
3252     VkDevice*                                   pDevice)
3253 {
3254    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3255    VkResult result;
3256    struct anv_device *device;
3257
3258    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
3259
3260    /* Check enabled features */
3261    bool robust_buffer_access = false;
3262    if (pCreateInfo->pEnabledFeatures) {
3263       if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
3264          robust_buffer_access = true;
3265    }
3266
3267    vk_foreach_struct_const(ext, pCreateInfo->pNext) {
3268       switch (ext->sType) {
3269       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
3270          const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
3271          if (features->features.robustBufferAccess)
3272             robust_buffer_access = true;
3273          break;
3274       }
3275
3276       default:
3277          /* Don't warn */
3278          break;
3279       }
3280    }
3281
3282    /* Check the requested queues and fail if asked to create any queue
3283     * with flags we don't support.
3284     */
3285    assert(pCreateInfo->queueCreateInfoCount > 0);
3286    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3287       if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
3288          return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
3289    }
3290
3291    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
3292                        sizeof(*device), 8,
3293                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3294    if (!device)
3295       return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
3296
3297    struct vk_device_dispatch_table dispatch_table;
3298
3299    bool override_initial_entrypoints = true;
3300    if (physical_device->instance->vk.app_info.app_name &&
3301        !strcmp(physical_device->instance->vk.app_info.app_name, "HITMAN3.exe")) {
3302       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &hitman3_device_entrypoints, true);
3303       override_initial_entrypoints = false;
3304    }
3305    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3306       anv_genX(&physical_device->info, device_entrypoints),
3307       override_initial_entrypoints);
3308    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3309       &anv_device_entrypoints, false);
3310    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3311       &wsi_device_entrypoints, false);
3312
3313    result = vk_device_init(&device->vk, &physical_device->vk,
3314                            &dispatch_table, pCreateInfo, pAllocator);
3315    if (result != VK_SUCCESS)
3316       goto fail_alloc;
3317
3318    if (INTEL_DEBUG(DEBUG_BATCH)) {
3319       const unsigned decode_flags =
3320          INTEL_BATCH_DECODE_FULL |
3321          (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
3322          INTEL_BATCH_DECODE_OFFSETS |
3323          INTEL_BATCH_DECODE_FLOATS;
3324
3325       intel_batch_decode_ctx_init(&device->decoder_ctx,
3326                                   &physical_device->compiler->isa,
3327                                   &physical_device->info,
3328                                   stderr, decode_flags, NULL,
3329                                   decode_get_bo, NULL, device);
3330
3331       device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
3332       device->decoder_ctx.surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
3333       device->decoder_ctx.instruction_base =
3334          INSTRUCTION_STATE_POOL_MIN_ADDRESS;
3335    }
3336
3337    anv_device_set_physical(device, physical_device);
3338
3339    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
3340    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
3341    if (device->fd == -1) {
3342       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3343       goto fail_device;
3344    }
3345
3346    device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
3347    device->vk.check_status = anv_device_check_status;
3348    device->vk.create_sync_for_memory = anv_create_sync_for_memory;
3349    vk_device_set_drm_fd(&device->vk, device->fd);
3350
3351    uint32_t num_queues = 0;
3352    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
3353       num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
3354
3355    result = anv_device_setup_context(device, pCreateInfo, num_queues);
3356    if (result != VK_SUCCESS)
3357       goto fail_fd;
3358
3359    device->queues =
3360       vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
3361                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3362    if (device->queues == NULL) {
3363       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3364       goto fail_context_id;
3365    }
3366
3367    device->queue_count = 0;
3368    for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3369       const VkDeviceQueueCreateInfo *queueCreateInfo =
3370          &pCreateInfo->pQueueCreateInfos[i];
3371
3372       for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
3373          /* When using legacy contexts, we use I915_EXEC_RENDER but, with
3374           * engine-based contexts, the bottom 6 bits of exec_flags are used
3375           * for the engine ID.
3376           */
3377          uint32_t exec_flags = device->physical->engine_info ?
3378                                device->queue_count : I915_EXEC_RENDER;
3379
3380          result = anv_queue_init(device, &device->queues[device->queue_count],
3381                                  exec_flags, queueCreateInfo, j);
3382          if (result != VK_SUCCESS)
3383             goto fail_queues;
3384
3385          device->queue_count++;
3386       }
3387    }
3388
3389    if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
3390       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3391       goto fail_queues;
3392    }
3393
3394    /* keep the page with address zero out of the allocator */
3395    util_vma_heap_init(&device->vma_lo,
3396                       LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
3397
3398    util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
3399                       CLIENT_VISIBLE_HEAP_SIZE);
3400
3401    /* Leave the last 4GiB out of the high vma range, so that no state
3402     * base address + size can overflow 48 bits. For more information see
3403     * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
3404     */
3405    util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
3406                       physical_device->gtt_size - (1ull << 32) -
3407                       HIGH_HEAP_MIN_ADDRESS);
3408
3409    list_inithead(&device->memory_objects);
3410
3411    device->robust_buffer_access = robust_buffer_access;
3412
3413    if (pthread_mutex_init(&device->mutex, NULL) != 0) {
3414       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3415       goto fail_vmas;
3416    }
3417
3418    pthread_condattr_t condattr;
3419    if (pthread_condattr_init(&condattr) != 0) {
3420       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3421       goto fail_mutex;
3422    }
3423    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
3424       pthread_condattr_destroy(&condattr);
3425       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3426       goto fail_mutex;
3427    }
3428    if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
3429       pthread_condattr_destroy(&condattr);
3430       result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3431       goto fail_mutex;
3432    }
3433    pthread_condattr_destroy(&condattr);
3434
3435    result = anv_bo_cache_init(&device->bo_cache, device);
3436    if (result != VK_SUCCESS)
3437       goto fail_queue_cond;
3438
3439    anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
3440
3441    /* Because scratch is also relative to General State Base Address, we leave
3442     * the base address 0 and start the pool memory at an offset.  This way we
3443     * get the correct offsets in the anv_states that get allocated from it.
3444     */
3445    result = anv_state_pool_init(&device->general_state_pool, device,
3446                                 "general pool",
3447                                 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
3448    if (result != VK_SUCCESS)
3449       goto fail_batch_bo_pool;
3450
3451    result = anv_state_pool_init(&device->dynamic_state_pool, device,
3452                                 "dynamic pool",
3453                                 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
3454    if (result != VK_SUCCESS)
3455       goto fail_general_state_pool;
3456
3457    /* The border color pointer is limited to 24 bits, so we need to make
3458     * sure that any such color used at any point in the program doesn't
3459     * exceed that limit.
3460     * We achieve that by reserving all the custom border colors we support
3461     * right off the bat, so they are close to the base address.
3462     */
3463    anv_state_reserved_pool_init(&device->custom_border_colors,
3464                                 &device->dynamic_state_pool,
3465                                 MAX_CUSTOM_BORDER_COLORS,
3466                                 sizeof(struct gfx8_border_color), 64);
3467
3468    result = anv_state_pool_init(&device->instruction_state_pool, device,
3469                                 "instruction pool",
3470                                 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
3471    if (result != VK_SUCCESS)
3472       goto fail_dynamic_state_pool;
3473
3474    if (device->info->verx10 >= 125) {
3475       /* Put the scratch surface states at the beginning of the internal
3476        * surface state pool.
3477        */
3478       result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
3479                                    "scratch surface state pool",
3480                                    SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3481       if (result != VK_SUCCESS)
3482          goto fail_instruction_state_pool;
3483
3484       result = anv_state_pool_init(&device->internal_surface_state_pool, device,
3485                                    "internal surface state pool",
3486                                    INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
3487                                    SCRATCH_SURFACE_STATE_POOL_SIZE, 4096);
3488    } else {
3489       result = anv_state_pool_init(&device->internal_surface_state_pool, device,
3490                                    "internal surface state pool",
3491                                    INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3492    }
3493    if (result != VK_SUCCESS)
3494       goto fail_scratch_surface_state_pool;
3495
3496    result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
3497                                 "bindless surface state pool",
3498                                 BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3499    if (result != VK_SUCCESS)
3500       goto fail_internal_surface_state_pool;
3501
3502    if (device->info->verx10 >= 125) {
3503       /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
3504        * table its own base address separately from surface state base.
3505        */
3506       result = anv_state_pool_init(&device->binding_table_pool, device,
3507                                    "binding table pool",
3508                                    BINDING_TABLE_POOL_MIN_ADDRESS, 0,
3509                                    BINDING_TABLE_POOL_BLOCK_SIZE);
3510    } else {
3511       int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
3512                                (int64_t)INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
3513       assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
3514       result = anv_state_pool_init(&device->binding_table_pool, device,
3515                                    "binding table pool",
3516                                    INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
3517                                    bt_pool_offset,
3518                                    BINDING_TABLE_POOL_BLOCK_SIZE);
3519    }
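   /* Note on the pre-Gfx12.5 path above: the binding table pool shares its
    * base address with the internal surface state pool and is addressed at
    * a negative offset from it, since binding table pointers on those
    * platforms are relative to Surface State Base Address.
    */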
3520    if (result != VK_SUCCESS)
3521       goto fail_bindless_surface_state_pool;
3522
3523    if (device->info->has_aux_map) {
3524       device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
3525                                                &physical_device->info);
3526       if (!device->aux_map_ctx)
3527          goto fail_binding_table_pool;
3528    }
3529
3530    result = anv_device_alloc_bo(device, "workaround", 4096,
3531                                 ANV_BO_ALLOC_CAPTURE |
3532                                 ANV_BO_ALLOC_MAPPED |
3533                                 (device->info->has_local_mem ?
3534                                  ANV_BO_ALLOC_WRITE_COMBINE : 0),
3535                                 0 /* explicit_address */,
3536                                 &device->workaround_bo);
3537    if (result != VK_SUCCESS)
3538       goto fail_surface_aux_map_pool;
3539
3540    device->workaround_address = (struct anv_address) {
3541       .bo = device->workaround_bo,
3542       .offset = align_u32(
3543          intel_debug_write_identifiers(device->workaround_bo->map,
3544                                        device->workaround_bo->size,
3545                                        "Anv") + 8, 8),
3546    };
3547
3548    device->rt_uuid_addr = anv_address_add(device->workaround_address, 8);
3549    memcpy(device->rt_uuid_addr.bo->map + device->rt_uuid_addr.offset,
3550           physical_device->rt_uuid,
3551           sizeof(physical_device->rt_uuid));
3552
3553    device->debug_frame_desc =
3554       intel_debug_get_identifier_block(device->workaround_bo->map,
3555                                        device->workaround_bo->size,
3556                                        INTEL_DEBUG_BLOCK_TYPE_FRAME);
3557
3558    if (device->vk.enabled_extensions.KHR_ray_query) {
3559       uint32_t ray_queries_size =
3560          align_u32(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
3561
3562       result = anv_device_alloc_bo(device, "ray queries",
3563                                    ray_queries_size,
3564                                    0,
3565                                    0 /* explicit_address */,
3566                                    &device->ray_query_bo);
3567       if (result != VK_SUCCESS)
3568          goto fail_workaround_bo;
3569    }
3570
3571    result = anv_device_init_trivial_batch(device);
3572    if (result != VK_SUCCESS)
3573       goto fail_ray_query_bo;
3574
3575    /* Emit the CPS states before running the initialization batch, as that
3576     * batch references those structures.
3577     */
3578    if (device->info->ver >= 12) {
3579       uint32_t n_cps_states = 3 * 3; /* All combinations of X by Y CP sizes (1, 2, 4) */
3580
3581       if (device->info->has_coarse_pixel_primitive_and_cb)
3582          n_cps_states *= 5 * 5; /* 5 operators for each of the 2 combiners */
3583
3584       n_cps_states += 1; /* Disable CPS */
3585
3586        /* Each of the combinaison must be replicated on all viewports */
3587       n_cps_states *= MAX_VIEWPORTS;
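
      /* E.g. with coarse pixel primitive/CB support: (3 * 3) * (5 * 5) + 1
       * = 226 states, replicated for each of the MAX_VIEWPORTS viewports.
       */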
3588
3589       device->cps_states =
3590          anv_state_pool_alloc(&device->dynamic_state_pool,
3591                               n_cps_states * CPS_STATE_length(device->info) * 4,
3592                               32);
3593       if (device->cps_states.map == NULL)
3594          goto fail_trivial_batch;
3595
3596       anv_genX(device->info, init_cps_device_state)(device);
3597    }
3598
3599    /* Allocate a null surface state at surface state offset 0.  This makes
3600     * NULL descriptor handling trivial because we can just memset structures
3601     * to zero and they have a valid descriptor.
3602     */
3603    device->null_surface_state =
3604       anv_state_pool_alloc(device->info->verx10 >= 125 ?
3605                            &device->scratch_surface_state_pool :
3606                            &device->internal_surface_state_pool,
3607                            device->isl_dev.ss.size,
3608                            device->isl_dev.ss.align);
3609    isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3610                        .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3611    assert(device->null_surface_state.offset == 0);
3612
3613    anv_scratch_pool_init(device, &device->scratch_pool);
3614
3615    /* TODO(RT): Do we want some sort of data structure for this? */
3616    memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
3617
3618    if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
3619       /* The docs say to always allocate 128KB per DSS */
3620       const uint32_t btd_fifo_bo_size =
3621          128 * 1024 * intel_device_info_dual_subslice_id_bound(device->info);
3622       result = anv_device_alloc_bo(device,
3623                                    "rt-btd-fifo",
3624                                    btd_fifo_bo_size,
3625                                    0 /* alloc_flags */,
3626                                    0 /* explicit_address */,
3627                                    &device->btd_fifo_bo);
3628       if (result != VK_SUCCESS)
3629          goto fail_trivial_batch_bo_and_scratch_pool;
3630    }
3631
3632    result = anv_genX(device->info, init_device_state)(device);
3633    if (result != VK_SUCCESS)
3634       goto fail_btd_fifo_bo;
3635
3636    struct vk_pipeline_cache_create_info pcc_info = { };
3637    device->default_pipeline_cache =
3638       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
3639    if (!device->default_pipeline_cache) {
3640       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3641       goto fail_btd_fifo_bo;
3642    }
3643
   /* Internal shaders need their own pipeline cache because, unlike the rest
    * of ANV, they won't work at all without one: they depend on the cache to
    * keep their shaders resident while they run. Therefore, we need a special
    * cache just for BLORP/RT that's forced to always be enabled.
    */
3649    pcc_info.force_enable = true;
3650    device->internal_cache =
3651       vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
3652    if (device->internal_cache == NULL) {
3653       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3654       goto fail_default_pipeline_cache;
3655    }
3656
   /* The device (currently ICL/TGL) does not have native float64 support;
    * load the fp64 emulation shader if the workaround is enabled.
    */
   if (!device->info->has_64bit_float &&
       device->physical->instance->fp64_workaround_enabled)
3660       anv_load_fp64_shader(device);
3661
3662    result = anv_device_init_rt_shaders(device);
3663    if (result != VK_SUCCESS) {
3664       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3665       goto fail_internal_cache;
3666    }
3667
3668    anv_device_init_blorp(device);
3669
3670    anv_device_init_border_colors(device);
3671
3672    anv_device_perf_init(device);
3673
3674    anv_device_utrace_init(device);
3675
3676    *pDevice = anv_device_to_handle(device);
3677
3678    return VK_SUCCESS;
3679
3680  fail_internal_cache:
3681    vk_pipeline_cache_destroy(device->internal_cache, NULL);
3682  fail_default_pipeline_cache:
3683    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3684  fail_btd_fifo_bo:
3685    if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
3686       anv_device_release_bo(device, device->btd_fifo_bo);
3687  fail_trivial_batch_bo_and_scratch_pool:
3688    anv_scratch_pool_finish(device, &device->scratch_pool);
3689  fail_trivial_batch:
3690    anv_device_release_bo(device, device->trivial_batch_bo);
3691  fail_ray_query_bo:
3692    if (device->ray_query_bo)
3693       anv_device_release_bo(device, device->ray_query_bo);
3694  fail_workaround_bo:
3695    anv_device_release_bo(device, device->workaround_bo);
3696  fail_surface_aux_map_pool:
3697    if (device->info->has_aux_map) {
3698       intel_aux_map_finish(device->aux_map_ctx);
3699       device->aux_map_ctx = NULL;
3700    }
3701  fail_binding_table_pool:
3702    anv_state_pool_finish(&device->binding_table_pool);
3703  fail_bindless_surface_state_pool:
3704    anv_state_pool_finish(&device->bindless_surface_state_pool);
3705  fail_internal_surface_state_pool:
3706    anv_state_pool_finish(&device->internal_surface_state_pool);
3707  fail_scratch_surface_state_pool:
3708    if (device->info->verx10 >= 125)
3709       anv_state_pool_finish(&device->scratch_surface_state_pool);
3710  fail_instruction_state_pool:
3711    anv_state_pool_finish(&device->instruction_state_pool);
3712  fail_dynamic_state_pool:
3713    anv_state_reserved_pool_finish(&device->custom_border_colors);
3714    anv_state_pool_finish(&device->dynamic_state_pool);
3715  fail_general_state_pool:
3716    anv_state_pool_finish(&device->general_state_pool);
3717  fail_batch_bo_pool:
3718    anv_bo_pool_finish(&device->batch_bo_pool);
3719    anv_bo_cache_finish(&device->bo_cache);
3720  fail_queue_cond:
3721    pthread_cond_destroy(&device->queue_submit);
3722  fail_mutex:
3723    pthread_mutex_destroy(&device->mutex);
3724  fail_vmas:
3725    util_vma_heap_finish(&device->vma_hi);
3726    util_vma_heap_finish(&device->vma_cva);
3727    util_vma_heap_finish(&device->vma_lo);
3728  fail_queues:
3729    for (uint32_t i = 0; i < device->queue_count; i++)
3730       anv_queue_finish(&device->queues[i]);
3731    vk_free(&device->vk.alloc, device->queues);
3732  fail_context_id:
3733    intel_gem_destroy_context(device->fd, device->context_id);
3734  fail_fd:
3735    close(device->fd);
3736  fail_device:
3737    vk_device_finish(&device->vk);
3738  fail_alloc:
3739    vk_free(&device->vk.alloc, device);
3740
3741    return result;
3742 }
3743
3744 void anv_DestroyDevice(
3745     VkDevice                                    _device,
3746     const VkAllocationCallbacks*                pAllocator)
3747 {
3748    ANV_FROM_HANDLE(anv_device, device, _device);
3749
3750    if (!device)
3751       return;
3752
3753    anv_device_utrace_finish(device);
3754
3755    anv_device_finish_blorp(device);
3756
3757    anv_device_finish_rt_shaders(device);
3758
3759    vk_pipeline_cache_destroy(device->internal_cache, NULL);
3760    vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3761
3762    if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
3763       anv_device_release_bo(device, device->btd_fifo_bo);
3764
3765 #ifdef HAVE_VALGRIND
3766    /* We only need to free these to prevent valgrind errors.  The backing
3767     * BO will go away in a couple of lines so we don't actually leak.
3768     */
3769    anv_state_reserved_pool_finish(&device->custom_border_colors);
3770    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
3771    anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
3772    anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
3773 #endif
3774
3775    for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
3776       if (device->rt_scratch_bos[i] != NULL)
3777          anv_device_release_bo(device, device->rt_scratch_bos[i]);
3778    }
3779
3780    anv_scratch_pool_finish(device, &device->scratch_pool);
3781
3782    if (device->vk.enabled_extensions.KHR_ray_query) {
3783       for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
3784          if (device->ray_query_shadow_bos[i] != NULL)
3785             anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
3786       }
3787       anv_device_release_bo(device, device->ray_query_bo);
3788    }
3789    anv_device_release_bo(device, device->workaround_bo);
3790    anv_device_release_bo(device, device->trivial_batch_bo);
3791
3792    if (device->info->has_aux_map) {
3793       intel_aux_map_finish(device->aux_map_ctx);
3794       device->aux_map_ctx = NULL;
3795    }
3796
3797    anv_state_pool_finish(&device->binding_table_pool);
3798    if (device->info->verx10 >= 125)
3799       anv_state_pool_finish(&device->scratch_surface_state_pool);
3800    anv_state_pool_finish(&device->internal_surface_state_pool);
3801    anv_state_pool_finish(&device->bindless_surface_state_pool);
3802    anv_state_pool_finish(&device->instruction_state_pool);
3803    anv_state_pool_finish(&device->dynamic_state_pool);
3804    anv_state_pool_finish(&device->general_state_pool);
3805
3806    anv_bo_pool_finish(&device->batch_bo_pool);
3807
3808    anv_bo_cache_finish(&device->bo_cache);
3809
3810    util_vma_heap_finish(&device->vma_hi);
3811    util_vma_heap_finish(&device->vma_cva);
3812    util_vma_heap_finish(&device->vma_lo);
3813
3814    pthread_cond_destroy(&device->queue_submit);
3815    pthread_mutex_destroy(&device->mutex);
3816
3817    for (uint32_t i = 0; i < device->queue_count; i++)
3818       anv_queue_finish(&device->queues[i]);
3819    vk_free(&device->vk.alloc, device->queues);
3820
3821    intel_gem_destroy_context(device->fd, device->context_id);
3822
3823    if (INTEL_DEBUG(DEBUG_BATCH))
3824       intel_batch_decode_ctx_finish(&device->decoder_ctx);
3825
3826    close(device->fd);
3827
3828    vk_device_finish(&device->vk);
3829    vk_free(&device->vk.alloc, device);
3830 }
3831
3832 VkResult anv_EnumerateInstanceLayerProperties(
3833     uint32_t*                                   pPropertyCount,
3834     VkLayerProperties*                          pProperties)
3835 {
3836    if (pProperties == NULL) {
3837       *pPropertyCount = 0;
3838       return VK_SUCCESS;
3839    }
3840
3841    /* None supported at this time */
3842    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3843 }
3844
3845 static VkResult
3846 anv_device_check_status(struct vk_device *vk_device)
3847 {
3848    struct anv_device *device = container_of(vk_device, struct anv_device, vk);
3849
3850    uint32_t active, pending;
3851    int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
3852                                              &active, &pending);
3853    if (ret == -1) {
3854       /* We don't know the real error. */
3855       return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
3856    }
3857
3858    if (active) {
3859       return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
3860    } else if (pending) {
3861       return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
3862    }
3863
3864    return VK_SUCCESS;
3865 }
3866
3867 VkResult
3868 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
3869                 int64_t timeout)
3870 {
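   /* The timeout is a relative value in nanoseconds, following the
    * DRM_IOCTL_I915_GEM_WAIT convention where a negative timeout means
    * wait forever.
    */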
3871    int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
3872    if (ret == -1 && errno == ETIME) {
3873       return VK_TIMEOUT;
3874    } else if (ret == -1) {
3875       /* We don't know the real error. */
3876       return vk_device_set_lost(&device->vk, "gem wait failed: %m");
3877    } else {
3878       return VK_SUCCESS;
3879    }
3880 }
3881
3882 uint64_t
3883 anv_vma_alloc(struct anv_device *device,
3884               uint64_t size, uint64_t align,
3885               enum anv_bo_alloc_flags alloc_flags,
3886               uint64_t client_address)
3887 {
3888    pthread_mutex_lock(&device->vma_mutex);
3889
3890    uint64_t addr = 0;
3891
3892    if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
3893       if (client_address) {
3894          if (util_vma_heap_alloc_addr(&device->vma_cva,
3895                                       client_address, size)) {
3896             addr = client_address;
3897          }
3898       } else {
3899          addr = util_vma_heap_alloc(&device->vma_cva, size, align);
3900       }
3901       /* We don't want to fall back to other heaps */
3902       goto done;
3903    }
3904
3905    assert(client_address == 0);
3906
3907    if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
3908       addr = util_vma_heap_alloc(&device->vma_hi, size, align);
3909
3910    if (addr == 0)
3911       addr = util_vma_heap_alloc(&device->vma_lo, size, align);
3912
3913 done:
3914    pthread_mutex_unlock(&device->vma_mutex);
3915
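   /* Heap addresses are at most 48 bits wide; intel_canonical_address()
    * sign-extends bit 47 through the upper bits, which is the canonical
    * form expected for GPU virtual addresses.
    */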
3916    assert(addr == intel_48b_address(addr));
3917    return intel_canonical_address(addr);
3918 }
3919
3920 void
3921 anv_vma_free(struct anv_device *device,
3922              uint64_t address, uint64_t size)
3923 {
3924    const uint64_t addr_48b = intel_48b_address(address);
3925
3926    pthread_mutex_lock(&device->vma_mutex);
3927
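   /* The owning heap is recovered purely from the address range, so callers
    * only need to hand back the address and size of the allocation.
    */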
3928    if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
3929        addr_48b <= LOW_HEAP_MAX_ADDRESS) {
3930       util_vma_heap_free(&device->vma_lo, addr_48b, size);
3931    } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
3932               addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
3933       util_vma_heap_free(&device->vma_cva, addr_48b, size);
3934    } else {
3935       assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
3936       util_vma_heap_free(&device->vma_hi, addr_48b, size);
3937    }
3938
3939    pthread_mutex_unlock(&device->vma_mutex);
3940 }
3941
3942 VkResult anv_AllocateMemory(
3943     VkDevice                                    _device,
3944     const VkMemoryAllocateInfo*                 pAllocateInfo,
3945     const VkAllocationCallbacks*                pAllocator,
3946     VkDeviceMemory*                             pMem)
3947 {
3948    ANV_FROM_HANDLE(anv_device, device, _device);
3949    struct anv_physical_device *pdevice = device->physical;
3950    struct anv_device_memory *mem;
3951    VkResult result = VK_SUCCESS;
3952
3953    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3954
3955    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
3956    assert(pAllocateInfo->allocationSize > 0);
3957
3958    VkDeviceSize aligned_alloc_size =
3959       align_u64(pAllocateInfo->allocationSize, 4096);
3960
3961    if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
3962       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3963
3964    assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
3965    struct anv_memory_type *mem_type =
3966       &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
3967    assert(mem_type->heapIndex < pdevice->memory.heap_count);
3968    struct anv_memory_heap *mem_heap =
3969       &pdevice->memory.heaps[mem_type->heapIndex];
3970
3971    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3972    if (mem_heap_used + aligned_alloc_size > mem_heap->size)
3973       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3974
3975    mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
3976                          VK_OBJECT_TYPE_DEVICE_MEMORY);
3977    if (mem == NULL)
3978       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3979
3980    mem->type = mem_type;
3981    mem->map = NULL;
3982    mem->map_size = 0;
3983    mem->map_delta = 0;
3984    mem->ahw = NULL;
3985    mem->host_ptr = NULL;
3986
3987    enum anv_bo_alloc_flags alloc_flags = 0;
3988
3989    const VkExportMemoryAllocateInfo *export_info = NULL;
3990    const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3991    const VkImportMemoryFdInfoKHR *fd_info = NULL;
3992    const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3993    const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3994    VkMemoryAllocateFlags vk_flags = 0;
3995    uint64_t client_address = 0;
3996
3997    vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3998       switch (ext->sType) {
3999       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
4000          export_info = (void *)ext;
4001          break;
4002
4003       case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
4004          ahw_import_info = (void *)ext;
4005          break;
4006
4007       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
4008          fd_info = (void *)ext;
4009          break;
4010
4011       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
4012          host_ptr_info = (void *)ext;
4013          break;
4014
4015       case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
4016          const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
4017          vk_flags = flags_info->flags;
4018          break;
4019       }
4020
4021       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
4022          dedicated_info = (void *)ext;
4023          break;
4024
4025       case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
4026          const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
4027             (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
4028          client_address = addr_info->opaqueCaptureAddress;
4029          break;
4030       }
4031
4032       default:
4033          if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
            /* This isn't a real enum value, so use a conditional rather
             * than a case label to avoid a compiler warning.
             */
4037             anv_debug_ignored_stype(ext->sType);
4038          break;
4039       }
4040    }
4041
4042    /* By default, we want all VkDeviceMemory objects to support CCS */
4043    if (device->physical->has_implicit_ccs && device->info->has_aux_map)
4044       alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
4045
   /* If i915 reported mappable and non-mappable vram regions and the
    * application wants the lmem allocation to be mappable, then we need to
    * create our BO with the I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag.
    */
4050    if (pdevice->vram_mappable.size > 0 &&
4051        pdevice->vram_non_mappable.size > 0 &&
4052        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
4053        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
4054       alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
4055
4056    if (!(mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
4057       alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;
4058
   /* If the allocated buffer might end up in local memory and is host
    * visible, make CPU writes write-combined; it should be faster.
    */
4062    if (!(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) &&
4063        (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
4064       alloc_flags |= ANV_BO_ALLOC_WRITE_COMBINE;
4065
4066    if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
4067       alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
4068
4069    if ((export_info && export_info->handleTypes) ||
4070        (fd_info && fd_info->handleType) ||
4071        (host_ptr_info && host_ptr_info->handleType)) {
4072       /* Anything imported or exported is EXTERNAL */
4073       alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
4074    }
4075
4076    /* Check if we need to support Android HW buffer export. If so,
4077     * create AHardwareBuffer and import memory from it.
4078     */
4079    bool android_export = false;
4080    if (export_info && export_info->handleTypes &
4081        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
4082       android_export = true;
4083
4084    if (ahw_import_info) {
4085       result = anv_import_ahw_memory(_device, mem, ahw_import_info);
4086       if (result != VK_SUCCESS)
4087          goto fail;
4088
4089       goto success;
4090    } else if (android_export) {
4091       result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
4092       if (result != VK_SUCCESS)
4093          goto fail;
4094
4095       goto success;
4096    }
4097
4098    /* The Vulkan spec permits handleType to be 0, in which case the struct is
4099     * ignored.
4100     */
4101    if (fd_info && fd_info->handleType) {
4102       /* At the moment, we support only the below handle types. */
4103       assert(fd_info->handleType ==
4104                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4105              fd_info->handleType ==
4106                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4107
4108       result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
4109                                     client_address, &mem->bo);
4110       if (result != VK_SUCCESS)
4111          goto fail;
4112
4113       /* For security purposes, we reject importing the bo if it's smaller
4114        * than the requested allocation size.  This prevents a malicious client
4115        * from passing a buffer to a trusted client, lying about the size, and
4116        * telling the trusted client to try and texture from an image that goes
4117        * out-of-bounds.  This sort of thing could lead to GPU hangs or worse
4118        * in the trusted client.  The trusted client can protect itself against
4119        * this sort of attack but only if it can trust the buffer size.
4120        */
4121       if (mem->bo->size < aligned_alloc_size) {
4122          result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
4123                             "aligned allocationSize too large for "
4124                             "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
4125                             "%"PRIu64"B > %"PRIu64"B",
4126                             aligned_alloc_size, mem->bo->size);
4127          anv_device_release_bo(device, mem->bo);
4128          goto fail;
4129       }
4130
4131       /* From the Vulkan spec:
4132        *
4133        *    "Importing memory from a file descriptor transfers ownership of
4134        *    the file descriptor from the application to the Vulkan
4135        *    implementation. The application must not perform any operations on
4136        *    the file descriptor after a successful import."
4137        *
4138        * If the import fails, we leave the file descriptor open.
4139        */
4140       close(fd_info->fd);
4141       goto success;
4142    }
4143
4144    if (host_ptr_info && host_ptr_info->handleType) {
4145       if (host_ptr_info->handleType ==
4146           VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
4147          result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4148          goto fail;
4149       }
4150
4151       assert(host_ptr_info->handleType ==
4152              VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
4153
4154       result = anv_device_import_bo_from_host_ptr(device,
4155                                                   host_ptr_info->pHostPointer,
4156                                                   pAllocateInfo->allocationSize,
4157                                                   alloc_flags,
4158                                                   client_address,
4159                                                   &mem->bo);
4160       if (result != VK_SUCCESS)
4161          goto fail;
4162
4163       mem->host_ptr = host_ptr_info->pHostPointer;
4164       goto success;
4165    }
4166
4167    /* Regular allocate (not importing memory). */
4168
4169    result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
4170                                 alloc_flags, client_address, &mem->bo);
4171    if (result != VK_SUCCESS)
4172       goto fail;
4173
4174    if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
4175       ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
4176
4177       /* Some legacy (non-modifiers) consumers need the tiling to be set on
4178        * the BO.  In this case, we have a dedicated allocation.
4179        */
4180       if (image->vk.wsi_legacy_scanout) {
4181          const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
4182          result = anv_device_set_bo_tiling(device, mem->bo,
4183                                            surf->row_pitch_B,
4184                                            surf->tiling);
4185          if (result != VK_SUCCESS) {
4186             anv_device_release_bo(device, mem->bo);
4187             goto fail;
4188          }
4189       }
4190    }
4191
4192  success:
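   /* Heap accounting is optimistic: add the size first, then undo the add
    * and fail with VK_ERROR_OUT_OF_DEVICE_MEMORY if the heap overflowed,
    * so that concurrent allocations need no lock.
    */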
4193    mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
4194    if (mem_heap_used > mem_heap->size) {
4195       p_atomic_add(&mem_heap->used, -mem->bo->size);
4196       anv_device_release_bo(device, mem->bo);
4197       result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
4198                          "Out of heap memory");
4199       goto fail;
4200    }
4201
4202    pthread_mutex_lock(&device->mutex);
4203    list_addtail(&mem->link, &device->memory_objects);
4204    pthread_mutex_unlock(&device->mutex);
4205
4206    *pMem = anv_device_memory_to_handle(mem);
4207
4208    return VK_SUCCESS;
4209
4210  fail:
4211    vk_object_free(&device->vk, pAllocator, mem);
4212
4213    return result;
4214 }
4215
4216 VkResult anv_GetMemoryFdKHR(
4217     VkDevice                                    device_h,
4218     const VkMemoryGetFdInfoKHR*                 pGetFdInfo,
4219     int*                                        pFd)
4220 {
4221    ANV_FROM_HANDLE(anv_device, dev, device_h);
4222    ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
4223
4224    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4225
4226    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4227           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4228
4229    return anv_device_export_bo(dev, mem->bo, pFd);
4230 }
4231
4232 VkResult anv_GetMemoryFdPropertiesKHR(
4233     VkDevice                                    _device,
4234     VkExternalMemoryHandleTypeFlagBits          handleType,
4235     int                                         fd,
4236     VkMemoryFdPropertiesKHR*                    pMemoryFdProperties)
4237 {
4238    ANV_FROM_HANDLE(anv_device, device, _device);
4239
4240    switch (handleType) {
4241    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4242       /* dma-buf can be imported as any memory type */
4243       pMemoryFdProperties->memoryTypeBits =
4244          (1 << device->physical->memory.type_count) - 1;
4245       return VK_SUCCESS;
4246
4247    default:
4248       /* The valid usage section for this function says:
4249        *
4250        *    "handleType must not be one of the handle types defined as
4251        *    opaque."
4252        *
4253        * So opaque handle types fall into the default "unsupported" case.
4254        */
4255       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4256    }
4257 }
4258
4259 VkResult anv_GetMemoryHostPointerPropertiesEXT(
4260    VkDevice                                    _device,
4261    VkExternalMemoryHandleTypeFlagBits          handleType,
4262    const void*                                 pHostPointer,
4263    VkMemoryHostPointerPropertiesEXT*           pMemoryHostPointerProperties)
4264 {
4265    ANV_FROM_HANDLE(anv_device, device, _device);
4266
4267    assert(pMemoryHostPointerProperties->sType ==
4268           VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
4269
4270    switch (handleType) {
4271    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
4272       /* Host memory can be imported as any memory type. */
4273       pMemoryHostPointerProperties->memoryTypeBits =
4274          (1ull << device->physical->memory.type_count) - 1;
4275
4276       return VK_SUCCESS;
4277
4278    default:
4279       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
4280    }
4281 }
4282
4283 void anv_FreeMemory(
4284     VkDevice                                    _device,
4285     VkDeviceMemory                              _mem,
4286     const VkAllocationCallbacks*                pAllocator)
4287 {
4288    ANV_FROM_HANDLE(anv_device, device, _device);
4289    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
4290
4291    if (mem == NULL)
4292       return;
4293
4294    pthread_mutex_lock(&device->mutex);
4295    list_del(&mem->link);
4296    pthread_mutex_unlock(&device->mutex);
4297
4298    if (mem->map)
4299       anv_UnmapMemory(_device, _mem);
4300
4301    p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
4302                 -mem->bo->size);
4303
4304    anv_device_release_bo(device, mem->bo);
4305
4306 #if defined(ANDROID) && ANDROID_API_LEVEL >= 26
4307    if (mem->ahw)
4308       AHardwareBuffer_release(mem->ahw);
4309 #endif
4310
4311    vk_object_free(&device->vk, pAllocator, mem);
4312 }
4313
4314 VkResult anv_MapMemory(
4315     VkDevice                                    _device,
4316     VkDeviceMemory                              _memory,
4317     VkDeviceSize                                offset,
4318     VkDeviceSize                                size,
4319     VkMemoryMapFlags                            flags,
4320     void**                                      ppData)
4321 {
4322    ANV_FROM_HANDLE(anv_device, device, _device);
4323    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4324
4325    if (mem == NULL) {
4326       *ppData = NULL;
4327       return VK_SUCCESS;
4328    }
4329
4330    if (mem->host_ptr) {
4331       *ppData = mem->host_ptr + offset;
4332       return VK_SUCCESS;
4333    }
4334
4335    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
4336     *
4337     *  * memory must have been created with a memory type that reports
4338     *    VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
4339     */
4340    if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
4341       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4342                        "Memory object not mappable.");
4343    }
4344
4345    if (size == VK_WHOLE_SIZE)
4346       size = mem->bo->size - offset;
4347
   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */
4355    assert(size > 0);
4356    assert(offset + size <= mem->bo->size);
4357
4358    if (size != (size_t)size) {
4359       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4360                        "requested size 0x%"PRIx64" does not fit in %u bits",
4361                        size, (unsigned)(sizeof(size_t) * 8));
4362    }
4363
4364    /* From the Vulkan 1.2.194 spec:
4365     *
4366     *    "memory must not be currently host mapped"
4367     */
4368    if (mem->map != NULL) {
4369       return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4370                        "Memory object already mapped.");
4371    }
4372
4373    uint32_t gem_flags = 0;
4374
4375    if (!device->info->has_llc &&
4376        (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
4377       gem_flags |= I915_MMAP_WC;
4378
4379    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
4380    uint64_t map_offset;
4381    if (!device->physical->info.has_mmap_offset)
4382       map_offset = offset & ~4095ull;
4383    else
4384       map_offset = 0;
4385    assert(offset >= map_offset);
4386    uint64_t map_size = (offset + size) - map_offset;
4387
4388    /* Let's map whole pages */
4389    map_size = align_u64(map_size, 4096);
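   /* A worked example: offset = 0x1234 and size = 0x100 on a kernel without
    * mmap_offset gives map_offset = 0x1000, map_size = 0x1000 (one whole
    * page) and map_delta = 0x234, so *ppData points 0x234 bytes into the
    * mapping.
    */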
4390
4391    void *map;
4392    VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
4393                                        map_size, gem_flags, &map);
4394    if (result != VK_SUCCESS)
4395       return result;
4396
4397    mem->map = map;
4398    mem->map_size = map_size;
4399    mem->map_delta = (offset - map_offset);
4400    *ppData = mem->map + mem->map_delta;
4401
4402    return VK_SUCCESS;
4403 }
4404
4405 void anv_UnmapMemory(
4406     VkDevice                                    _device,
4407     VkDeviceMemory                              _memory)
4408 {
4409    ANV_FROM_HANDLE(anv_device, device, _device);
4410    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4411
4412    if (mem == NULL || mem->host_ptr)
4413       return;
4414
4415    anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
4416
4417    mem->map = NULL;
4418    mem->map_size = 0;
4419    mem->map_delta = 0;
4420 }
4421
4422 VkResult anv_FlushMappedMemoryRanges(
4423     VkDevice                                    _device,
4424     uint32_t                                    memoryRangeCount,
4425     const VkMappedMemoryRange*                  pMemoryRanges)
4426 {
4427 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
4428    ANV_FROM_HANDLE(anv_device, device, _device);
4429
4430    if (!device->physical->memory.need_clflush)
4431       return VK_SUCCESS;
4432
4433    /* Make sure the writes we're flushing have landed. */
4434    __builtin_ia32_mfence();
4435
4436    for (uint32_t i = 0; i < memoryRangeCount; i++) {
4437       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
4438       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4439          continue;
4440
4441       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
4442       if (map_offset >= mem->map_size)
4443          continue;
4444
4445       intel_clflush_range(mem->map + map_offset,
4446                           MIN2(pMemoryRanges[i].size,
4447                                mem->map_size - map_offset));
4448    }
4449 #endif
4450    return VK_SUCCESS;
4451 }
4452
4453 VkResult anv_InvalidateMappedMemoryRanges(
4454     VkDevice                                    _device,
4455     uint32_t                                    memoryRangeCount,
4456     const VkMappedMemoryRange*                  pMemoryRanges)
4457 {
4458 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
4459    ANV_FROM_HANDLE(anv_device, device, _device);
4460
4461    if (!device->physical->memory.need_clflush)
4462       return VK_SUCCESS;
4463
4464    for (uint32_t i = 0; i < memoryRangeCount; i++) {
4465       ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
4466       if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4467          continue;
4468
4469       uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
4470       if (map_offset >= mem->map_size)
4471          continue;
4472
4473       intel_invalidate_range(mem->map + map_offset,
4474                              MIN2(pMemoryRanges[i].size,
4475                                   mem->map_size - map_offset));
4476    }
4477
4478    /* Make sure no reads get moved up above the invalidate. */
4479    __builtin_ia32_mfence();
4480 #endif
4481    return VK_SUCCESS;
4482 }
4483
4484 void anv_GetDeviceMemoryCommitment(
4485     VkDevice                                    device,
4486     VkDeviceMemory                              memory,
4487     VkDeviceSize*                               pCommittedMemoryInBytes)
4488 {
4489    *pCommittedMemoryInBytes = 0;
4490 }
4491
4492 static void
4493 anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
4494 {
4495    ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
4496    ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
4497
4498    assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
4499
4500    if (mem) {
4501       assert(pBindInfo->memoryOffset < mem->bo->size);
4502       assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
4503       buffer->address = (struct anv_address) {
4504          .bo = mem->bo,
4505          .offset = pBindInfo->memoryOffset,
4506       };
4507    } else {
4508       buffer->address = ANV_NULL_ADDRESS;
4509    }
4510 }
4511
4512 VkResult anv_BindBufferMemory2(
4513     VkDevice                                    device,
4514     uint32_t                                    bindInfoCount,
4515     const VkBindBufferMemoryInfo*               pBindInfos)
4516 {
4517    for (uint32_t i = 0; i < bindInfoCount; i++)
4518       anv_bind_buffer_memory(&pBindInfos[i]);
4519
4520    return VK_SUCCESS;
4521 }
4522
4523 VkResult anv_QueueBindSparse(
4524     VkQueue                                     _queue,
4525     uint32_t                                    bindInfoCount,
4526     const VkBindSparseInfo*                     pBindInfo,
4527     VkFence                                     fence)
4528 {
4529    ANV_FROM_HANDLE(anv_queue, queue, _queue);
4530    if (vk_device_is_lost(&queue->device->vk))
4531       return VK_ERROR_DEVICE_LOST;
4532
4533    return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
4534 }
4535
4536 // Event functions
4537
4538 VkResult anv_CreateEvent(
4539     VkDevice                                    _device,
4540     const VkEventCreateInfo*                    pCreateInfo,
4541     const VkAllocationCallbacks*                pAllocator,
4542     VkEvent*                                    pEvent)
4543 {
4544    ANV_FROM_HANDLE(anv_device, device, _device);
4545    struct anv_event *event;
4546
4547    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
4548
4549    event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
4550                            VK_OBJECT_TYPE_EVENT);
4551    if (event == NULL)
4552       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4553
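   /* An event is just a uint64_t in the dynamic state pool: the CPU reads
    * and writes VK_EVENT_SET/VK_EVENT_RESET directly through the map, and
    * the GPU can access the same location from command buffers.
    */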
4554    event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
4555                                        sizeof(uint64_t), 8);
4556    *(uint64_t *)event->state.map = VK_EVENT_RESET;
4557
4558    *pEvent = anv_event_to_handle(event);
4559
4560    return VK_SUCCESS;
4561 }
4562
4563 void anv_DestroyEvent(
4564     VkDevice                                    _device,
4565     VkEvent                                     _event,
4566     const VkAllocationCallbacks*                pAllocator)
4567 {
4568    ANV_FROM_HANDLE(anv_device, device, _device);
4569    ANV_FROM_HANDLE(anv_event, event, _event);
4570
4571    if (!event)
4572       return;
4573
4574    anv_state_pool_free(&device->dynamic_state_pool, event->state);
4575
4576    vk_object_free(&device->vk, pAllocator, event);
4577 }
4578
4579 VkResult anv_GetEventStatus(
4580     VkDevice                                    _device,
4581     VkEvent                                     _event)
4582 {
4583    ANV_FROM_HANDLE(anv_device, device, _device);
4584    ANV_FROM_HANDLE(anv_event, event, _event);
4585
4586    if (vk_device_is_lost(&device->vk))
4587       return VK_ERROR_DEVICE_LOST;
4588
4589    return *(uint64_t *)event->state.map;
4590 }
4591
4592 VkResult anv_SetEvent(
4593     VkDevice                                    _device,
4594     VkEvent                                     _event)
4595 {
4596    ANV_FROM_HANDLE(anv_event, event, _event);
4597
4598    *(uint64_t *)event->state.map = VK_EVENT_SET;
4599
4600    return VK_SUCCESS;
4601 }
4602
4603 VkResult anv_ResetEvent(
4604     VkDevice                                    _device,
4605     VkEvent                                     _event)
4606 {
4607    ANV_FROM_HANDLE(anv_event, event, _event);
4608
4609    *(uint64_t *)event->state.map = VK_EVENT_RESET;
4610
4611    return VK_SUCCESS;
4612 }
4613
4614 // Buffer functions
4615
4616 static void
4617 anv_get_buffer_memory_requirements(struct anv_device *device,
4618                                    VkDeviceSize size,
4619                                    VkBufferUsageFlags usage,
4620                                    VkMemoryRequirements2* pMemoryRequirements)
4621 {
4622    /* The Vulkan spec (git aaed022) says:
4623     *
4624     *    memoryTypeBits is a bitfield and contains one bit set for every
4625     *    supported memory type for the resource. The bit `1<<i` is set if and
4626     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
4627     *    structure for the physical device is supported.
4628     */
4629    uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
4630
4631    /* Base alignment requirement of a cache line */
4632    uint32_t alignment = 16;
4633
4634    if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
4635       alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
4636
4637    pMemoryRequirements->memoryRequirements.size = size;
4638    pMemoryRequirements->memoryRequirements.alignment = alignment;
4639
   /* Storage and Uniform buffers should have their size aligned to
    * 32 bits to avoid boundary checks when the last DWord is not complete.
    * This ensures that no internal padding is needed for 16-bit types.
    */
4645    if (device->robust_buffer_access &&
4646        (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
4647         usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
4648       pMemoryRequirements->memoryRequirements.size = align_u64(size, 4);
4649
4650    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
4651
4652    vk_foreach_struct(ext, pMemoryRequirements->pNext) {
4653       switch (ext->sType) {
4654       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
4655          VkMemoryDedicatedRequirements *requirements = (void *)ext;
4656          requirements->prefersDedicatedAllocation = false;
4657          requirements->requiresDedicatedAllocation = false;
4658          break;
4659       }
4660
4661       default:
4662          anv_debug_ignored_stype(ext->sType);
4663          break;
4664       }
4665    }
4666 }
4667
4668 void anv_GetBufferMemoryRequirements2(
4669     VkDevice                                    _device,
4670     const VkBufferMemoryRequirementsInfo2*      pInfo,
4671     VkMemoryRequirements2*                      pMemoryRequirements)
4672 {
4673    ANV_FROM_HANDLE(anv_device, device, _device);
4674    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4675
4676    anv_get_buffer_memory_requirements(device,
4677                                       buffer->vk.size,
4678                                       buffer->vk.usage,
4679                                       pMemoryRequirements);
4680 }
4681
4682 void anv_GetDeviceBufferMemoryRequirementsKHR(
4683     VkDevice                                    _device,
4684     const VkDeviceBufferMemoryRequirements*     pInfo,
4685     VkMemoryRequirements2*                      pMemoryRequirements)
4686 {
4687    ANV_FROM_HANDLE(anv_device, device, _device);
4688
4689    anv_get_buffer_memory_requirements(device,
4690                                       pInfo->pCreateInfo->size,
4691                                       pInfo->pCreateInfo->usage,
4692                                       pMemoryRequirements);
4693 }
4694
4695 VkResult anv_CreateBuffer(
4696     VkDevice                                    _device,
4697     const VkBufferCreateInfo*                   pCreateInfo,
4698     const VkAllocationCallbacks*                pAllocator,
4699     VkBuffer*                                   pBuffer)
4700 {
4701    ANV_FROM_HANDLE(anv_device, device, _device);
4702    struct anv_buffer *buffer;
4703
   /* Don't allow creating buffers bigger than our address space.  The real
    * issue here is that we may align up the buffer size and we don't want
    * that to cause roll-over.  However, no one has any business allocating
    * a buffer larger than our GTT size.
    */
4709    if (pCreateInfo->size > device->physical->gtt_size)
4710       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4711
4712    buffer = vk_buffer_create(&device->vk, pCreateInfo,
4713                              pAllocator, sizeof(*buffer));
4714    if (buffer == NULL)
4715       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4716
4717    buffer->address = ANV_NULL_ADDRESS;
4718
4719    *pBuffer = anv_buffer_to_handle(buffer);
4720
4721    return VK_SUCCESS;
4722 }
4723
4724 void anv_DestroyBuffer(
4725     VkDevice                                    _device,
4726     VkBuffer                                    _buffer,
4727     const VkAllocationCallbacks*                pAllocator)
4728 {
4729    ANV_FROM_HANDLE(anv_device, device, _device);
4730    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
4731
4732    if (!buffer)
4733       return;
4734
4735    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
4736 }
4737
4738 VkDeviceAddress anv_GetBufferDeviceAddress(
4739     VkDevice                                    device,
4740     const VkBufferDeviceAddressInfo*            pInfo)
4741 {
4742    ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4743
4744    assert(!anv_address_is_null(buffer->address));
4745
4746    return anv_address_physical(buffer->address);
4747 }
4748
4749 uint64_t anv_GetBufferOpaqueCaptureAddress(
4750     VkDevice                                    device,
4751     const VkBufferDeviceAddressInfo*            pInfo)
4752 {
4753    return 0;
4754 }
4755
4756 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
4757     VkDevice                                    device,
4758     const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
4759 {
4760    ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
4761
4762    assert(memory->bo->has_client_visible_address);
4763
4764    return intel_48b_address(memory->bo->offset);
4765 }
4766
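/* Fill an ISL buffer surface state at state.map.  `range` is the buffer
 * size in bytes as seen by the shader and `stride` the per-element stride;
 * the MOCS value is chosen from the usage flags and from whether the
 * backing BO is external.
 */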
4767 void
4768 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
4769                               enum isl_format format,
4770                               struct isl_swizzle swizzle,
4771                               isl_surf_usage_flags_t usage,
4772                               struct anv_address address,
4773                               uint32_t range, uint32_t stride)
4774 {
4775    isl_buffer_fill_state(&device->isl_dev, state.map,
4776                          .address = anv_address_physical(address),
4777                          .mocs = isl_mocs(&device->isl_dev, usage,
4778                                           address.bo && address.bo->is_external),
4779                          .size_B = range,
4780                          .format = format,
4781                          .swizzle = swizzle,
4782                          .stride_B = stride);
4783 }
4784
4785 void anv_DestroySampler(
4786     VkDevice                                    _device,
4787     VkSampler                                   _sampler,
4788     const VkAllocationCallbacks*                pAllocator)
4789 {
4790    ANV_FROM_HANDLE(anv_device, device, _device);
4791    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
4792
4793    if (!sampler)
4794       return;
4795
4796    if (sampler->bindless_state.map) {
4797       anv_state_pool_free(&device->dynamic_state_pool,
4798                           sampler->bindless_state);
4799    }
4800
4801    if (sampler->custom_border_color.map) {
4802       anv_state_reserved_pool_free(&device->custom_border_colors,
4803                                    sampler->custom_border_color);
4804    }
4805
4806    vk_object_free(&device->vk, pAllocator, sampler);
4807 }
4808
4809 static const VkTimeDomainEXT anv_time_domains[] = {
4810    VK_TIME_DOMAIN_DEVICE_EXT,
4811    VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
4812 #ifdef CLOCK_MONOTONIC_RAW
4813    VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
4814 #endif
4815 };
4816
4817 VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
4818    VkPhysicalDevice                             physicalDevice,
4819    uint32_t                                     *pTimeDomainCount,
4820    VkTimeDomainEXT                              *pTimeDomains)
4821 {
4822    int d;
4823    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
4824
4825    for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
4826       vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
4827          *i = anv_time_domains[d];
4828       }
4829    }
4830
4831    return vk_outarray_status(&out);
4832 }
4833
4834 VkResult anv_GetCalibratedTimestampsEXT(
4835    VkDevice                                     _device,
4836    uint32_t                                     timestampCount,
4837    const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
4838    uint64_t                                     *pTimestamps,
4839    uint64_t                                     *pMaxDeviation)
4840 {
4841    ANV_FROM_HANDLE(anv_device, device, _device);
4842    uint64_t timestamp_frequency = device->info->timestamp_frequency;
4843    int d;
4844    uint64_t begin, end;
4845    uint64_t max_clock_period = 0;
4846
4847 #ifdef CLOCK_MONOTONIC_RAW
4848    begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
4849 #else
4850    begin = vk_clock_gettime(CLOCK_MONOTONIC);
4851 #endif
4852
4853    for (d = 0; d < timestampCount; d++) {
4854       switch (pTimestampInfos[d].timeDomain) {
4855       case VK_TIME_DOMAIN_DEVICE_EXT:
4856          if (!intel_gem_read_render_timestamp(device->fd, &pTimestamps[d])) {
4857             return vk_device_set_lost(&device->vk, "Failed to read the "
4858                                       "TIMESTAMP register: %m");
4859          }
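         /* Convert the timestamp frequency into a tick period in
          * nanoseconds; a 19.2 MHz timestamp clock, for example, gives a
          * ~52 ns period.
          */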
4860          uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
4861          max_clock_period = MAX2(max_clock_period, device_period);
4862          break;
4863       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
4864          pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
4865          max_clock_period = MAX2(max_clock_period, 1);
4866          break;
4867
4868 #ifdef CLOCK_MONOTONIC_RAW
4869       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
4870          pTimestamps[d] = begin;
4871          break;
4872 #endif
4873       default:
4874          pTimestamps[d] = 0;
4875          break;
4876       }
4877    }
4878
4879 #ifdef CLOCK_MONOTONIC_RAW
4880    end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
4881 #else
4882    end = vk_clock_gettime(CLOCK_MONOTONIC);
4883 #endif
4884
4885    *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
4886
4887    return VK_SUCCESS;
4888 }
4889
4890 void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
4891     VkPhysicalDevice                            physicalDevice,
4892     VkSampleCountFlagBits                       samples,
4893     VkMultisamplePropertiesEXT*                 pMultisampleProperties)
4894 {
4895    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
4896
4897    assert(pMultisampleProperties->sType ==
4898           VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);
4899
4900    VkExtent2D grid_size;
4901    if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
4902       grid_size.width = 1;
4903       grid_size.height = 1;
4904    } else {
4905       grid_size.width = 0;
4906       grid_size.height = 0;
4907    }
4908    pMultisampleProperties->maxSampleLocationGridSize = grid_size;
4909
4910    vk_foreach_struct(ext, pMultisampleProperties->pNext)
4911       anv_debug_ignored_stype(ext->sType);
4912 }
4913
/* vk_icd.h does not declare this function, so we declare it here to
 * suppress -Wmissing-prototypes.
 */
4917 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4918 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
4919
4920 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
4921 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
4922 {
4923    /* For the full details on loader interface versioning, see
4924     * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
4925     * What follows is a condensed summary, to help you navigate the large and
4926     * confusing official doc.
4927     *
4928     *   - Loader interface v0 is incompatible with later versions. We don't
4929     *     support it.
4930     *
4931     *   - In loader interface v1:
4932     *       - The first ICD entrypoint called by the loader is
4933     *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
4934     *         entrypoint.
4935     *       - The ICD must statically expose no other Vulkan symbol unless it is
4936     *         linked with -Bsymbolic.
4937     *       - Each dispatchable Vulkan handle created by the ICD must be
4938     *         a pointer to a struct whose first member is VK_LOADER_DATA. The
4939     *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
4940     *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
4941     *         vkDestroySurfaceKHR(). The ICD must be capable of working with
4942     *         such loader-managed surfaces.
4943     *
4944     *    - Loader interface v2 differs from v1 in:
4945     *       - The first ICD entrypoint called by the loader is
4946     *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
4947     *         statically expose this entrypoint.
4948     *
4949     *    - Loader interface v3 differs from v2 in:
4950     *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR,
4952     *          because the loader no longer does so.
4953     *
4954     *    - Loader interface v4 differs from v3 in:
4955     *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
4956     *
4957     *    - Loader interface v5 differs from v4 in:
4958     *        - The ICD must support Vulkan API version 1.1 and must not return
4959     *          VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
4960     *          Vulkan Loader with interface v4 or smaller is being used and the
4961     *          application provides an API version that is greater than 1.0.
4962     */
4963    *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
4964    return VK_SUCCESS;
4965 }
4966
4967 VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
4968     VkPhysicalDevice                            physicalDevice,
4969     uint32_t*                                   pFragmentShadingRateCount,
4970     VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
4971 {
4972    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
4973    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
4974                           pFragmentShadingRates, pFragmentShadingRateCount);
4975
4976 #define append_rate(_samples, _width, _height)                                      \
4977    do {                                                                             \
4978       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
4979          __r->sampleCounts = _samples;                                              \
4980          __r->fragmentSize = (VkExtent2D) {                                         \
4981             .width = _width,                                                        \
4982             .height = _height,                                                      \
4983          };                                                                         \
4984       }                                                                             \
4985    } while (0)
4986
4987    VkSampleCountFlags sample_counts =
4988       isl_device_get_sample_counts(&physical_device->isl_dev);
4989
   /* BSpec 47003: There are a number of restrictions on the sample count
    * based on the coarse pixel size.
    */
4993    static const VkSampleCountFlags cp_size_sample_limits[] = {
4994       [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
4995              ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4996       [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4997       [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4998       [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
4999       [16] = ISL_SAMPLE_COUNT_1_BIT,
5000    };
5001
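   /* The loops below walk from the largest fragment size to the smallest,
    * since the spec requires the returned rates to be ordered from largest
    * to smallest fragmentSize.
    */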
5002    for (uint32_t x = 4; x >= 1; x /= 2) {
5003        for (uint32_t y = 4; y >= 1; y /= 2) {
5004           if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
5005              /* BSpec 47003:
5006               *   "CPsize 1x4 and 4x1 are not supported"
5007               */
5008              if ((x == 1 && y == 4) || (x == 4 && y == 1))
5009                 continue;
5010
             /* For size {1, 1}, the sample count must be ~0
              *
              * 4x2 is also a special case.
              */
5015              if (x == 1 && y == 1)
5016                 append_rate(~0, x, y);
5017              else if (x == 4 && y == 2)
5018                 append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
5019              else
5020                 append_rate(cp_size_sample_limits[x * y], x, y);
5021           } else {
5022              /* For size {1, 1}, the sample count must be ~0 */
5023              if (x == 1 && y == 1)
5024                 append_rate(~0, x, y);
5025              else
5026                 append_rate(sample_counts, x, y);
5027           }
5028        }
5029    }
5030
5031 #undef append_rate
5032
5033    return vk_outarray_status(&out);
5034 }