2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
29 #include <sys/mkdev.h>
31 #ifdef MAJOR_IN_SYSMACROS
32 #include <sys/sysmacros.h>
38 #include "drm-uapi/drm_fourcc.h"
39 #include "drm-uapi/drm.h"
42 #include "anv_private.h"
43 #include "anv_measure.h"
44 #include "util/u_debug.h"
45 #include "util/build_id.h"
46 #include "util/disk_cache.h"
47 #include "util/mesa-sha1.h"
48 #include "util/os_file.h"
49 #include "util/os_misc.h"
50 #include "util/u_atomic.h"
51 #include "util/u_string.h"
52 #include "util/driconf.h"
55 #include "vk_deferred_operation.h"
56 #include "vk_drm_syncobj.h"
57 #include "common/intel_aux_map.h"
58 #include "common/intel_defines.h"
59 #include "common/intel_uuid.h"
60 #include "perf/intel_perf.h"
62 #include "genxml/gen7_pack.h"
63 #include "genxml/genX_bits.h"
/* driconf option table honoured by anv, grouped into performance, debug and
 * quality sections.  NOTE(review): this extract skips lines — the
 * DRI_CONF_SECTION_END terminators and closing "};" are not visible here. */
65 static const driOptionDescription anv_dri_options[] = {
66 DRI_CONF_SECTION_PERFORMANCE
67 DRI_CONF_ADAPTIVE_SYNC(true)
68 DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
69 DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
70 DRI_CONF_VK_KHR_PRESENT_WAIT(false)
71 DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
72 DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
73 DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
74 DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
77 DRI_CONF_SECTION_DEBUG
78 DRI_CONF_ALWAYS_FLUSH_CACHE(false)
79 DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
80 DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
83 DRI_CONF_SECTION_QUALITY
84 DRI_CONF_PP_LOWER_DEPTH_RANGE_RATE()
88 /* This is probably far too big, but it reflects the max size used for
89 * messages in OpenGL's KHR_debug.
91 #define MAX_DEBUG_MESSAGE_LENGTH 4096
93 /* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
94 #if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
95 #define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
/* Debug-log callback handed to the brw compiler: formats the message into a
 * bounded local buffer.  The actual vk_logd emission is commented out below.
 * NOTE(review): the va_start/va_end lines around vsnprintf are missing from
 * this extract — confirm against the full file. */
99 compiler_debug_log(void *data, UNUSED unsigned *id, const char *fmt, ...)
101 char str[MAX_DEBUG_MESSAGE_LENGTH];
102 struct anv_device *device = (struct anv_device *)data;
103 UNUSED struct anv_instance *instance = device->physical->instance;
107 (void) vsnprintf(str, MAX_DEBUG_MESSAGE_LENGTH, fmt, args);
110 //vk_logd(VK_LOG_NO_OBJS(&instance->vk), "%s", str);
/* Perf-log callback for the brw compiler: forwards the message to the mesa
 * logger only when INTEL_DEBUG=perf is set.  NOTE(review): va_list setup is
 * not visible in this extract. */
114 compiler_perf_log(UNUSED void *data, UNUSED unsigned *id, const char *fmt, ...)
119 if (INTEL_DEBUG(DEBUG_PERF))
120 mesa_logd_v(fmt, args);
125 #if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
126 defined(VK_USE_PLATFORM_XCB_KHR) || \
127 defined(VK_USE_PLATFORM_XLIB_KHR) || \
128 defined(VK_USE_PLATFORM_DISPLAY_KHR)
129 #define ANV_USE_WSI_PLATFORM
133 #define ANV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
135 #define ANV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)
/* vkEnumerateInstanceVersion entry point: reports the API version this driver
 * was built for (ANV_API_VERSION, selected by the #ifdef above). */
138 VkResult anv_EnumerateInstanceVersion(
139 uint32_t* pApiVersion)
141 *pApiVersion = ANV_API_VERSION;
/* Instance-level extensions anv always advertises, plus WSI/platform surface
 * extensions gated on the platform #ifdefs below.  NOTE(review): the matching
 * #endif lines are missing from this extract. */
145 static const struct vk_instance_extension_table instance_extensions = {
146 .KHR_device_group_creation = true,
147 .KHR_external_fence_capabilities = true,
148 .KHR_external_memory_capabilities = true,
149 .KHR_external_semaphore_capabilities = true,
150 .KHR_get_physical_device_properties2 = true,
151 .EXT_debug_report = true,
152 .EXT_debug_utils = true,
154 #ifdef ANV_USE_WSI_PLATFORM
155 .KHR_get_surface_capabilities2 = true,
157 .KHR_surface_protected_capabilities = true,
158 .EXT_swapchain_colorspace = true,
160 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
161 .KHR_wayland_surface = true,
163 #ifdef VK_USE_PLATFORM_XCB_KHR
164 .KHR_xcb_surface = true,
166 #ifdef VK_USE_PLATFORM_XLIB_KHR
167 .KHR_xlib_surface = true,
169 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
170 .EXT_acquire_xlib_display = true,
172 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
174 .KHR_get_display_properties2 = true,
175 .EXT_direct_mode_display = true,
176 .EXT_display_surface_counter = true,
177 .EXT_acquire_drm_display = true,
/* Fills *ext with the device extensions this physical device supports.
 * Unconditional extensions are set to true; hardware- or kernel-dependent
 * ones are gated on devinfo fields, perf support, syncobj features, driconf
 * opt-ins, or environment variables. */
182 get_device_extensions(const struct anv_physical_device *device,
183 struct vk_device_extension_table *ext)
185 const bool has_syncobj_wait =
186 (device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT) != 0;
188 const bool nv_mesh_shading_enabled =
189 debug_get_bool_option("ANV_EXPERIMENTAL_NV_MESH_SHADER", false);
191 *ext = (struct vk_device_extension_table) {
192 .KHR_8bit_storage = true,
193 .KHR_16bit_storage = true,
/* NOTE(review): .KHR_acceleration_structure is initialized twice below.  In
 * C the later designated initializer wins, so the first line is dead code
 * (and triggers -Woverride-init); the first of the two should be removed. */
194 .KHR_acceleration_structure = device->info.has_ray_tracing,
195 .KHR_acceleration_structure = ANV_SUPPORT_RT &&
196 device->info.has_ray_tracing,
197 .KHR_bind_memory2 = true,
198 .KHR_buffer_device_address = true,
199 .KHR_copy_commands2 = true,
200 .KHR_create_renderpass2 = true,
201 .KHR_dedicated_allocation = true,
202 .KHR_deferred_host_operations = true,
203 .KHR_depth_stencil_resolve = true,
204 .KHR_descriptor_update_template = true,
205 .KHR_device_group = true,
206 .KHR_draw_indirect_count = true,
207 .KHR_driver_properties = true,
208 .KHR_dynamic_rendering = true,
209 .KHR_external_fence = has_syncobj_wait,
210 .KHR_external_fence_fd = has_syncobj_wait,
211 .KHR_external_memory = true,
212 .KHR_external_memory_fd = true,
213 .KHR_external_semaphore = true,
214 .KHR_external_semaphore_fd = true,
215 .KHR_format_feature_flags2 = true,
216 .KHR_fragment_shading_rate = device->info.ver >= 11,
217 .KHR_get_memory_requirements2 = true,
218 .KHR_image_format_list = true,
219 .KHR_imageless_framebuffer = true,
220 #ifdef ANV_USE_WSI_PLATFORM
221 .KHR_incremental_present = true,
223 .KHR_maintenance1 = true,
224 .KHR_maintenance2 = true,
225 .KHR_maintenance3 = true,
226 .KHR_maintenance4 = true,
227 .KHR_multiview = true,
/* Perf queries need a recent i915 perf interface (or OA config disabled for
 * debugging) and secondary command buffer calls. */
228 .KHR_performance_query =
230 (device->perf->i915_perf_version >= 3 ||
231 INTEL_DEBUG(DEBUG_NO_OACONFIG)) &&
232 device->use_call_secondary,
233 .KHR_pipeline_executable_properties = true,
234 .KHR_pipeline_library = true,
235 /* Hide these behind dri configs for now since we cannot implement it reliably on
236 * all surfaces yet. There is no surface capability query for present wait/id,
237 * but the feature is useful enough to hide behind an opt-in mechanism for now.
238 * If the instance only enables surface extensions that unconditionally support present wait,
239 * we can also expose the extension that way. */
241 driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
242 wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
244 driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
245 wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
246 .KHR_push_descriptor = true,
248 ANV_SUPPORT_RT && device->info.has_ray_tracing,
249 .KHR_ray_tracing_pipeline =
250 ANV_SUPPORT_RT && device->info.has_ray_tracing,
251 .KHR_relaxed_block_layout = true,
252 .KHR_sampler_mirror_clamp_to_edge = true,
253 .KHR_sampler_ycbcr_conversion = true,
254 .KHR_separate_depth_stencil_layouts = true,
255 .KHR_shader_atomic_int64 = true,
256 .KHR_shader_clock = true,
257 .KHR_shader_draw_parameters = true,
258 .KHR_shader_float16_int8 = true,
259 .KHR_shader_float_controls = true,
260 .KHR_shader_integer_dot_product = true,
261 .KHR_shader_non_semantic_info = true,
262 .KHR_shader_subgroup_extended_types = true,
263 .KHR_shader_subgroup_uniform_control_flow = true,
264 .KHR_shader_terminate_invocation = true,
265 .KHR_spirv_1_4 = true,
266 .KHR_storage_buffer_storage_class = true,
267 #ifdef ANV_USE_WSI_PLATFORM
268 .KHR_swapchain = true,
269 .KHR_swapchain_mutable_format = true,
271 .KHR_synchronization2 = true,
272 .KHR_timeline_semaphore = true,
273 .KHR_uniform_buffer_standard_layout = true,
274 .KHR_variable_pointers = true,
275 .KHR_vulkan_memory_model = true,
276 .KHR_workgroup_memory_explicit_layout = true,
277 .KHR_zero_initialize_workgroup_memory = true,
278 .EXT_4444_formats = true,
279 .EXT_border_color_swizzle = true,
280 .EXT_buffer_device_address = true,
281 .EXT_calibrated_timestamps = device->has_reg_timestamp,
282 .EXT_color_write_enable = true,
283 .EXT_conditional_rendering = true,
284 .EXT_conservative_rasterization = true,
285 .EXT_custom_border_color = true,
286 .EXT_depth_clamp_zero_one = true,
287 .EXT_depth_clip_control = true,
288 .EXT_depth_clip_enable = true,
289 .EXT_descriptor_indexing = true,
290 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
291 .EXT_display_control = true,
293 .EXT_extended_dynamic_state = true,
294 .EXT_extended_dynamic_state2 = true,
295 .EXT_extended_dynamic_state3 = true,
296 .EXT_external_memory_dma_buf = true,
297 .EXT_external_memory_host = true,
298 .EXT_fragment_shader_interlock = true,
299 .EXT_global_priority = device->max_context_priority >=
300 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
301 .EXT_global_priority_query = device->max_context_priority >=
302 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
303 .EXT_host_query_reset = true,
304 .EXT_image_2d_view_of_3d = true,
305 .EXT_image_robustness = true,
306 .EXT_image_drm_format_modifier = true,
307 .EXT_image_view_min_lod = true,
308 .EXT_index_type_uint8 = true,
309 .EXT_inline_uniform_block = true,
310 .EXT_line_rasterization = true,
311 /* Enable the extension only if we have support on both the local &
/* Memory budget needs a readable system-memory "available" figure and, on
 * devices with local memory, a non-zero mappable VRAM budget. */
314 .EXT_memory_budget = (!device->info.has_local_mem ||
315 device->vram_mappable.available > 0) &&
316 device->sys.available,
317 .EXT_mesh_shader = device->info.has_mesh_shading,
318 .EXT_mutable_descriptor_type = true,
319 .EXT_non_seamless_cube_map = true,
320 .EXT_pci_bus_info = true,
321 .EXT_physical_device_drm = true,
322 .EXT_pipeline_creation_cache_control = true,
323 .EXT_pipeline_creation_feedback = true,
324 .EXT_post_depth_coverage = true,
325 .EXT_primitives_generated_query = true,
326 .EXT_primitive_topology_list_restart = true,
327 .EXT_private_data = true,
328 .EXT_provoking_vertex = true,
329 .EXT_queue_family_foreign = true,
330 .EXT_robustness2 = true,
331 .EXT_sample_locations = true,
332 .EXT_sampler_filter_minmax = true,
333 .EXT_scalar_block_layout = true,
334 .EXT_separate_stencil_usage = true,
335 .EXT_shader_atomic_float = true,
336 .EXT_shader_atomic_float2 = true,
337 .EXT_shader_demote_to_helper_invocation = true,
338 .EXT_shader_module_identifier = true,
339 .EXT_shader_stencil_export = true,
340 .EXT_shader_subgroup_ballot = true,
341 .EXT_shader_subgroup_vote = true,
342 .EXT_shader_viewport_index_layer = true,
343 .EXT_subgroup_size_control = true,
344 .EXT_texel_buffer_alignment = true,
345 .EXT_tooling_info = true,
346 .EXT_transform_feedback = true,
347 .EXT_vertex_attribute_divisor = true,
348 .EXT_ycbcr_image_arrays = true,
350 .ANDROID_external_memory_android_hardware_buffer = true,
351 .ANDROID_native_buffer = true,
353 .GOOGLE_decorate_string = true,
354 .GOOGLE_hlsl_functionality1 = true,
355 .GOOGLE_user_type = true,
356 .INTEL_performance_query = device->perf &&
357 device->perf->i915_perf_version >= 3,
358 .INTEL_shader_integer_functions2 = true,
359 .EXT_multi_draw = true,
360 .NV_compute_shader_derivatives = true,
/* NV mesh shading is experimental: requires both HW support and the
 * ANV_EXPERIMENTAL_NV_MESH_SHADER env-var opt-in read above. */
361 .NV_mesh_shader = device->info.has_mesh_shading &&
362 nv_mesh_shading_enabled,
363 .VALVE_mutable_descriptor_type = true,
/* Computes how much system RAM to expose as a Vulkan heap: half of RAM up to
 * 4 GiB total, 3/4 beyond that, further clamped to 3/4 of the GTT, and to
 * 2 GiB when 48-bit addressing is unavailable. */
368 anv_compute_sys_heap_size(struct anv_physical_device *device,
371 /* We don't want to burn too much ram with the GPU. If the user has 4GiB
372 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
374 uint64_t available_ram;
375 if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
376 available_ram = total_ram / 2;
378 available_ram = total_ram * 3 / 4;
380 /* We also want to leave some padding for things we allocate in the driver,
381 * so don't go over 3/4 of the GTT either.
383 available_ram = MIN2(available_ram, device->gtt_size * 3 / 4);
385 if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) {
386 /* When running with an overridden PCI ID, we may get a GTT size from
387 * the kernel that is greater than 2 GiB but the execbuf check for 48bit
388 * address support can still fail. Just clamp the address space size to
389 * 2 GiB if we don't have 48-bit support.
391 mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but "
392 "not support for 48-bit addresses",
394 available_ram = 2ull << 30;
397 return available_ram;
/* Seeds the device's memory-region bookkeeping (system RAM, mappable VRAM,
 * non-mappable VRAM) from the intel_device_info memory info. */
400 static VkResult MUST_CHECK
401 anv_init_meminfo(struct anv_physical_device *device, int fd)
403 const struct intel_device_info *devinfo = &device->info;
405 device->sys.region.memory_class = devinfo->mem.sram.mem_class;
406 device->sys.region.memory_instance = devinfo->mem.sram.mem_instance;
408 anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size);
409 device->sys.available = devinfo->mem.sram.mappable.free;
411 device->vram_mappable.region.memory_class = devinfo->mem.vram.mem_class;
412 device->vram_mappable.region.memory_instance =
413 devinfo->mem.vram.mem_instance;
414 device->vram_mappable.size = devinfo->mem.vram.mappable.size;
415 device->vram_mappable.available = devinfo->mem.vram.mappable.free;
417 device->vram_non_mappable.region.memory_class =
418 devinfo->mem.vram.mem_class;
419 device->vram_non_mappable.region.memory_instance =
420 devinfo->mem.vram.mem_instance;
421 device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size;
422 device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
/* Refreshes only the "available" (free) figures for each memory region after
 * re-querying the kernel; sizes and region ids set in anv_init_meminfo stay
 * fixed.  Returns early if the kernel query fails. */
428 anv_update_meminfo(struct anv_physical_device *device, int fd)
430 if (!intel_device_info_update_memory_info(&device->info, fd))
433 const struct intel_device_info *devinfo = &device->info;
434 device->sys.available = devinfo->mem.sram.mappable.free;
435 device->vram_mappable.available = devinfo->mem.vram.mappable.free;
436 device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free;
/* Builds the VkMemoryHeap / VkMemoryType tables.  Discrete (VRAM) devices get
 * 2-3 heaps and 3 types; LLC-sharing integrated parts get one fully cached+
 * coherent type; non-LLC parts get a cached-or-coherent choice of two types.
 * Finally decides whether CPU cache flushing (clflush) is required. */
441 anv_physical_device_init_heaps(struct anv_physical_device *device, int fd)
443 VkResult result = anv_init_meminfo(device, fd);
444 if (result != VK_SUCCESS)
447 assert(device->sys.size != 0);
449 if (anv_physical_device_has_vram(device)) {
450 /* We can create 2 or 3 different heaps when we have local memory
451 * support, first heap with local memory size and second with system
452 * memory size and the third is added only if part of the vram is
453 * mappable to the host.
455 device->memory.heap_count = 2;
456 device->memory.heaps[0] = (struct anv_memory_heap) {
457 /* If there is a vram_non_mappable, use that for the device only
458 * heap. Otherwise use the vram_mappable.
460 .size = device->vram_non_mappable.size != 0 ?
461 device->vram_non_mappable.size : device->vram_mappable.size,
462 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
463 .is_local_mem = true,
465 device->memory.heaps[1] = (struct anv_memory_heap) {
466 .size = device->sys.size,
468 .is_local_mem = false,
470 /* Add an additional smaller vram mappable heap if we can't map all the
473 if (device->vram_non_mappable.size > 0) {
474 device->memory.heap_count++;
475 device->memory.heaps[2] = (struct anv_memory_heap) {
476 .size = device->vram_mappable.size,
477 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
478 .is_local_mem = true,
482 device->memory.type_count = 3;
483 device->memory.types[0] = (struct anv_memory_type) {
484 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
487 device->memory.types[1] = (struct anv_memory_type) {
488 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
489 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
490 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
493 device->memory.types[2] = (struct anv_memory_type) {
494 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
495 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
496 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
497 /* This memory type either comes from heaps[0] if there is only
498 * mappable vram region, or from heaps[2] if there is both mappable &
499 * non-mappable vram regions.
501 .heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0,
503 } else if (device->info.has_llc) {
504 device->memory.heap_count = 1;
505 device->memory.heaps[0] = (struct anv_memory_heap) {
506 .size = device->sys.size,
507 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
508 .is_local_mem = false,
511 /* Big core GPUs share LLC with the CPU and thus one memory type can be
512 * both cached and coherent at the same time.
514 device->memory.type_count = 1;
515 device->memory.types[0] = (struct anv_memory_type) {
516 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
517 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
518 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
519 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
523 device->memory.heap_count = 1;
524 device->memory.heaps[0] = (struct anv_memory_heap) {
525 .size = device->sys.size,
526 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
527 .is_local_mem = false,
530 /* The spec requires that we expose a host-visible, coherent memory
531 * type, but Atom GPUs don't share LLC. Thus we offer two memory types
532 * to give the application a choice between cached, but not coherent and
533 * coherent but uncached (WC though).
535 device->memory.type_count = 2;
536 device->memory.types[0] = (struct anv_memory_type) {
537 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
538 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
539 VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
542 device->memory.types[1] = (struct anv_memory_type) {
543 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
544 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
545 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
/* Any host-visible but non-coherent type forces manual cache flushing; only
 * builds with integrated-GPU support implement that path. */
550 for (unsigned i = 0; i < device->memory.type_count; i++) {
551 VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags;
552 if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
553 !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
554 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
555 device->memory.need_clflush = true;
557 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
558 "Memory configuration requires flushing, but it's not implemented for this architecture");
/* Derives the driver build sha1, pipeline-cache UUID (build-id + PCI device
 * id + bindless setting), and the driver/device UUIDs.  Fails if no build-id
 * note is present or it is shorter than a SHA-1. */
566 anv_physical_device_init_uuids(struct anv_physical_device *device)
568 const struct build_id_note *note =
569 build_id_find_nhdr_for_addr(anv_physical_device_init_uuids);
571 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
572 "Failed to find build-id");
575 unsigned build_id_len = build_id_length(note);
576 if (build_id_len < 20) {
577 return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
578 "build-id too short. It needs to be a SHA");
581 memcpy(device->driver_build_sha1, build_id_data(note), 20);
583 struct mesa_sha1 sha1_ctx;
585 STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));
587 /* The pipeline cache UUID is used for determining when a pipeline cache is
588 * invalid. It needs both a driver build and the PCI ID of the device.
590 _mesa_sha1_init(&sha1_ctx);
591 _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
592 _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id,
593 sizeof(device->info.pci_device_id));
594 _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless,
595 sizeof(device->always_use_bindless));
596 _mesa_sha1_final(&sha1_ctx, sha1);
597 memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);
599 intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE);
600 intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE);
/* Creates the on-disk shader cache keyed by PCI device id ("anv_%04x"), the
 * driver build sha1 timestamp, and the compiler config flags.  Compiled out
 * when ENABLE_SHADER_CACHE is not defined. */
606 anv_physical_device_init_disk_cache(struct anv_physical_device *device)
608 #ifdef ENABLE_SHADER_CACHE
610 ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x",
611 device->info.pci_device_id);
612 assert(len == sizeof(renderer) - 2);
615 _mesa_sha1_format(timestamp, device->driver_build_sha1);
617 const uint64_t driver_flags =
618 brw_get_compiler_config_value(device->compiler);
619 device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags);
/* Destroys the disk cache if one was created, and nulls the pointer.  Without
 * ENABLE_SHADER_CACHE, only asserts the cache was never created. */
624 anv_physical_device_free_disk_cache(struct anv_physical_device *device)
626 #ifdef ENABLE_SHADER_CACHE
627 if (device->vk.disk_cache) {
628 disk_cache_destroy(device->vk.disk_cache);
629 device->vk.disk_cache = NULL;
632 assert(device->vk.disk_cache == NULL);
636 /* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of
639 * To override the number queues:
640 * * "gc" is for graphics queues with compute support
641 * * "g" is for graphics queues with no compute support
642 * * "c" is for compute queues with no graphics support
644 * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of
645 * advertised queues to be 2 queues with graphics+compute support, and 1 queue
646 * with compute-only support.
648 * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to
649 * include 1 queue with compute-only support, but it will not change the
650 * number of graphics+compute queues.
652 * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues
653 * to include 1 queue with compute-only support, and it would override the
654 * number of graphics+compute queues to be 0.
/* Parses ANV_QUEUE_OVERRIDE (see comment block above) and overwrites the
 * queue counts that were passed in, only for the tokens actually present.
 * Warns about a spec-violating combination of gc=0 with g>0. */
657 anv_override_engine_counts(int *gc_count, int *g_count, int *c_count)
659 int gc_override = -1;
662 char *env = getenv("ANV_QUEUE_OVERRIDE");
/* strtok_r mutates its input, so this presumably runs on a copy of the
 * env string made on a line not visible in this extract — TODO confirm. */
669 char *next = strtok_r(env, ",", &save);
670 while (next != NULL) {
671 if (strncmp(next, "gc=", 3) == 0) {
672 gc_override = strtol(next + 3, NULL, 0);
673 } else if (strncmp(next, "g=", 2) == 0) {
674 g_override = strtol(next + 2, NULL, 0);
675 } else if (strncmp(next, "c=", 2) == 0) {
676 c_override = strtol(next + 2, NULL, 0);
678 mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next);
680 next = strtok_r(NULL, ",", &save);
683 if (gc_override >= 0)
684 *gc_count = gc_override;
686 *g_count = g_override;
687 if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0))
688 mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the "
689 "Vulkan specification");
691 *c_count = c_override;
/* Populates pdevice->queue.families: graphics+compute, graphics-only, and
 * compute-only families based on engine counts (after the env override), or
 * a single render queue as a fallback when engine info is unavailable. */
695 anv_physical_device_init_queue_families(struct anv_physical_device *pdevice)
697 uint32_t family_count = 0;
699 if (pdevice->engine_info) {
701 intel_engines_count(pdevice->engine_info,
702 INTEL_ENGINE_CLASS_RENDER);
/* Dedicated compute engines are opt-in via INTEL_COMPUTE_CLASS. */
705 if (debug_get_bool_option("INTEL_COMPUTE_CLASS", false))
706 c_count = intel_engines_count(pdevice->engine_info,
707 INTEL_ENGINE_CLASS_COMPUTE);
708 enum intel_engine_class compute_class =
709 c_count < 1 ? INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE;
711 anv_override_engine_counts(&gc_count, &g_count, &c_count);
714 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
715 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
716 VK_QUEUE_COMPUTE_BIT |
717 VK_QUEUE_TRANSFER_BIT,
718 .queueCount = gc_count,
719 .engine_class = INTEL_ENGINE_CLASS_RENDER,
723 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
724 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
725 VK_QUEUE_TRANSFER_BIT,
726 .queueCount = g_count,
727 .engine_class = INTEL_ENGINE_CLASS_RENDER,
731 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
732 .queueFlags = VK_QUEUE_COMPUTE_BIT |
733 VK_QUEUE_TRANSFER_BIT,
734 .queueCount = c_count,
735 .engine_class = compute_class,
738 /* Increase count below when other families are added as a reminder to
739 * increase the ANV_MAX_QUEUE_FAMILIES value.
741 STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 3);
743 /* Default to a single render queue */
744 pdevice->queue.families[family_count++] = (struct anv_queue_family) {
745 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
746 VK_QUEUE_COMPUTE_BIT |
747 VK_QUEUE_TRANSFER_BIT,
749 .engine_class = INTEL_ENGINE_CLASS_RENDER,
753 assert(family_count <= ANV_MAX_QUEUE_FAMILIES);
754 pdevice->queue.family_count = family_count;
/* Queries required and optional i915 GETPARAM capabilities.  Missing required
 * features (gem wait, execbuf2, wc mmap on non-LLC, softpin, syncobj) fail
 * initialization; optional ones (exec async/capture, timeline fences) and the
 * maximum context priority just configure device flags. */
758 anv_i915_physical_device_get_parameters(struct anv_physical_device *device)
760 VkResult result = VK_SUCCESS;
761 int val, fd = device->local_fd;
763 if (!intel_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT, &val) || !val) {
764 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
765 "kernel missing gem wait");
769 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2, &val) || !val) {
770 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
771 "kernel missing execbuf2");
775 if (!device->info.has_llc &&
776 (!intel_gem_get_param(fd, I915_PARAM_MMAP_VERSION, &val) || val < 1)) {
777 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
778 "kernel missing wc mmap");
782 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN, &val) || !val) {
783 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
784 "kernel missing softpin");
788 if (!intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_FENCE_ARRAY, &val) || !val) {
789 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
790 "kernel missing syncobj support");
794 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_ASYNC, &val))
795 device->has_exec_async = val;
796 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_CAPTURE, &val))
797 device->has_exec_capture = val;
799 /* Start with medium; sorted low to high */
800 const VkQueueGlobalPriorityKHR priorities[] = {
801 VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
802 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
803 VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
804 VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
/* Walk priorities low-to-high, remembering the highest one the kernel
 * accepts; the loop's break on failure is not visible in this extract. */
806 device->max_context_priority = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
807 for (unsigned i = 0; i < ARRAY_SIZE(priorities); i++) {
808 if (!anv_gem_has_context_priority(fd, priorities[i]))
810 device->max_context_priority = priorities[i];
813 if (intel_gem_get_param(fd, I915_PARAM_HAS_EXEC_TIMELINE_FENCES, &val))
814 device->has_exec_timeline = val;
/* Kernel-interface dispatch point; currently always the i915 backend. */
820 anv_physical_device_get_parameters(struct anv_physical_device *device)
822 return anv_i915_physical_device_get_parameters(device);
/* Probes one DRM device and, if it is a supported Intel GPU (gen9..gen12),
 * allocates and fully initializes the anv_physical_device: fd/device info,
 * kernel parameters, heaps, sync types, compiler, UUIDs, disk cache, queue
 * families, perf, extensions, WSI.  Returns VK_ERROR_INCOMPATIBLE_DRIVER for
 * devices another driver should pick up.  NOTE(review): error-path labels and
 * several goto targets are missing from this extract. */
826 anv_physical_device_try_create(struct vk_instance *vk_instance,
827 struct _drmDevice *drm_device,
828 struct vk_physical_device **out)
830 struct anv_instance *instance =
831 container_of(vk_instance, struct anv_instance, vk);
/* Reject non-render-node, non-PCI, or non-Intel (vendor 0x8086) devices. */
833 if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) ||
834 drm_device->bustype != DRM_BUS_PCI ||
835 drm_device->deviceinfo.pci->vendor_id != 0x8086)
836 return VK_ERROR_INCOMPATIBLE_DRIVER;
838 const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY];
839 const char *path = drm_device->nodes[DRM_NODE_RENDER];
844 brw_process_intel_debug_variable();
846 fd = open(path, O_RDWR | O_CLOEXEC);
848 if (errno == ENOMEM) {
849 return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
850 "Unable to open device %s: out of memory", path);
852 return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
853 "Unable to open device %s: %m", path);
856 struct intel_device_info devinfo;
857 if (!intel_get_device_info_from_fd(fd, &devinfo)) {
858 result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
/* anv supports gen9 through gen12 here; older gens go to hasvk. */
862 if (devinfo.ver > 12) {
863 result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
864 "Vulkan not yet supported on %s", devinfo.name);
866 } else if (devinfo.ver < 9) {
867 /* Silently fail here, hasvk should pick up this device. */
868 result = VK_ERROR_INCOMPATIBLE_DRIVER;
872 struct anv_physical_device *device =
873 vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
874 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
875 if (device == NULL) {
876 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
880 struct vk_physical_device_dispatch_table dispatch_table;
881 vk_physical_device_dispatch_table_from_entrypoints(
882 &dispatch_table, &anv_physical_device_entrypoints, true);
883 vk_physical_device_dispatch_table_from_entrypoints(
884 &dispatch_table, &wsi_physical_device_entrypoints, false);
886 result = vk_physical_device_init(&device->vk, &instance->vk,
887 NULL, /* We set up extensions later */
889 if (result != VK_SUCCESS) {
890 vk_error(instance, result);
893 device->instance = instance;
895 assert(strlen(path) < ARRAY_SIZE(device->path));
896 snprintf(device->path, ARRAY_SIZE(device->path), "%s", path);
898 device->info = devinfo;
900 device->local_fd = fd;
901 result = anv_physical_device_get_parameters(device);
902 if (result != VK_SUCCESS)
905 device->gtt_size = device->info.gtt_size ? device->info.gtt_size :
906 device->info.aperture_bytes;
908 /* We only allow 48-bit addresses with softpin because knowing the actual
909 * address is required for the vertex cache flush workaround.
911 device->supports_48bit_addresses =
912 device->gtt_size > (4ULL << 30 /* GiB */);
914 /* We currently only have the right bits for instructions in Gen12+. If the
915 * kernel ever starts supporting that feature on previous generations,
916 * we'll need to edit genxml prior to enabling here.
918 device->has_protected_contexts = device->info.ver >= 12 &&
919 intel_gem_supports_protected_context(fd);
921 result = anv_physical_device_init_heaps(device, fd);
922 if (result != VK_SUCCESS)
925 if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false))
926 device->has_exec_timeline = false;
/* Build the ordered list of sync types: DRM syncobj first, then BO-based
 * fallbacks for CPU-wait and timeline when syncobj can't provide them. */
930 device->sync_syncobj_type = vk_drm_syncobj_get_type(fd);
931 if (!device->has_exec_timeline)
932 device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;
933 device->sync_types[st_idx++] = &device->sync_syncobj_type;
935 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT))
936 device->sync_types[st_idx++] = &anv_bo_sync_type;
938 if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) {
939 device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type);
940 device->sync_types[st_idx++] = &device->sync_timeline_type.sync;
943 device->sync_types[st_idx++] = NULL;
944 assert(st_idx <= ARRAY_SIZE(device->sync_types));
945 device->vk.supported_sync_types = device->sync_types;
947 device->vk.pipeline_cache_import_ops = anv_cache_import_ops;
949 device->always_use_bindless =
950 debug_get_bool_option("ANV_ALWAYS_BINDLESS", false);
952 device->use_call_secondary =
953 !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false);
955 device->has_implicit_ccs = device->info.has_aux_map ||
956 device->info.verx10 >= 125;
958 /* Check if we can read the GPU timestamp register from the CPU */
960 device->has_reg_timestamp = intel_gem_read_render_timestamp(fd, &u64_ignore);
962 device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) ||
963 driQueryOptionb(&instance->dri_options, "always_flush_cache");
965 device->compiler = brw_compiler_create(NULL, &device->info);
966 if (device->compiler == NULL) {
967 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
970 device->compiler->shader_debug_log = compiler_debug_log;
971 device->compiler->shader_perf_log = compiler_perf_log;
972 device->compiler->constant_buffer_0_is_relative =
973 !device->info.has_context_isolation;
974 device->compiler->supports_shader_constants = true;
975 device->compiler->indirect_ubos_use_sampler = device->info.ver < 12;
977 isl_device_init(&device->isl_dev, &device->info);
979 result = anv_physical_device_init_uuids(device);
980 if (result != VK_SUCCESS)
983 anv_physical_device_init_disk_cache(device);
/* Only open the primary (master) node when VK_KHR_display is enabled. */
985 if (instance->vk.enabled_extensions.KHR_display) {
986 master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
987 if (master_fd >= 0) {
988 /* fail if we don't have permission to even render on this device */
989 if (!intel_gem_can_render_on_fd(master_fd)) {
995 device->master_fd = master_fd;
997 device->engine_info = intel_engine_get_info(fd);
998 anv_physical_device_init_queue_families(device);
1000 anv_physical_device_init_perf(device, fd);
1002 get_device_extensions(device, &device->vk.supported_extensions);
1004 /* Gather major/minor before WSI. */
1007 if (stat(primary_path, &st) == 0) {
1008 device->has_master = true;
1009 device->master_major = major(st.st_rdev);
1010 device->master_minor = minor(st.st_rdev);
1012 device->has_master = false;
1013 device->master_major = 0;
1014 device->master_minor = 0;
1017 if (stat(path, &st) == 0) {
1018 device->has_local = true;
1019 device->local_major = major(st.st_rdev);
1020 device->local_minor = minor(st.st_rdev);
1022 device->has_local = false;
1023 device->local_major = 0;
1024 device->local_minor = 0;
1027 result = anv_init_wsi(device);
1028 if (result != VK_SUCCESS)
1031 anv_measure_device_init(device);
1033 anv_genX(&device->info, init_physical_device_state)(device);
/* Error-path cleanup (labels not visible here): tear down in reverse order
 * of initialization, then release the allocation and any master fd. */
1040 ralloc_free(device->perf);
1041 free(device->engine_info);
1042 anv_physical_device_free_disk_cache(device);
1044 ralloc_free(device->compiler);
1046 vk_physical_device_finish(&device->vk);
1048 vk_free(&instance->vk.alloc, device);
1051 if (master_fd != -1)
/* vk_physical_device destroy hook: tears down an anv_physical_device in
 * roughly the reverse order of creation (WSI, measurement, engine info,
 * disk cache, compiler, perf, file descriptors) and frees the object
 * through the owning instance's allocator.
 */
1057 anv_physical_device_destroy(struct vk_physical_device *vk_device)
1059 struct anv_physical_device *device =
1060 container_of(vk_device, struct anv_physical_device, vk);
1062 anv_finish_wsi(device);
1063 anv_measure_device_destroy(device);
1064 free(device->engine_info);
1065 anv_physical_device_free_disk_cache(device);
1066 ralloc_free(device->compiler);
1067 ralloc_free(device->perf);
/* local_fd is unconditionally owned; master_fd is only valid (>= 0) when
 * KHR_display opened the primary node during device creation.
 */
1068 close(device->local_fd);
1069 if (device->master_fd >= 0)
1070 close(device->master_fd);
1071 vk_physical_device_finish(&device->vk);
1072 vk_free(&device->instance->vk.alloc, device);
/* Standard vkEnumerateInstanceExtensionProperties entry point: this driver
 * exposes no layers, so any non-NULL pLayerName yields
 * VK_ERROR_LAYER_NOT_PRESENT; otherwise the shared vk_* helper enumerates
 * the static instance_extensions table.
 *
 * NOTE(review): the guard condition selecting between the two returns
 * (presumably `if (pLayerName)`) sits on a line elided from this view —
 * confirm against the full file.
 */
1075 VkResult anv_EnumerateInstanceExtensionProperties(
1076 const char* pLayerName,
1077 uint32_t* pPropertyCount,
1078 VkExtensionProperties* pProperties)
1081 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1083 return vk_enumerate_instance_extension_properties(
1084 &instance_extensions, pPropertyCount, pProperties);
/* Parses the driconf option tables/files for this instance and caches the
 * anv-specific workaround toggles on the instance so per-device code does
 * not have to query the option cache repeatedly.  Matching is keyed on the
 * application/engine name and version supplied in VkApplicationInfo.
 */
1088 anv_init_dri_options(struct anv_instance *instance)
1090 driParseOptionInfo(&instance->available_dri_options, anv_dri_options,
1091 ARRAY_SIZE(anv_dri_options));
1092 driParseConfigFiles(&instance->dri_options,
1093 &instance->available_dri_options, 0, "anv", NULL, NULL,
1094 instance->vk.app_info.app_name,
1095 instance->vk.app_info.app_version,
1096 instance->vk.app_info.engine_name,
1097 instance->vk.app_info.engine_version);
/* Cached boolean/float workarounds; see the corresponding driconf
 * declarations for the per-application defaults.
 */
1099 instance->assume_full_subgroups =
1100 driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
1101 instance->limit_trig_input_range =
1102 driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
1103 instance->sample_mask_out_opengl_behaviour =
1104 driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
1105 instance->lower_depth_range_rate =
1106 driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
1107 instance->fp64_workaround_enabled =
1108 driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
/* vkCreateInstance: allocates the anv_instance, initializes the common
 * vk_instance state with the merged anv + WSI dispatch tables, wires up
 * physical-device enumeration callbacks, and parses driconf options.
 * On vk_instance_init failure the allocation is released before returning.
 */
1111 VkResult anv_CreateInstance(
1112 const VkInstanceCreateInfo* pCreateInfo,
1113 const VkAllocationCallbacks* pAllocator,
1114 VkInstance* pInstance)
1116 struct anv_instance *instance;
1119 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1121 if (pAllocator == NULL)
1122 pAllocator = vk_default_allocator();
1124 instance = vk_alloc(pAllocator, sizeof(*instance), 8,
1125 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
/* NOTE(review): the allocation-failure check guarding this return
 * (presumably `if (instance == NULL)`) is on a line elided from this
 * view — confirm against the full file.
 */
1127 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
/* anv entrypoints take priority; WSI entrypoints fill remaining slots. */
1129 struct vk_instance_dispatch_table dispatch_table;
1130 vk_instance_dispatch_table_from_entrypoints(
1131 &dispatch_table, &anv_instance_entrypoints, true);
1132 vk_instance_dispatch_table_from_entrypoints(
1133 &dispatch_table, &wsi_instance_entrypoints, false);
1135 result = vk_instance_init(&instance->vk, &instance_extensions,
1136 &dispatch_table, pCreateInfo, pAllocator);
1137 if (result != VK_SUCCESS) {
1138 vk_free(pAllocator, instance);
1139 return vk_error(NULL, result);
/* Let the common runtime enumerate DRM devices through our callbacks. */
1142 instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create;
1143 instance->vk.physical_devices.destroy = anv_physical_device_destroy;
1145 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1147 anv_init_dri_options(instance);
1149 intel_driver_ds_init();
1151 *pInstance = anv_instance_to_handle(instance);
/* vkDestroyInstance: releases driconf option caches, finishes the common
 * vk_instance state, and frees the instance through its own allocator.
 */
1156 void anv_DestroyInstance(
1157 VkInstance _instance,
1158 const VkAllocationCallbacks* pAllocator)
1160 ANV_FROM_HANDLE(anv_instance, instance, _instance);
1165 VG(VALGRIND_DESTROY_MEMPOOL(instance));
1167 driDestroyOptionCache(&instance->dri_options);
1168 driDestroyOptionInfo(&instance->available_dri_options);
1170 vk_instance_finish(&instance->vk);
1171 vk_free(&instance->vk.alloc, instance);
/* vkGetPhysicalDeviceFeatures: reports the core Vulkan 1.0 feature set.
 * Most features are unconditionally supported; the hardware-dependent ones
 * (ASTC LDR sampling, depthBounds, shaderFloat64, vec4-stage image stores)
 * are derived from intel_device_info / compiler capabilities.
 */
1174 void anv_GetPhysicalDeviceFeatures(
1175 VkPhysicalDevice physicalDevice,
1176 VkPhysicalDeviceFeatures* pFeatures)
1178 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1180 /* Just pick one; they're all the same */
1181 const bool has_astc_ldr =
1182 isl_format_supports_sampling(&pdevice->info,
1183 ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16);
1185 *pFeatures = (VkPhysicalDeviceFeatures) {
1186 .robustBufferAccess = true,
1187 .fullDrawIndexUint32 = true,
1188 .imageCubeArray = true,
1189 .independentBlend = true,
1190 .geometryShader = true,
1191 .tessellationShader = true,
1192 .sampleRateShading = true,
1193 .dualSrcBlend = true,
1195 .multiDrawIndirect = true,
1196 .drawIndirectFirstInstance = true,
1198 .depthBiasClamp = true,
1199 .fillModeNonSolid = true,
1200 .depthBounds = pdevice->info.ver >= 12,
1202 .largePoints = true,
1204 .multiViewport = true,
1205 .samplerAnisotropy = true,
1206 .textureCompressionETC2 = true,
1207 .textureCompressionASTC_LDR = has_astc_ldr,
1208 .textureCompressionBC = true,
1209 .occlusionQueryPrecise = true,
1210 .pipelineStatisticsQuery = true,
1211 .fragmentStoresAndAtomics = true,
1212 .shaderTessellationAndGeometryPointSize = true,
1213 .shaderImageGatherExtended = true,
1214 .shaderStorageImageExtendedFormats = true,
1215 .shaderStorageImageMultisample = false,
1216 .shaderStorageImageReadWithoutFormat = false,
1217 .shaderStorageImageWriteWithoutFormat = true,
1218 .shaderUniformBufferArrayDynamicIndexing = true,
1219 .shaderSampledImageArrayDynamicIndexing = true,
1220 .shaderStorageBufferArrayDynamicIndexing = true,
1221 .shaderStorageImageArrayDynamicIndexing = true,
1222 .shaderClipDistance = true,
1223 .shaderCullDistance = true,
1224 .shaderFloat64 = pdevice->info.has_64bit_float,
1225 .shaderInt64 = true,
1226 .shaderInt16 = true,
1227 .shaderResourceMinLod = true,
1228 .variableMultisampleRate = true,
1229 .inheritedQueries = true,
1232 /* We can't do image stores in vec4 shaders */
1233 pFeatures->vertexPipelineStoresAndAtomics =
1234 pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
1235 pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
1237 struct vk_app_info *app_info = &pdevice->instance->vk.app_info;
1239 /* The new DOOM and Wolfenstein games require depthBounds without
1240 * checking for it. They seem to run fine without it so just claim it's
1241 * there and accept the consequences.
1243 if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0)
1244 pFeatures->depthBounds = true;
/* Fills the Vulkan 1.1 core feature struct.  Values are mostly static for
 * this driver; only storageInputOutput16 and protectedMemory are reported
 * as unsupported here.
 */
1248 anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
1249 VkPhysicalDeviceVulkan11Features *f)
1251 assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
1253 f->storageBuffer16BitAccess = true;
1254 f->uniformAndStorageBuffer16BitAccess = true;
1255 f->storagePushConstant16 = true;
1256 f->storageInputOutput16 = false;
1257 f->multiview = true;
1258 f->multiviewGeometryShader = true;
1259 f->multiviewTessellationShader = true;
1260 f->variablePointersStorageBuffer = true;
1261 f->variablePointers = true;
1262 f->protectedMemory = false;
1263 f->samplerYcbcrConversion = true;
1264 f->shaderDrawParameters = true;
/* Fills the Vulkan 1.2 core feature struct.  Note the descriptor-indexing
 * sub-features that remain false (input-attachment indexing and uniform
 * buffer non-uniform indexing) — these reflect this driver's descriptor
 * model, not a spec default.
 */
1268 anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
1269 VkPhysicalDeviceVulkan12Features *f)
1271 assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);
1273 f->samplerMirrorClampToEdge = true;
1274 f->drawIndirectCount = true;
1275 f->storageBuffer8BitAccess = true;
1276 f->uniformAndStorageBuffer8BitAccess = true;
1277 f->storagePushConstant8 = true;
1278 f->shaderBufferInt64Atomics = true;
1279 f->shaderSharedInt64Atomics = false;
1280 f->shaderFloat16 = true;
1281 f->shaderInt8 = true;
1283 f->descriptorIndexing = true;
1284 f->shaderInputAttachmentArrayDynamicIndexing = false;
1285 f->shaderUniformTexelBufferArrayDynamicIndexing = true;
1286 f->shaderStorageTexelBufferArrayDynamicIndexing = true;
1287 f->shaderUniformBufferArrayNonUniformIndexing = false;
1288 f->shaderSampledImageArrayNonUniformIndexing = true;
1289 f->shaderStorageBufferArrayNonUniformIndexing = true;
1290 f->shaderStorageImageArrayNonUniformIndexing = true;
1291 f->shaderInputAttachmentArrayNonUniformIndexing = false;
1292 f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
1293 f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
1294 f->descriptorBindingUniformBufferUpdateAfterBind = true;
1295 f->descriptorBindingSampledImageUpdateAfterBind = true;
1296 f->descriptorBindingStorageImageUpdateAfterBind = true;
1297 f->descriptorBindingStorageBufferUpdateAfterBind = true;
1298 f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
1299 f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
1300 f->descriptorBindingUpdateUnusedWhilePending = true;
1301 f->descriptorBindingPartiallyBound = true;
1302 f->descriptorBindingVariableDescriptorCount = true;
1303 f->runtimeDescriptorArray = true;
1305 f->samplerFilterMinmax = true;
1306 f->scalarBlockLayout = true;
1307 f->imagelessFramebuffer = true;
1308 f->uniformBufferStandardLayout = true;
1309 f->shaderSubgroupExtendedTypes = true;
1310 f->separateDepthStencilLayouts = true;
1311 f->hostQueryReset = true;
1312 f->timelineSemaphore = true;
1313 f->bufferDeviceAddress = true;
1314 f->bufferDeviceAddressCaptureReplay = true;
1315 f->bufferDeviceAddressMultiDevice = false;
1316 f->vulkanMemoryModel = true;
1317 f->vulkanMemoryModelDeviceScope = true;
1318 f->vulkanMemoryModelAvailabilityVisibilityChains = true;
1319 f->shaderOutputViewportIndex = true;
1320 f->shaderOutputLayer = true;
1321 f->subgroupBroadcastDynamicId = true;
/* Fills the Vulkan 1.3 core feature struct.  Everything is supported
 * except ASTC HDR texture compression.
 */
1325 anv_get_physical_device_features_1_3(struct anv_physical_device *pdevice,
1326 VkPhysicalDeviceVulkan13Features *f)
1328 assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES);
1330 f->robustImageAccess = true;
1331 f->inlineUniformBlock = true;
1332 f->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
1333 f->pipelineCreationCacheControl = true;
1334 f->privateData = true;
1335 f->shaderDemoteToHelperInvocation = true;
1336 f->shaderTerminateInvocation = true;
1337 f->subgroupSizeControl = true;
1338 f->computeFullSubgroups = true;
1339 f->synchronization2 = true;
1340 f->textureCompressionASTC_HDR = false;
1341 f->shaderZeroInitializeWorkgroupMemory = true;
1342 f->dynamicRendering = true;
1343 f->shaderIntegerDotProduct = true;
1344 f->maintenance4 = true;
/* vkGetPhysicalDeviceFeatures2: fills the core 1.0 features, then walks
 * the caller's pNext chain.  Core 1.1/1.2/1.3 feature structs (and their
 * promoted-extension aliases) are handled generically by the
 * vk_get_physical_device_core_*_feature_ext helpers from locally filled
 * copies; everything else is matched in the switch below.  Unknown sTypes
 * fall through to anv_debug_ignored_stype.
 *
 * NOTE(review): the `break;` / closing-brace lines between switch cases
 * are elided from this view of the file.
 */
1347 void anv_GetPhysicalDeviceFeatures2(
1348 VkPhysicalDevice physicalDevice,
1349 VkPhysicalDeviceFeatures2* pFeatures)
1351 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1352 anv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
1354 VkPhysicalDeviceVulkan11Features core_1_1 = {
1355 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
1357 anv_get_physical_device_features_1_1(pdevice, &core_1_1);
1359 VkPhysicalDeviceVulkan12Features core_1_2 = {
1360 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
1362 anv_get_physical_device_features_1_2(pdevice, &core_1_2);
1364 VkPhysicalDeviceVulkan13Features core_1_3 = {
1365 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
1367 anv_get_physical_device_features_1_3(pdevice, &core_1_3);
1369 vk_foreach_struct(ext, pFeatures->pNext) {
/* Core-struct helpers return true when they handled `ext`; the switch
 * below only runs for structs they did not recognize.
 */
1370 if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
1372 if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
1374 if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
1377 switch (ext->sType) {
1378 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
1379 VkPhysicalDevice4444FormatsFeaturesEXT *features =
1380 (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
1381 features->formatA4R4G4B4 = true;
1382 features->formatA4B4G4R4 = false;
1386 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
1387 VkPhysicalDeviceAccelerationStructureFeaturesKHR *features = (void *)ext;
/* Ray tracing requires both build-time (ANV_SUPPORT_RT) and hardware
 * (has_ray_tracing) support.
 */
1388 features->accelerationStructure =
1389 ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
1390 features->accelerationStructureCaptureReplay = false; /* TODO */
1391 features->accelerationStructureIndirectBuild = false; /* TODO */
1392 features->accelerationStructureHostCommands = false;
1393 features->descriptorBindingAccelerationStructureUpdateAfterBind =
1394 ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
1398 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
1399 VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features = (void *)ext;
1400 features->bufferDeviceAddress = true;
1401 features->bufferDeviceAddressCaptureReplay = false;
1402 features->bufferDeviceAddressMultiDevice = false;
1406 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: {
1407 VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features =
1408 (VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *)ext;
1409 features->borderColorSwizzle = true;
1410 features->borderColorSwizzleFromImage = true;
1414 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
1415 VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
1416 (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
1417 features->colorWriteEnable = true;
1421 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
1422 VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
1423 (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
1424 features->image2DViewOf3D = true;
1425 features->sampler2DViewOf3D = true;
1429 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
1430 VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
1431 (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
1432 features->computeDerivativeGroupQuads = true;
1433 features->computeDerivativeGroupLinear = true;
1437 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
1438 VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
1439 (VkPhysicalDeviceConditionalRenderingFeaturesEXT*)ext;
1440 features->conditionalRendering = true;
1441 features->inheritedConditionalRendering = true;
1445 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
1446 VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
1447 (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
1448 features->customBorderColors = true;
1449 features->customBorderColorWithoutFormat = true;
1453 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLAMP_ZERO_ONE_FEATURES_EXT: {
1454 VkPhysicalDeviceDepthClampZeroOneFeaturesEXT *features =
1455 (VkPhysicalDeviceDepthClampZeroOneFeaturesEXT *)ext;
1456 features->depthClampZeroOne = true;
1460 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
1461 VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
1462 (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
1463 features->depthClipEnable = true;
1467 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT: {
1468 VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *features =
1469 (VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *)ext;
1470 features->fragmentShaderSampleInterlock = true;
1471 features->fragmentShaderPixelInterlock = true;
1472 features->fragmentShaderShadingRateInterlock = false;
1476 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR: {
1477 VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *features =
1478 (VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *)ext;
1479 features->globalPriorityQuery = true;
1483 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
1484 VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
1485 (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
/* attachmentFragmentShadingRate is written twice below; the second,
 * hardware-derived assignment is the effective value.
 */
1486 features->attachmentFragmentShadingRate = false;
1487 features->pipelineFragmentShadingRate = true;
1488 features->primitiveFragmentShadingRate =
1489 pdevice->info.has_coarse_pixel_primitive_and_cb;
1490 features->attachmentFragmentShadingRate =
1491 pdevice->info.has_coarse_pixel_primitive_and_cb;
1495 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: {
1496 VkPhysicalDeviceImageViewMinLodFeaturesEXT *features =
1497 (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext;
1498 features->minLod = true;
1502 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
1503 VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
1504 (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
1505 features->indexTypeUint8 = true;
1509 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
1510 VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
1511 (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
1512 /* Rectangular lines must use the strict algorithm, which is not
1513 * supported for wide lines prior to ICL. See rasterization_mode for
1514 * details and how the HW states are programmed.
1516 features->rectangularLines = pdevice->info.ver >= 10;
1517 features->bresenhamLines = true;
1518 /* Support for Smooth lines with MSAA was removed on gfx11. From the
1519 * BSpec section "Multisample ModesState" table for "AA Line Support
1522 * GFX10:BUG:######## NUM_MULTISAMPLES == 1
1524 * Fortunately, this isn't a case most people care about.
1526 features->smoothLines = pdevice->info.ver < 10;
1527 features->stippledRectangularLines = false;
1528 features->stippledBresenhamLines = true;
1529 features->stippledSmoothLines = false;
1533 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
1534 VkPhysicalDeviceMeshShaderFeaturesNV *features =
1535 (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
1536 features->taskShader = pdevice->vk.supported_extensions.NV_mesh_shader;
1537 features->meshShader = pdevice->vk.supported_extensions.NV_mesh_shader;
1541 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT: {
1542 VkPhysicalDeviceMeshShaderFeaturesEXT *features =
1543 (VkPhysicalDeviceMeshShaderFeaturesEXT *)ext;
1544 features->meshShader = pdevice->vk.supported_extensions.EXT_mesh_shader;
1545 features->taskShader = pdevice->vk.supported_extensions.EXT_mesh_shader;
1546 features->multiviewMeshShader = false;
1547 features->primitiveFragmentShadingRateMeshShader = features->meshShader;
1548 features->meshShaderQueries = false;
1552 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT: {
1553 VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT *features =
1554 (VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT *)ext;
1555 features->mutableDescriptorType = true;
1559 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
1560 VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
1561 (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
1562 feature->performanceCounterQueryPools = true;
1563 /* HW only supports a single configuration at a time. */
1564 feature->performanceCounterMultipleQueryPools = false;
1568 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
1569 VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
1570 (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
1571 features->pipelineExecutableInfo = true;
1575 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
1576 VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
1577 (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
1578 features->primitivesGeneratedQuery = true;
1579 features->primitivesGeneratedQueryWithRasterizerDiscard = false;
1580 features->primitivesGeneratedQueryWithNonZeroStreams = false;
1584 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
1585 VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
1586 (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
1587 features->provokingVertexLast = true;
1588 features->transformFeedbackPreservesProvokingVertex = true;
1592 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_QUERY_FEATURES_KHR: {
1593 VkPhysicalDeviceRayQueryFeaturesKHR *features = (void *)ext;
1594 features->rayQuery = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing;
1598 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR: {
1599 VkPhysicalDeviceRayTracingPipelineFeaturesKHR *features = (void *)ext;
1600 features->rayTracingPipeline = pdevice->info.has_ray_tracing;
1601 features->rayTracingPipelineShaderGroupHandleCaptureReplay = false;
1602 features->rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false;
1603 features->rayTracingPipelineTraceRaysIndirect = true;
1604 features->rayTraversalPrimitiveCulling = true;
1608 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
1609 VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
1610 features->robustBufferAccess2 = true;
1611 features->robustImageAccess2 = true;
1612 features->nullDescriptor = true;
1616 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
1617 VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (void *)ext;
/* Several float-atomic sub-features depend on LSC (has_lsc) and/or
 * 64-bit float hardware support.
 */
1618 features->shaderBufferFloat32Atomics = true;
1619 features->shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc;
1620 features->shaderBufferFloat64Atomics =
1621 pdevice->info.has_64bit_float && pdevice->info.has_lsc;
1622 features->shaderBufferFloat64AtomicAdd = false;
1623 features->shaderSharedFloat32Atomics = true;
1624 features->shaderSharedFloat32AtomicAdd = false;
1625 features->shaderSharedFloat64Atomics = false;
1626 features->shaderSharedFloat64AtomicAdd = false;
1627 features->shaderImageFloat32Atomics = true;
1628 features->shaderImageFloat32AtomicAdd = false;
1629 features->sparseImageFloat32Atomics = false;
1630 features->sparseImageFloat32AtomicAdd = false;
1634 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
1635 VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (void *)ext;
1636 features->shaderBufferFloat16Atomics = pdevice->info.has_lsc;
1637 features->shaderBufferFloat16AtomicAdd = false;
1638 features->shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc;
1639 features->shaderBufferFloat32AtomicMinMax = true;
1640 features->shaderBufferFloat64AtomicMinMax =
1641 pdevice->info.has_64bit_float && pdevice->info.has_lsc;
1642 features->shaderSharedFloat16Atomics = pdevice->info.has_lsc;
1643 features->shaderSharedFloat16AtomicAdd = false;
1644 features->shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc;
1645 features->shaderSharedFloat32AtomicMinMax = true;
1646 features->shaderSharedFloat64AtomicMinMax = false;
1647 features->shaderImageFloat32AtomicMinMax = false;
1648 features->sparseImageFloat32AtomicMinMax = false;
1652 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
1653 VkPhysicalDeviceShaderClockFeaturesKHR *features =
1654 (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
1655 features->shaderSubgroupClock = true;
1656 features->shaderDeviceClock = false;
1660 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_FUNCTIONS_2_FEATURES_INTEL: {
1661 VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *features =
1662 (VkPhysicalDeviceShaderIntegerFunctions2FeaturesINTEL *)ext;
1663 features->shaderIntegerFunctions2 = true;
1667 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: {
1668 VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
1669 (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
1670 features->shaderModuleIdentifier = true;
1674 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
1675 VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
1676 (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
1677 features->shaderSubgroupUniformControlFlow = true;
1681 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
1682 VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
1683 (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
1684 features->texelBufferAlignment = true;
1688 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
1689 VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
1690 (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
1691 features->transformFeedback = true;
1692 features->geometryStreams = true;
1696 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
1697 VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
1698 (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
1699 features->vertexAttributeInstanceRateDivisor = true;
1700 features->vertexAttributeInstanceRateZeroDivisor = true;
1704 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
1705 VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
1706 (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
1707 features->workgroupMemoryExplicitLayout = true;
1708 features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
1709 features->workgroupMemoryExplicitLayout8BitAccess = true;
1710 features->workgroupMemoryExplicitLayout16BitAccess = true;
1714 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
1715 VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
1716 (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
1717 features->ycbcrImageArrays = true;
1721 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
1722 VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
1723 (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
1724 features->extendedDynamicState = true;
1728 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
1729 VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
1730 (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
1731 features->extendedDynamicState2 = true;
1732 features->extendedDynamicState2LogicOp = true;
1733 features->extendedDynamicState2PatchControlPoints = false;
1737 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_FEATURES_EXT: {
1738 VkPhysicalDeviceExtendedDynamicState3FeaturesEXT *features =
1739 (VkPhysicalDeviceExtendedDynamicState3FeaturesEXT *)ext;
1740 features->extendedDynamicState3PolygonMode = true;
1741 features->extendedDynamicState3TessellationDomainOrigin = true;
1742 features->extendedDynamicState3RasterizationStream = true;
1743 features->extendedDynamicState3LineStippleEnable = true;
1744 features->extendedDynamicState3LineRasterizationMode = true;
1745 features->extendedDynamicState3LogicOpEnable = true;
1746 features->extendedDynamicState3AlphaToOneEnable = true;
1747 features->extendedDynamicState3DepthClipEnable = true;
1748 features->extendedDynamicState3DepthClampEnable = true;
1749 features->extendedDynamicState3DepthClipNegativeOneToOne = true;
1750 features->extendedDynamicState3ProvokingVertexMode = true;
1751 features->extendedDynamicState3ColorBlendEnable = true;
1752 features->extendedDynamicState3ColorWriteMask = true;
1753 features->extendedDynamicState3ColorBlendEquation = true;
1754 features->extendedDynamicState3SampleLocationsEnable = true;
1755 features->extendedDynamicState3SampleMask = true;
/* The remaining EDS3 sub-features are explicitly unsupported. */
1757 features->extendedDynamicState3RasterizationSamples = false;
1758 features->extendedDynamicState3AlphaToCoverageEnable = false;
1759 features->extendedDynamicState3ConservativeRasterizationMode = false;
1760 features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
1761 features->extendedDynamicState3ViewportWScalingEnable = false;
1762 features->extendedDynamicState3ViewportSwizzle = false;
1763 features->extendedDynamicState3ShadingRateImageEnable = false;
1764 features->extendedDynamicState3CoverageToColorEnable = false;
1765 features->extendedDynamicState3CoverageToColorLocation = false;
1766 features->extendedDynamicState3CoverageModulationMode = false;
1767 features->extendedDynamicState3CoverageModulationTableEnable = false;
1768 features->extendedDynamicState3CoverageModulationTable = false;
1769 features->extendedDynamicState3CoverageReductionMode = false;
1770 features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
1771 features->extendedDynamicState3ColorBlendAdvanced = false;
1776 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
1777 VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
1778 features->multiDraw = true;
1782 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_NON_SEAMLESS_CUBE_MAP_FEATURES_EXT : {
1783 VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *features =
1784 (VkPhysicalDeviceNonSeamlessCubeMapFeaturesEXT *)ext;
1785 features->nonSeamlessCubeMap = true;
1789 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
1790 VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
1791 (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
1792 features->primitiveTopologyListRestart = true;
1793 features->primitiveTopologyPatchListRestart = true;
1797 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
1798 VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
1799 (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
1800 features->depthClipControl = true;
1804 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_ID_FEATURES_KHR: {
1805 VkPhysicalDevicePresentIdFeaturesKHR *features =
1806 (VkPhysicalDevicePresentIdFeaturesKHR *) ext;
1807 features->presentId = pdevice->vk.supported_extensions.KHR_present_id;
1811 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENT_WAIT_FEATURES_KHR: {
1812 VkPhysicalDevicePresentWaitFeaturesKHR *features =
1813 (VkPhysicalDevicePresentWaitFeaturesKHR *) ext;
1814 features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;
/* default: unknown pNext structs are logged (debug builds) and skipped. */
1819 anv_debug_ignored_stype(ext->sType);
/* Fixed descriptor limits reported through VkPhysicalDeviceLimits in
 * anv_GetPhysicalDeviceProperties below.  MAX_CUSTOM_BORDER_COLORS is
 * presumably consumed by the custom-border-color property struct — its
 * use is outside this view; confirm against the full file.
 */
1826 #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64
1828 #define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
1829 #define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256
1831 #define MAX_CUSTOM_BORDER_COLORS 4096
1833 void anv_GetPhysicalDeviceProperties(
1834 VkPhysicalDevice physicalDevice,
1835 VkPhysicalDeviceProperties* pProperties)
1837 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
1838 const struct intel_device_info *devinfo = &pdevice->info;
1840 const uint32_t max_ssbos = UINT16_MAX;
1841 const uint32_t max_textures = UINT16_MAX;
1842 const uint32_t max_samplers = UINT16_MAX;
1843 const uint32_t max_images = UINT16_MAX;
1845 /* Claim a high per-stage limit since we have bindless. */
1846 const uint32_t max_per_stage = UINT32_MAX;
1848 const uint32_t max_workgroup_size =
1849 MIN2(1024, 32 * devinfo->max_cs_workgroup_threads);
1851 VkSampleCountFlags sample_counts =
1852 isl_device_get_sample_counts(&pdevice->isl_dev);
1855 VkPhysicalDeviceLimits limits = {
1856 .maxImageDimension1D = (1 << 14),
1857 .maxImageDimension2D = (1 << 14),
1858 .maxImageDimension3D = (1 << 11),
1859 .maxImageDimensionCube = (1 << 14),
1860 .maxImageArrayLayers = (1 << 11),
1861 .maxTexelBufferElements = 128 * 1024 * 1024,
1862 .maxUniformBufferRange = pdevice->compiler->indirect_ubos_use_sampler ? (1u << 27) : (1u << 30),
1863 .maxStorageBufferRange = pdevice->isl_dev.max_buffer_size,
1864 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1865 .maxMemoryAllocationCount = UINT32_MAX,
1866 .maxSamplerAllocationCount = 64 * 1024,
1867 .bufferImageGranularity = 1,
1868 .sparseAddressSpaceSize = 0,
1869 .maxBoundDescriptorSets = MAX_SETS,
1870 .maxPerStageDescriptorSamplers = max_samplers,
1871 .maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
1872 .maxPerStageDescriptorStorageBuffers = max_ssbos,
1873 .maxPerStageDescriptorSampledImages = max_textures,
1874 .maxPerStageDescriptorStorageImages = max_images,
1875 .maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
1876 .maxPerStageResources = max_per_stage,
1877 .maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
1878 .maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
1879 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
1880 .maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
1881 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
1882 .maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
1883 .maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
1884 .maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
1885 .maxVertexInputAttributes = MAX_VES,
1886 .maxVertexInputBindings = MAX_VBS,
1887 /* Broadwell PRMs: Volume 2d: Command Reference: Structures:
1889 * VERTEX_ELEMENT_STATE::Source Element Offset: [0,2047]
1891 .maxVertexInputAttributeOffset = 2047,
1892 /* Skylake PRMs: Volume 2d: Command Reference: Structures:
1894 * VERTEX_BUFFER_STATE::Buffer Pitch: [0,4095]
1896 .maxVertexInputBindingStride = 4095,
1897 .maxVertexOutputComponents = 128,
1898 .maxTessellationGenerationLevel = 64,
1899 .maxTessellationPatchSize = 32,
1900 .maxTessellationControlPerVertexInputComponents = 128,
1901 .maxTessellationControlPerVertexOutputComponents = 128,
1902 .maxTessellationControlPerPatchOutputComponents = 128,
1903 .maxTessellationControlTotalOutputComponents = 2048,
1904 .maxTessellationEvaluationInputComponents = 128,
1905 .maxTessellationEvaluationOutputComponents = 128,
1906 .maxGeometryShaderInvocations = 32,
1907 .maxGeometryInputComponents = 128,
1908 .maxGeometryOutputComponents = 128,
1909 .maxGeometryOutputVertices = 256,
1910 .maxGeometryTotalOutputComponents = 1024,
1911 .maxFragmentInputComponents = 116, /* 128 components - (PSIZ, CLIP_DIST0, CLIP_DIST1) */
1912 .maxFragmentOutputAttachments = 8,
1913 .maxFragmentDualSrcAttachments = 1,
1914 .maxFragmentCombinedOutputResources = MAX_RTS + max_ssbos + max_images,
1915 .maxComputeSharedMemorySize = 64 * 1024,
1916 .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
1917 .maxComputeWorkGroupInvocations = max_workgroup_size,
1918 .maxComputeWorkGroupSize = {
1923 .subPixelPrecisionBits = 8,
1924 .subTexelPrecisionBits = 8,
1925 .mipmapPrecisionBits = 8,
1926 .maxDrawIndexedIndexValue = UINT32_MAX,
1927 .maxDrawIndirectCount = UINT32_MAX,
1928 .maxSamplerLodBias = 16,
1929 .maxSamplerAnisotropy = 16,
1930 .maxViewports = MAX_VIEWPORTS,
1931 .maxViewportDimensions = { (1 << 14), (1 << 14) },
1932 .viewportBoundsRange = { INT16_MIN, INT16_MAX },
1933 .viewportSubPixelBits = 13, /* We take a float? */
1934 .minMemoryMapAlignment = 4096, /* A page */
1935 /* The dataport requires texel alignment so we need to assume a worst
1936 * case of R32G32B32A32 which is 16 bytes.
1938 .minTexelBufferOffsetAlignment = 16,
1939 .minUniformBufferOffsetAlignment = ANV_UBO_ALIGNMENT,
1940 .minStorageBufferOffsetAlignment = ANV_SSBO_ALIGNMENT,
1941 .minTexelOffset = -8,
1942 .maxTexelOffset = 7,
1943 .minTexelGatherOffset = -32,
1944 .maxTexelGatherOffset = 31,
1945 .minInterpolationOffset = -0.5,
1946 .maxInterpolationOffset = 0.4375,
1947 .subPixelInterpolationOffsetBits = 4,
1948 .maxFramebufferWidth = (1 << 14),
1949 .maxFramebufferHeight = (1 << 14),
1950 .maxFramebufferLayers = (1 << 11),
1951 .framebufferColorSampleCounts = sample_counts,
1952 .framebufferDepthSampleCounts = sample_counts,
1953 .framebufferStencilSampleCounts = sample_counts,
1954 .framebufferNoAttachmentsSampleCounts = sample_counts,
1955 .maxColorAttachments = MAX_RTS,
1956 .sampledImageColorSampleCounts = sample_counts,
1957 .sampledImageIntegerSampleCounts = sample_counts,
1958 .sampledImageDepthSampleCounts = sample_counts,
1959 .sampledImageStencilSampleCounts = sample_counts,
1960 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1961 .maxSampleMaskWords = 1,
1962 .timestampComputeAndGraphics = true,
1963 .timestampPeriod = 1000000000.0 / devinfo->timestamp_frequency,
1964 .maxClipDistances = 8,
1965 .maxCullDistances = 8,
1966 .maxCombinedClipAndCullDistances = 8,
1967 .discreteQueuePriorities = 2,
1968 .pointSizeRange = { 0.125, 255.875 },
1969 /* While SKL and up support much wider lines than we are setting here,
1970 * in practice we run into conformance issues if we go past this limit.
1971 * Since the Windows driver does the same, it's probably fair to assume
1972 * that no one needs more than this.
1974 .lineWidthRange = { 0.0, 8.0 },
1975 .pointSizeGranularity = (1.0 / 8.0),
1976 .lineWidthGranularity = (1.0 / 128.0),
1977 .strictLines = false,
1978 .standardSampleLocations = true,
1979 .optimalBufferCopyOffsetAlignment = 128,
1980 .optimalBufferCopyRowPitchAlignment = 128,
1981 .nonCoherentAtomSize = 64,
1984 *pProperties = (VkPhysicalDeviceProperties) {
1985 .apiVersion = ANV_API_VERSION,
1986 .driverVersion = vk_get_driver_version(),
1988 .deviceID = pdevice->info.pci_device_id,
1989 .deviceType = pdevice->info.has_local_mem ?
1990 VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
1991 VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
1993 .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
1996 snprintf(pProperties->deviceName, sizeof(pProperties->deviceName),
1997 "%s", pdevice->info.name);
1998 memcpy(pProperties->pipelineCacheUUID,
1999 pdevice->pipeline_cache_uuid, VK_UUID_SIZE);
2003 anv_get_physical_device_properties_1_1(struct anv_physical_device *pdevice,
2004 VkPhysicalDeviceVulkan11Properties *p)
2006 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
2008 memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
2009 memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
2010 memset(p->deviceLUID, 0, VK_LUID_SIZE);
2011 p->deviceNodeMask = 0;
2012 p->deviceLUIDValid = false;
2014 p->subgroupSize = BRW_SUBGROUP_SIZE;
2015 VkShaderStageFlags scalar_stages = 0;
2016 for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
2017 if (pdevice->compiler->scalar_stage[stage])
2018 scalar_stages |= mesa_to_vk_shader_stage(stage);
2020 if (pdevice->vk.supported_extensions.KHR_ray_tracing_pipeline) {
2021 scalar_stages |= VK_SHADER_STAGE_RAYGEN_BIT_KHR |
2022 VK_SHADER_STAGE_ANY_HIT_BIT_KHR |
2023 VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR |
2024 VK_SHADER_STAGE_MISS_BIT_KHR |
2025 VK_SHADER_STAGE_INTERSECTION_BIT_KHR |
2026 VK_SHADER_STAGE_CALLABLE_BIT_KHR;
2028 if (pdevice->vk.supported_extensions.NV_mesh_shader ||
2029 pdevice->vk.supported_extensions.EXT_mesh_shader) {
2030 scalar_stages |= VK_SHADER_STAGE_TASK_BIT_EXT |
2031 VK_SHADER_STAGE_MESH_BIT_EXT;
2033 p->subgroupSupportedStages = scalar_stages;
2034 p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
2035 VK_SUBGROUP_FEATURE_VOTE_BIT |
2036 VK_SUBGROUP_FEATURE_BALLOT_BIT |
2037 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
2038 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
2039 VK_SUBGROUP_FEATURE_QUAD_BIT |
2040 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
2041 VK_SUBGROUP_FEATURE_CLUSTERED_BIT;
2042 p->subgroupQuadOperationsInAllStages = true;
2044 p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_USER_CLIP_PLANES_ONLY;
2045 p->maxMultiviewViewCount = 16;
2046 p->maxMultiviewInstanceIndex = UINT32_MAX / 16;
2047 p->protectedNoFault = false;
2048 /* This value doesn't matter for us today as our per-stage descriptors are
2051 p->maxPerSetDescriptors = 1024;
2052 p->maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE;
2056 anv_get_physical_device_properties_1_2(struct anv_physical_device *pdevice,
2057 VkPhysicalDeviceVulkan12Properties *p)
2059 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
2061 p->driverID = VK_DRIVER_ID_INTEL_OPEN_SOURCE_MESA;
2062 memset(p->driverName, 0, sizeof(p->driverName));
2063 snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
2064 "Intel open-source Mesa driver");
2065 memset(p->driverInfo, 0, sizeof(p->driverInfo));
2066 snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
2067 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
2069 p->conformanceVersion = (VkConformanceVersion) {
2076 p->denormBehaviorIndependence =
2077 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
2078 p->roundingModeIndependence =
2079 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_NONE;
2081 /* Broadwell does not support HF denorms and there are restrictions
2082 * other gens. According to Kabylake's PRM:
2084 * "math - Extended Math Function
2086 * Restriction : Half-float denorms are always retained."
2088 p->shaderDenormFlushToZeroFloat16 = false;
2089 p->shaderDenormPreserveFloat16 = pdevice->info.ver > 8;
2090 p->shaderRoundingModeRTEFloat16 = true;
2091 p->shaderRoundingModeRTZFloat16 = true;
2092 p->shaderSignedZeroInfNanPreserveFloat16 = true;
2094 p->shaderDenormFlushToZeroFloat32 = true;
2095 p->shaderDenormPreserveFloat32 = true;
2096 p->shaderRoundingModeRTEFloat32 = true;
2097 p->shaderRoundingModeRTZFloat32 = true;
2098 p->shaderSignedZeroInfNanPreserveFloat32 = true;
2100 p->shaderDenormFlushToZeroFloat64 = true;
2101 p->shaderDenormPreserveFloat64 = true;
2102 p->shaderRoundingModeRTEFloat64 = true;
2103 p->shaderRoundingModeRTZFloat64 = true;
2104 p->shaderSignedZeroInfNanPreserveFloat64 = true;
2106 /* It's a bit hard to exactly map our implementation to the limits
2107 * described by Vulkan. The bindless surface handle in the extended
2108 * message descriptors is 20 bits and it's an index into the table of
2109 * RENDER_SURFACE_STATE structs that starts at bindless surface base
2110 * address. This means that we can have at must 1M surface states
2111 * allocated at any given time. Since most image views take two
2112 * descriptors, this means we have a limit of about 500K image views.
2114 * However, since we allocate surface states at vkCreateImageView time,
2115 * this means our limit is actually something on the order of 500K image
2116 * views allocated at any time. The actual limit describe by Vulkan, on
2117 * the other hand, is a limit of how many you can have in a descriptor set.
2118 * Assuming anyone using 1M descriptors will be using the same image view
2119 * twice a bunch of times (or a bunch of null descriptors), we can safely
2120 * advertise a larger limit here.
2122 const unsigned max_bindless_views = 1 << 20;
2123 p->maxUpdateAfterBindDescriptorsInAllPools = max_bindless_views;
2124 p->shaderUniformBufferArrayNonUniformIndexingNative = false;
2125 p->shaderSampledImageArrayNonUniformIndexingNative = false;
2126 p->shaderStorageBufferArrayNonUniformIndexingNative = true;
2127 p->shaderStorageImageArrayNonUniformIndexingNative = false;
2128 p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
2129 p->robustBufferAccessUpdateAfterBind = true;
2130 p->quadDivergentImplicitLod = false;
2131 p->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
2132 p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
2133 p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
2134 p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
2135 p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
2136 p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
2137 p->maxPerStageUpdateAfterBindResources = UINT32_MAX;
2138 p->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
2139 p->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
2140 p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
2141 p->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
2142 p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
2143 p->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
2144 p->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
2145 p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
2147 /* We support all of the depth resolve modes */
2148 p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
2149 VK_RESOLVE_MODE_AVERAGE_BIT |
2150 VK_RESOLVE_MODE_MIN_BIT |
2151 VK_RESOLVE_MODE_MAX_BIT;
2152 /* Average doesn't make sense for stencil so we don't support that */
2153 p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT |
2154 VK_RESOLVE_MODE_MIN_BIT |
2155 VK_RESOLVE_MODE_MAX_BIT;
2156 p->independentResolveNone = true;
2157 p->independentResolve = true;
2159 p->filterMinmaxSingleComponentFormats = true;
2160 p->filterMinmaxImageComponentMapping = true;
2162 p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
2164 p->framebufferIntegerColorSampleCounts =
2165 isl_device_get_sample_counts(&pdevice->isl_dev);
2169 anv_get_physical_device_properties_1_3(struct anv_physical_device *pdevice,
2170 VkPhysicalDeviceVulkan13Properties *p)
2172 assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES);
2174 p->minSubgroupSize = 8;
2175 p->maxSubgroupSize = 32;
2176 p->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
2177 p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT |
2178 VK_SHADER_STAGE_TASK_BIT_EXT |
2179 VK_SHADER_STAGE_MESH_BIT_EXT;
2181 p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
2182 p->maxPerStageDescriptorInlineUniformBlocks =
2183 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2184 p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
2185 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2186 p->maxDescriptorSetInlineUniformBlocks =
2187 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2188 p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
2189 MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS;
2190 p->maxInlineUniformTotalSize = UINT16_MAX;
2192 p->integerDotProduct8BitUnsignedAccelerated = false;
2193 p->integerDotProduct8BitSignedAccelerated = false;
2194 p->integerDotProduct8BitMixedSignednessAccelerated = false;
2195 p->integerDotProduct4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
2196 p->integerDotProduct4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
2197 p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
2198 p->integerDotProduct16BitUnsignedAccelerated = false;
2199 p->integerDotProduct16BitSignedAccelerated = false;
2200 p->integerDotProduct16BitMixedSignednessAccelerated = false;
2201 p->integerDotProduct32BitUnsignedAccelerated = false;
2202 p->integerDotProduct32BitSignedAccelerated = false;
2203 p->integerDotProduct32BitMixedSignednessAccelerated = false;
2204 p->integerDotProduct64BitUnsignedAccelerated = false;
2205 p->integerDotProduct64BitSignedAccelerated = false;
2206 p->integerDotProduct64BitMixedSignednessAccelerated = false;
2207 p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
2208 p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
2209 p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
2210 p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = pdevice->info.ver >= 12;
2211 p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = pdevice->info.ver >= 12;
2212 p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = pdevice->info.ver >= 12;
2213 p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
2214 p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
2215 p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
2216 p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
2217 p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
2218 p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
2219 p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
2220 p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
2221 p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
2223 /* From the SKL PRM Vol. 2d, docs for RENDER_SURFACE_STATE::Surface
2226 * "For SURFTYPE_BUFFER non-rendertarget surfaces, this field
2227 * specifies the base address of the first element of the surface,
2228 * computed in software by adding the surface base address to the
2229 * byte offset of the element in the buffer. The base address must
2230 * be aligned to element size."
2232 * The typed dataport messages require that things be texel aligned.
2233 * Otherwise, we may just load/store the wrong data or, in the worst
2234 * case, there may be hangs.
2236 p->storageTexelBufferOffsetAlignmentBytes = 16;
2237 p->storageTexelBufferOffsetSingleTexelAlignment = true;
2239 /* The sampler, however, is much more forgiving and it can handle
2240 * arbitrary byte alignment for linear and buffer surfaces. It's
2241 * hard to find a good PRM citation for this but years of empirical
2242 * experience demonstrate that this is true.
2244 p->uniformTexelBufferOffsetAlignmentBytes = 1;
2245 p->uniformTexelBufferOffsetSingleTexelAlignment = false;
2247 p->maxBufferSize = pdevice->isl_dev.max_buffer_size;
2250 void anv_GetPhysicalDeviceProperties2(
2251 VkPhysicalDevice physicalDevice,
2252 VkPhysicalDeviceProperties2* pProperties)
2254 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2256 anv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
2258 VkPhysicalDeviceVulkan11Properties core_1_1 = {
2259 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
2261 anv_get_physical_device_properties_1_1(pdevice, &core_1_1);
2263 VkPhysicalDeviceVulkan12Properties core_1_2 = {
2264 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
2266 anv_get_physical_device_properties_1_2(pdevice, &core_1_2);
2268 VkPhysicalDeviceVulkan13Properties core_1_3 = {
2269 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
2271 anv_get_physical_device_properties_1_3(pdevice, &core_1_3);
2273 vk_foreach_struct(ext, pProperties->pNext) {
2274 if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
2276 if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
2278 if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
2281 switch (ext->sType) {
2282 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
2283 VkPhysicalDeviceAccelerationStructurePropertiesKHR *props = (void *)ext;
2284 props->maxGeometryCount = (1u << 24) - 1;
2285 props->maxInstanceCount = (1u << 24) - 1;
2286 props->maxPrimitiveCount = (1u << 29) - 1;
2287 props->maxPerStageDescriptorAccelerationStructures = UINT16_MAX;
2288 props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = UINT16_MAX;
2289 props->maxDescriptorSetAccelerationStructures = UINT16_MAX;
2290 props->maxDescriptorSetUpdateAfterBindAccelerationStructures = UINT16_MAX;
2291 props->minAccelerationStructureScratchOffsetAlignment = 64;
2295 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
2296 /* TODO: Real limits */
2297 VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
2298 (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
2299 /* There's nothing in the public docs about this value as far as I
2300 * can tell. However, this is the value the Windows driver reports
2301 * and there's a comment on a rejected HW feature in the internal
2304 * "This is similar to conservative rasterization, except the
2305 * primitive area is not extended by 1/512 and..."
2307 * That's a bit of an obtuse reference but it's the best we've got
2310 properties->primitiveOverestimationSize = 1.0f / 512.0f;
2311 properties->maxExtraPrimitiveOverestimationSize = 0.0f;
2312 properties->extraPrimitiveOverestimationSizeGranularity = 0.0f;
2313 properties->primitiveUnderestimation = false;
2314 properties->conservativePointAndLineRasterization = false;
2315 properties->degenerateTrianglesRasterized = true;
2316 properties->degenerateLinesRasterized = false;
2317 properties->fullyCoveredFragmentShaderInputVariable = false;
2318 properties->conservativeRasterizationPostDepthCoverage = true;
2322 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
2323 VkPhysicalDeviceCustomBorderColorPropertiesEXT *properties =
2324 (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
2325 properties->maxCustomBorderColorSamplers = MAX_CUSTOM_BORDER_COLORS;
2329 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
2330 VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
2331 (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
2332 props->primitiveFragmentShadingRateWithMultipleViewports =
2333 pdevice->info.has_coarse_pixel_primitive_and_cb;
2334 props->layeredShadingRateAttachments = pdevice->info.has_coarse_pixel_primitive_and_cb;
2335 props->fragmentShadingRateNonTrivialCombinerOps =
2336 pdevice->info.has_coarse_pixel_primitive_and_cb;
2337 props->maxFragmentSize = (VkExtent2D) { 4, 4 };
2338 props->maxFragmentSizeAspectRatio =
2339 pdevice->info.has_coarse_pixel_primitive_and_cb ?
2341 props->maxFragmentShadingRateCoverageSamples = 4 * 4 *
2342 (pdevice->info.has_coarse_pixel_primitive_and_cb ? 4 : 16);
2343 props->maxFragmentShadingRateRasterizationSamples =
2344 pdevice->info.has_coarse_pixel_primitive_and_cb ?
2345 VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_16_BIT;
2346 props->fragmentShadingRateWithShaderDepthStencilWrites = false;
2347 props->fragmentShadingRateWithSampleMask = true;
2348 props->fragmentShadingRateWithShaderSampleMask = false;
2349 props->fragmentShadingRateWithConservativeRasterization = true;
2350 props->fragmentShadingRateWithFragmentShaderInterlock = true;
2351 props->fragmentShadingRateWithCustomSampleLocations = true;
2353 /* Fix in DG2_G10_C0 and DG2_G11_B0. Consider any other Sku as having
2356 props->fragmentShadingRateStrictMultiplyCombiner =
2357 pdevice->info.platform == INTEL_PLATFORM_DG2_G10 ?
2358 pdevice->info.revision >= 8 :
2359 pdevice->info.platform == INTEL_PLATFORM_DG2_G11 ?
2360 pdevice->info.revision >= 4 : true;
2362 if (pdevice->info.has_coarse_pixel_primitive_and_cb) {
2363 props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
2364 props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 8, 8 };
2365 props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
2367 /* Those must be 0 if attachmentFragmentShadingRate is not
2370 props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2371 props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D) { 0, 0 };
2372 props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 0;
2377 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
2378 VkPhysicalDeviceDrmPropertiesEXT *props =
2379 (VkPhysicalDeviceDrmPropertiesEXT *)ext;
2381 props->hasPrimary = pdevice->has_master;
2382 props->primaryMajor = pdevice->master_major;
2383 props->primaryMinor = pdevice->master_minor;
2385 props->hasRender = pdevice->has_local;
2386 props->renderMajor = pdevice->local_major;
2387 props->renderMinor = pdevice->local_minor;
2392 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_3_PROPERTIES_EXT: {
2393 VkPhysicalDeviceExtendedDynamicState3PropertiesEXT *props =
2394 (VkPhysicalDeviceExtendedDynamicState3PropertiesEXT *) ext;
2395 props->dynamicPrimitiveTopologyUnrestricted = true;
2399 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
2400 VkPhysicalDeviceExternalMemoryHostPropertiesEXT *props =
2401 (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *) ext;
2402 /* Userptr needs page aligned memory. */
2403 props->minImportedHostPointerAlignment = 4096;
2407 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2408 VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2409 (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2410 /* In the Skylake PRM Vol. 7, subsection titled "GIQ (Diamond)
2411 * Sampling Rules - Legacy Mode", it says the following:
2413 * "Note that the device divides a pixel into a 16x16 array of
2414 * subpixels, referenced by their upper left corners."
2416 * This is the only known reference in the PRMs to the subpixel
2417 * precision of line rasterization and a "16x16 array of subpixels"
2418 * implies 4 subpixel precision bits. Empirical testing has shown
2419 * that 4 subpixel precision bits applies to all line rasterization
2422 props->lineSubPixelPrecisionBits = 4;
2426 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES: {
2427 VkPhysicalDeviceMaintenance4Properties *properties =
2428 (VkPhysicalDeviceMaintenance4Properties *)ext;
2429 properties->maxBufferSize = pdevice->isl_dev.max_buffer_size;
2433 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
2434 VkPhysicalDeviceMeshShaderPropertiesNV *props =
2435 (VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
2437 /* Bounded by the maximum representable size in
2438 * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
2440 const uint32_t max_slm_size = 64 * 1024;
2442 /* Bounded by the maximum representable size in
2443 * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
2445 const uint32_t max_workgroup_size = 1 << 10;
2447 /* Bounded by the maximum representable count in
2448 * 3DSTATE_MESH_SHADER_BODY::MaximumPrimitiveCount.
2450 const uint32_t max_primitives = 1024;
2452 /* TODO(mesh): Multiview. */
2453 const uint32_t max_view_count = 1;
2455 props->maxDrawMeshTasksCount = UINT32_MAX;
2457 /* TODO(mesh): Implement workgroup Y and Z sizes larger than one by
2458 * mapping them to/from the single value that HW provides us
2459 * (currently used for X).
2462 props->maxTaskWorkGroupInvocations = max_workgroup_size;
2463 props->maxTaskWorkGroupSize[0] = max_workgroup_size;
2464 props->maxTaskWorkGroupSize[1] = 1;
2465 props->maxTaskWorkGroupSize[2] = 1;
2466 props->maxTaskTotalMemorySize = max_slm_size;
2467 props->maxTaskOutputCount = UINT16_MAX;
2469 props->maxMeshWorkGroupInvocations = max_workgroup_size;
2470 props->maxMeshWorkGroupSize[0] = max_workgroup_size;
2471 props->maxMeshWorkGroupSize[1] = 1;
2472 props->maxMeshWorkGroupSize[2] = 1;
2473 props->maxMeshTotalMemorySize = max_slm_size / max_view_count;
2474 props->maxMeshOutputPrimitives = max_primitives / max_view_count;
2475 props->maxMeshMultiviewViewCount = max_view_count;
2477 /* Depends on what indices can be represented with IndexFormat. For
2478 * now we always use U32, so bound to the maximum unique vertices we
2479 * need for the maximum primitives.
2481 * TODO(mesh): Revisit this if we drop "U32" IndexFormat when adding
2482 * support for others.
2484 props->maxMeshOutputVertices = 3 * props->maxMeshOutputPrimitives;
2487 props->meshOutputPerVertexGranularity = 32;
2488 props->meshOutputPerPrimitiveGranularity = 32;
2493 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_EXT: {
2494 VkPhysicalDeviceMeshShaderPropertiesEXT *properties =
2495 (VkPhysicalDeviceMeshShaderPropertiesEXT *)ext;
2497 /* Bounded by the maximum representable size in
2498 * 3DSTATE_MESH_SHADER_BODY::SharedLocalMemorySize. Same for Task.
2500 const uint32_t max_slm_size = 64 * 1024;
2502 /* Bounded by the maximum representable size in
2503 * 3DSTATE_MESH_SHADER_BODY::LocalXMaximum. Same for Task.
2505 const uint32_t max_workgroup_size = 1 << 10;
2507 /* 3DMESH_3D limitation. */
2508 const uint32_t max_threadgroup_count = 1 << 22;
2510 /* 3DMESH_3D limitation. */
2511 const uint32_t max_threadgroup_xyz = 65535;
2513 const uint32_t max_urb_size = 64 * 1024;
2515 properties->maxTaskWorkGroupTotalCount = max_threadgroup_count;
2516 properties->maxTaskWorkGroupCount[0] = max_threadgroup_xyz;
2517 properties->maxTaskWorkGroupCount[1] = max_threadgroup_xyz;
2518 properties->maxTaskWorkGroupCount[2] = max_threadgroup_xyz;
2520 properties->maxTaskWorkGroupInvocations = max_workgroup_size;
2521 properties->maxTaskWorkGroupSize[0] = max_workgroup_size;
2522 properties->maxTaskWorkGroupSize[1] = max_workgroup_size;
2523 properties->maxTaskWorkGroupSize[2] = max_workgroup_size;
2525 /* TUE header with padding */
2526 const uint32_t task_payload_reserved = 32;
2528 properties->maxTaskPayloadSize = max_urb_size - task_payload_reserved;
2529 properties->maxTaskSharedMemorySize = max_slm_size;
2530 properties->maxTaskPayloadAndSharedMemorySize =
2531 properties->maxTaskPayloadSize +
2532 properties->maxTaskSharedMemorySize;
2534 properties->maxMeshWorkGroupTotalCount = max_threadgroup_count;
2535 properties->maxMeshWorkGroupCount[0] = max_threadgroup_xyz;
2536 properties->maxMeshWorkGroupCount[1] = max_threadgroup_xyz;
2537 properties->maxMeshWorkGroupCount[2] = max_threadgroup_xyz;
2539 properties->maxMeshWorkGroupInvocations = max_workgroup_size;
2540 properties->maxMeshWorkGroupSize[0] = max_workgroup_size;
2541 properties->maxMeshWorkGroupSize[1] = max_workgroup_size;
2542 properties->maxMeshWorkGroupSize[2] = max_workgroup_size;
2544 properties->maxMeshSharedMemorySize = max_slm_size;
2545 properties->maxMeshPayloadAndSharedMemorySize =
2546 properties->maxTaskPayloadSize +
2547 properties->maxMeshSharedMemorySize;
2549 /* Unfortunately spec's formula for the max output size doesn't match our hardware
2550 * (because some per-primitive and per-vertex attributes have alignment restrictions),
2551 * so we have to advertise the minimum value mandated by the spec to not overflow it.
2553 properties->maxMeshOutputPrimitives = 256;
2554 properties->maxMeshOutputVertices = 256;
2556 /* NumPrim + Primitive Data List */
2557 const uint32_t max_indices_memory =
2558 ALIGN(sizeof(uint32_t) +
2559 sizeof(uint32_t) * properties->maxMeshOutputVertices, 32);
2561 properties->maxMeshOutputMemorySize = MIN2(max_urb_size - max_indices_memory, 32768);
2563 properties->maxMeshPayloadAndOutputMemorySize =
2564 properties->maxTaskPayloadSize +
2565 properties->maxMeshOutputMemorySize;
2567 properties->maxMeshOutputComponents = 128;
2569 /* RTAIndex is 11-bits wide */
2570 properties->maxMeshOutputLayers = 1 << 11;
2572 properties->maxMeshMultiviewViewCount = 1;
2574 /* Elements in Vertex Data Array must be aligned to 32 bytes (8 dwords). */
2575 properties->meshOutputPerVertexGranularity = 8;
2576 /* Elements in Primitive Data Array must be aligned to 32 bytes (8 dwords). */
2577 properties->meshOutputPerPrimitiveGranularity = 8;
2580 properties->maxPreferredTaskWorkGroupInvocations = 16;
2581 properties->maxPreferredMeshWorkGroupInvocations = 16;
2583 properties->prefersLocalInvocationVertexOutput = false;
2584 properties->prefersLocalInvocationPrimitiveOutput = false;
2585 properties->prefersCompactVertexOutput = false;
2586 properties->prefersCompactPrimitiveOutput = false;
2588 /* Spec minimum values */
2589 assert(properties->maxTaskWorkGroupTotalCount >= (1U << 22));
2590 assert(properties->maxTaskWorkGroupCount[0] >= 65535);
2591 assert(properties->maxTaskWorkGroupCount[1] >= 65535);
2592 assert(properties->maxTaskWorkGroupCount[2] >= 65535);
2594 assert(properties->maxTaskWorkGroupInvocations >= 128);
2595 assert(properties->maxTaskWorkGroupSize[0] >= 128);
2596 assert(properties->maxTaskWorkGroupSize[1] >= 128);
2597 assert(properties->maxTaskWorkGroupSize[2] >= 128);
2599 assert(properties->maxTaskPayloadSize >= 16384);
2600 assert(properties->maxTaskSharedMemorySize >= 32768);
2601 assert(properties->maxTaskPayloadAndSharedMemorySize >= 32768);
2604 assert(properties->maxMeshWorkGroupTotalCount >= (1U << 22));
2605 assert(properties->maxMeshWorkGroupCount[0] >= 65535);
2606 assert(properties->maxMeshWorkGroupCount[1] >= 65535);
2607 assert(properties->maxMeshWorkGroupCount[2] >= 65535);
2609 assert(properties->maxMeshWorkGroupInvocations >= 128);
2610 assert(properties->maxMeshWorkGroupSize[0] >= 128);
2611 assert(properties->maxMeshWorkGroupSize[1] >= 128);
2612 assert(properties->maxMeshWorkGroupSize[2] >= 128);
2614 assert(properties->maxMeshSharedMemorySize >= 28672);
2615 assert(properties->maxMeshPayloadAndSharedMemorySize >= 28672);
2616 assert(properties->maxMeshOutputMemorySize >= 32768);
2617 assert(properties->maxMeshPayloadAndOutputMemorySize >= 48128);
2619 assert(properties->maxMeshOutputComponents >= 128);
2621 assert(properties->maxMeshOutputVertices >= 256);
2622 assert(properties->maxMeshOutputPrimitives >= 256);
2623 assert(properties->maxMeshOutputLayers >= 8);
2624 assert(properties->maxMeshMultiviewViewCount >= 1);
2629 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
2630 VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
2631 (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
2632 properties->pciDomain = pdevice->info.pci_domain;
2633 properties->pciBus = pdevice->info.pci_bus;
2634 properties->pciDevice = pdevice->info.pci_dev;
2635 properties->pciFunction = pdevice->info.pci_func;
2639 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
2640 VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
2641 (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
2642 /* We could support this by spawning a shader to do the equation
2645 properties->allowCommandBufferQueryCopies = false;
2649 #pragma GCC diagnostic push
2650 #pragma GCC diagnostic ignored "-Wswitch"
2651 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRESENTATION_PROPERTIES_ANDROID: {
2652 VkPhysicalDevicePresentationPropertiesANDROID *props =
2653 (VkPhysicalDevicePresentationPropertiesANDROID *)ext;
2654 props->sharedImage = VK_FALSE;
2657 #pragma GCC diagnostic pop
2659 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
2660 VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
2661 (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
2662 properties->provokingVertexModePerPipeline = true;
2663 properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
2667 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
2668 VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
2669 (VkPhysicalDevicePushDescriptorPropertiesKHR *) ext;
2670 properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
2674 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR: {
2675 VkPhysicalDeviceRayTracingPipelinePropertiesKHR *props = (void *)ext;
2677 props->shaderGroupHandleSize = 32;
2678 props->maxRayRecursionDepth = 31;
2679 /* MemRay::hitGroupSRStride is 16 bits */
2680 props->maxShaderGroupStride = UINT16_MAX;
2681 /* MemRay::hitGroupSRBasePtr requires 16B alignment */
2682 props->shaderGroupBaseAlignment = 16;
2683 props->shaderGroupHandleAlignment = 16;
2684 props->shaderGroupHandleCaptureReplaySize = 32;
2685 props->maxRayDispatchInvocationCount = 1U << 30; /* required min limit */
2686 props->maxRayHitAttributeSize = BRW_RT_SIZEOF_HIT_ATTRIB_DATA;
2690 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2691 VkPhysicalDeviceRobustness2PropertiesEXT *properties = (void *)ext;
2692 properties->robustStorageBufferAccessSizeAlignment =
2693 ANV_SSBO_BOUNDS_CHECK_ALIGNMENT;
2694 properties->robustUniformBufferAccessSizeAlignment =
2699 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
2700 VkPhysicalDeviceSampleLocationsPropertiesEXT *props =
2701 (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
2703 props->sampleLocationSampleCounts =
2704 isl_device_get_sample_counts(&pdevice->isl_dev);
2706 /* See also anv_GetPhysicalDeviceMultisamplePropertiesEXT */
2707 props->maxSampleLocationGridSize.width = 1;
2708 props->maxSampleLocationGridSize.height = 1;
2710 props->sampleLocationCoordinateRange[0] = 0;
2711 props->sampleLocationCoordinateRange[1] = 0.9375;
2712 props->sampleLocationSubPixelBits = 4;
2714 props->variableSampleLocations = true;
2718 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
2719 VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
2720 (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
2721 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
2722 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
2723 memcpy(props->shaderModuleIdentifierAlgorithmUUID,
2724 vk_shaderModuleIdentifierAlgorithmUUID,
2725 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
2729 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2730 VkPhysicalDeviceTransformFeedbackPropertiesEXT *props =
2731 (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2733 props->maxTransformFeedbackStreams = MAX_XFB_STREAMS;
2734 props->maxTransformFeedbackBuffers = MAX_XFB_BUFFERS;
2735 props->maxTransformFeedbackBufferSize = (1ull << 32);
2736 props->maxTransformFeedbackStreamDataSize = 128 * 4;
2737 props->maxTransformFeedbackBufferDataSize = 128 * 4;
2738 props->maxTransformFeedbackBufferDataStride = 2048;
2739 props->transformFeedbackQueries = true;
2740 props->transformFeedbackStreamsLinesTriangles = false;
2741 props->transformFeedbackRasterizationStreamSelect = false;
2742 props->transformFeedbackDraw = true;
2746 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2747 VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
2748 (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2749 /* We have to restrict this a bit for multiview */
2750 props->maxVertexAttribDivisor = UINT32_MAX / 16;
2754 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2755 VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2756 props->maxMultiDrawCount = 2048;
2761 anv_debug_ignored_stype(ext->sType);
2767 static const VkQueueFamilyProperties
2768 anv_queue_family_properties_template = {
2769 .timestampValidBits = 36, /* XXX: Real value here */
2770 .minImageTransferGranularity = { 1, 1, 1 },
2773 void anv_GetPhysicalDeviceQueueFamilyProperties2(
2774 VkPhysicalDevice physicalDevice,
2775 uint32_t* pQueueFamilyPropertyCount,
2776 VkQueueFamilyProperties2* pQueueFamilyProperties)
2778 ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
2779 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
2780 pQueueFamilyProperties, pQueueFamilyPropertyCount);
2782 for (uint32_t i = 0; i < pdevice->queue.family_count; i++) {
2783 struct anv_queue_family *queue_family = &pdevice->queue.families[i];
2784 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
2785 p->queueFamilyProperties = anv_queue_family_properties_template;
2786 p->queueFamilyProperties.queueFlags = queue_family->queueFlags;
2787 p->queueFamilyProperties.queueCount = queue_family->queueCount;
2789 vk_foreach_struct(ext, p->pNext) {
2790 switch (ext->sType) {
2791 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
2792 VkQueueFamilyGlobalPriorityPropertiesKHR *properties =
2793 (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
2795 /* Deliberately sorted low to high */
2796 VkQueueGlobalPriorityKHR all_priorities[] = {
2797 VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
2798 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
2799 VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
2800 VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
2804 for (unsigned i = 0; i < ARRAY_SIZE(all_priorities); i++) {
2805 if (all_priorities[i] > pdevice->max_context_priority)
2808 properties->priorities[count++] = all_priorities[i];
2810 properties->priorityCount = count;
2815 anv_debug_ignored_stype(ext->sType);
2822 void anv_GetPhysicalDeviceMemoryProperties(
2823 VkPhysicalDevice physicalDevice,
2824 VkPhysicalDeviceMemoryProperties* pMemoryProperties)
2826 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
2828 pMemoryProperties->memoryTypeCount = physical_device->memory.type_count;
2829 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
2830 pMemoryProperties->memoryTypes[i] = (VkMemoryType) {
2831 .propertyFlags = physical_device->memory.types[i].propertyFlags,
2832 .heapIndex = physical_device->memory.types[i].heapIndex,
2836 pMemoryProperties->memoryHeapCount = physical_device->memory.heap_count;
2837 for (uint32_t i = 0; i < physical_device->memory.heap_count; i++) {
2838 pMemoryProperties->memoryHeaps[i] = (VkMemoryHeap) {
2839 .size = physical_device->memory.heaps[i].size,
2840 .flags = physical_device->memory.heaps[i].flags,
2846 anv_get_memory_budget(VkPhysicalDevice physicalDevice,
2847 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2849 ANV_FROM_HANDLE(anv_physical_device, device, physicalDevice);
2851 if (!device->vk.supported_extensions.EXT_memory_budget)
2854 anv_update_meminfo(device, device->local_fd);
2856 VkDeviceSize total_sys_heaps_size = 0, total_vram_heaps_size = 0;
2857 for (size_t i = 0; i < device->memory.heap_count; i++) {
2858 if (device->memory.heaps[i].is_local_mem) {
2859 total_vram_heaps_size += device->memory.heaps[i].size;
2861 total_sys_heaps_size += device->memory.heaps[i].size;
2865 for (size_t i = 0; i < device->memory.heap_count; i++) {
2866 VkDeviceSize heap_size = device->memory.heaps[i].size;
2867 VkDeviceSize heap_used = device->memory.heaps[i].used;
2868 VkDeviceSize heap_budget, total_heaps_size;
2869 uint64_t mem_available = 0;
2871 if (device->memory.heaps[i].is_local_mem) {
2872 total_heaps_size = total_vram_heaps_size;
2873 if (device->vram_non_mappable.size > 0 && i == 0) {
2874 mem_available = device->vram_non_mappable.available;
2876 mem_available = device->vram_mappable.available;
2879 total_heaps_size = total_sys_heaps_size;
2880 mem_available = device->sys.available;
2883 double heap_proportion = (double) heap_size / total_heaps_size;
2884 VkDeviceSize available_prop = mem_available * heap_proportion;
2887 * Let's not incite the app to starve the system: report at most 90% of
2888 * the available heap memory.
2890 uint64_t heap_available = available_prop * 9 / 10;
2891 heap_budget = MIN2(heap_size, heap_used + heap_available);
2894 * Round down to the nearest MB
2896 heap_budget &= ~((1ull << 20) - 1);
2899 * The heapBudget value must be non-zero for array elements less than
2900 * VkPhysicalDeviceMemoryProperties::memoryHeapCount. The heapBudget
2901 * value must be less than or equal to VkMemoryHeap::size for each heap.
2903 assert(0 < heap_budget && heap_budget <= heap_size);
2905 memoryBudget->heapUsage[i] = heap_used;
2906 memoryBudget->heapBudget[i] = heap_budget;
2909 /* The heapBudget and heapUsage values must be zero for array elements
2910 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
2912 for (uint32_t i = device->memory.heap_count; i < VK_MAX_MEMORY_HEAPS; i++) {
2913 memoryBudget->heapBudget[i] = 0;
2914 memoryBudget->heapUsage[i] = 0;
2918 void anv_GetPhysicalDeviceMemoryProperties2(
2919 VkPhysicalDevice physicalDevice,
2920 VkPhysicalDeviceMemoryProperties2* pMemoryProperties)
2922 anv_GetPhysicalDeviceMemoryProperties(physicalDevice,
2923 &pMemoryProperties->memoryProperties);
2925 vk_foreach_struct(ext, pMemoryProperties->pNext) {
2926 switch (ext->sType) {
2927 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT:
2928 anv_get_memory_budget(physicalDevice, (void*)ext);
2931 anv_debug_ignored_stype(ext->sType);
2938 anv_GetDeviceGroupPeerMemoryFeatures(
2941 uint32_t localDeviceIndex,
2942 uint32_t remoteDeviceIndex,
2943 VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
2945 assert(localDeviceIndex == 0 && remoteDeviceIndex == 0);
2946 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2947 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2948 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2949 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2952 PFN_vkVoidFunction anv_GetInstanceProcAddr(
2953 VkInstance _instance,
2956 ANV_FROM_HANDLE(anv_instance, instance, _instance);
2957 return vk_instance_get_proc_addr(&instance->vk,
2958 &anv_instance_entrypoints,
2962 /* With version 1+ of the loader interface the ICD should expose
2963 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
2966 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2967 VkInstance instance,
2971 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
2972 VkInstance instance,
2975 return anv_GetInstanceProcAddr(instance, pName);
2978 /* With version 4+ of the loader interface the ICD should expose
2979 * vk_icdGetPhysicalDeviceProcAddr()
2982 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
2983 VkInstance _instance,
2986 PFN_vkVoidFunction vk_icdGetPhysicalDeviceProcAddr(
2987 VkInstance _instance,
2990 ANV_FROM_HANDLE(anv_instance, instance, _instance);
2991 return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2994 static struct anv_state
2995 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
2997 struct anv_state state;
2999 state = anv_state_pool_alloc(pool, size, align);
3000 memcpy(state.map, p, size);
3006 anv_device_init_border_colors(struct anv_device *device)
3008 if (device->info->platform == INTEL_PLATFORM_HSW) {
3009 static const struct hsw_border_color border_colors[] = {
3010 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
3011 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
3012 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
3013 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
3014 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
3015 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
3018 device->border_colors =
3019 anv_state_pool_emit_data(&device->dynamic_state_pool,
3020 sizeof(border_colors), 512, border_colors);
3022 static const struct gfx8_border_color border_colors[] = {
3023 [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
3024 [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
3025 [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
3026 [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
3027 [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
3028 [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
3031 device->border_colors =
3032 anv_state_pool_emit_data(&device->dynamic_state_pool,
3033 sizeof(border_colors), 64, border_colors);
3038 anv_device_init_trivial_batch(struct anv_device *device)
3040 VkResult result = anv_device_alloc_bo(device, "trivial-batch", 4096,
3041 ANV_BO_ALLOC_MAPPED,
3042 0 /* explicit_address */,
3043 &device->trivial_batch_bo);
3044 if (result != VK_SUCCESS)
3047 struct anv_batch batch = {
3048 .start = device->trivial_batch_bo->map,
3049 .next = device->trivial_batch_bo->map,
3050 .end = device->trivial_batch_bo->map + 4096,
3053 anv_batch_emit(&batch, GFX7_MI_BATCH_BUFFER_END, bbe);
3054 anv_batch_emit(&batch, GFX7_MI_NOOP, noop);
3056 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
3057 if (device->physical->memory.need_clflush)
3058 intel_clflush_range(batch.start, batch.next - batch.start);
3065 get_bo_from_pool(struct intel_batch_decode_bo *ret,
3066 struct anv_block_pool *pool,
3069 anv_block_pool_foreach_bo(bo, pool) {
3070 uint64_t bo_address = intel_48b_address(bo->offset);
3071 if (address >= bo_address && address < (bo_address + bo->size)) {
3072 *ret = (struct intel_batch_decode_bo) {
3083 /* Finding a buffer for batch decoding */
3084 static struct intel_batch_decode_bo
3085 decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
3087 struct anv_device *device = v_batch;
3088 struct intel_batch_decode_bo ret_bo = {};
3092 if (get_bo_from_pool(&ret_bo, &device->dynamic_state_pool.block_pool, address))
3094 if (get_bo_from_pool(&ret_bo, &device->instruction_state_pool.block_pool, address))
3096 if (get_bo_from_pool(&ret_bo, &device->binding_table_pool.block_pool, address))
3098 if (get_bo_from_pool(&ret_bo, &device->scratch_surface_state_pool.block_pool, address))
3100 if (get_bo_from_pool(&ret_bo, &device->bindless_surface_state_pool.block_pool, address))
3102 if (get_bo_from_pool(&ret_bo, &device->internal_surface_state_pool.block_pool, address))
3105 if (!device->cmd_buffer_being_decoded)
3106 return (struct intel_batch_decode_bo) { };
3108 struct anv_batch_bo **bo;
3110 u_vector_foreach(bo, &device->cmd_buffer_being_decoded->seen_bbos) {
3111 /* The decoder zeroes out the top 16 bits, so we need to as well */
3112 uint64_t bo_address = (*bo)->bo->offset & (~0ull >> 16);
3114 if (address >= bo_address && address < bo_address + (*bo)->bo->size) {
3115 return (struct intel_batch_decode_bo) {
3117 .size = (*bo)->bo->size,
3118 .map = (*bo)->bo->map,
3123 return (struct intel_batch_decode_bo) { };
3126 struct intel_aux_map_buffer {
3127 struct intel_buffer base;
3128 struct anv_state state;
3131 static struct intel_buffer *
3132 intel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size)
3134 struct intel_aux_map_buffer *buf = malloc(sizeof(struct intel_aux_map_buffer));
3138 struct anv_device *device = (struct anv_device*)driver_ctx;
3139 assert(device->physical->supports_48bit_addresses);
3141 struct anv_state_pool *pool = &device->dynamic_state_pool;
3142 buf->state = anv_state_pool_alloc(pool, size, size);
3144 buf->base.gpu = pool->block_pool.bo->offset + buf->state.offset;
3145 buf->base.gpu_end = buf->base.gpu + buf->state.alloc_size;
3146 buf->base.map = buf->state.map;
3147 buf->base.driver_bo = &buf->state;
3152 intel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer)
3154 struct intel_aux_map_buffer *buf = (struct intel_aux_map_buffer*)buffer;
3155 struct anv_device *device = (struct anv_device*)driver_ctx;
3156 struct anv_state_pool *pool = &device->dynamic_state_pool;
3157 anv_state_pool_free(pool, buf->state);
3161 static struct intel_mapped_pinned_buffer_alloc aux_map_allocator = {
3162 .alloc = intel_aux_map_buffer_alloc,
3163 .free = intel_aux_map_buffer_free,
3166 static VkResult anv_device_check_status(struct vk_device *vk_device);
3169 anv_device_setup_context(struct anv_device *device,
3170 const VkDeviceCreateInfo *pCreateInfo,
3171 const uint32_t num_queues)
3173 struct anv_physical_device *physical_device = device->physical;
3174 VkResult result = VK_SUCCESS;
3176 if (device->physical->engine_info) {
3177 /* The kernel API supports at most 64 engines */
3178 assert(num_queues <= 64);
3179 enum intel_engine_class engine_classes[64];
3180 int engine_count = 0;
3181 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3182 const VkDeviceQueueCreateInfo *queueCreateInfo =
3183 &pCreateInfo->pQueueCreateInfos[i];
3185 assert(queueCreateInfo->queueFamilyIndex <
3186 physical_device->queue.family_count);
3187 struct anv_queue_family *queue_family =
3188 &physical_device->queue.families[queueCreateInfo->queueFamilyIndex];
3190 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++)
3191 engine_classes[engine_count++] = queue_family->engine_class;
3193 if (!intel_gem_create_context_engines(device->fd,
3194 physical_device->engine_info,
3195 engine_count, engine_classes,
3196 (uint32_t *)&device->context_id))
3197 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
3198 "kernel context creation failed");
3200 assert(num_queues == 1);
3201 if (!intel_gem_create_context(device->fd, &device->context_id))
3202 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3205 if (result != VK_SUCCESS)
3208 /* Here we tell the kernel not to attempt to recover our context but
3209 * immediately (on the next batchbuffer submission) report that the
3210 * context is lost, and we will do the recovery ourselves. In the case
3211 * of Vulkan, recovery means throwing VK_ERROR_DEVICE_LOST and letting
3212 * the client clean up the pieces.
3214 anv_gem_set_context_param(device->fd, device->context_id,
3215 I915_CONTEXT_PARAM_RECOVERABLE, false);
3217 /* Check if client specified queue priority. */
3218 const VkDeviceQueueGlobalPriorityCreateInfoKHR *queue_priority =
3219 vk_find_struct_const(pCreateInfo->pQueueCreateInfos[0].pNext,
3220 DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
3222 VkQueueGlobalPriorityKHR priority =
3223 queue_priority ? queue_priority->globalPriority :
3224 VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
3226 /* As per spec, the driver implementation may deny requests to acquire
3227 * a priority above the default priority (MEDIUM) if the caller does not
3228 * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_KHR
3231 if (physical_device->max_context_priority >= VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
3232 int err = anv_gem_set_context_param(device->fd, device->context_id,
3233 I915_CONTEXT_PARAM_PRIORITY,
3235 if (err != 0 && priority > VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR) {
3236 result = vk_error(device, VK_ERROR_NOT_PERMITTED_KHR);
3244 intel_gem_destroy_context(device->fd, device->context_id);
3248 VkResult anv_CreateDevice(
3249 VkPhysicalDevice physicalDevice,
3250 const VkDeviceCreateInfo* pCreateInfo,
3251 const VkAllocationCallbacks* pAllocator,
3254 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
3256 struct anv_device *device;
3258 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
3260 /* Check enabled features */
3261 bool robust_buffer_access = false;
3262 if (pCreateInfo->pEnabledFeatures) {
3263 if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
3264 robust_buffer_access = true;
3267 vk_foreach_struct_const(ext, pCreateInfo->pNext) {
3268 switch (ext->sType) {
3269 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
3270 const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
3271 if (features->features.robustBufferAccess)
3272 robust_buffer_access = true;
3282 /* Check requested queues and fail if we are requested to create any
3283 * queues with flags we don't support.
3285 assert(pCreateInfo->queueCreateInfoCount > 0);
3286 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3287 if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
3288 return vk_error(physical_device, VK_ERROR_INITIALIZATION_FAILED);
3291 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
3293 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3295 return vk_error(physical_device, VK_ERROR_OUT_OF_HOST_MEMORY);
3297 struct vk_device_dispatch_table dispatch_table;
3299 bool override_initial_entrypoints = true;
3300 if (physical_device->instance->vk.app_info.app_name &&
3301 !strcmp(physical_device->instance->vk.app_info.app_name, "HITMAN3.exe")) {
3302 vk_device_dispatch_table_from_entrypoints(&dispatch_table, &hitman3_device_entrypoints, true);
3303 override_initial_entrypoints = false;
3305 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3306 anv_genX(&physical_device->info, device_entrypoints),
3307 override_initial_entrypoints);
3308 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3309 &anv_device_entrypoints, false);
3310 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
3311 &wsi_device_entrypoints, false);
3313 result = vk_device_init(&device->vk, &physical_device->vk,
3314 &dispatch_table, pCreateInfo, pAllocator);
3315 if (result != VK_SUCCESS)
3318 if (INTEL_DEBUG(DEBUG_BATCH)) {
3319 const unsigned decode_flags =
3320 INTEL_BATCH_DECODE_FULL |
3321 (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
3322 INTEL_BATCH_DECODE_OFFSETS |
3323 INTEL_BATCH_DECODE_FLOATS;
3325 intel_batch_decode_ctx_init(&device->decoder_ctx,
3326 &physical_device->compiler->isa,
3327 &physical_device->info,
3328 stderr, decode_flags, NULL,
3329 decode_get_bo, NULL, device);
3331 device->decoder_ctx.dynamic_base = DYNAMIC_STATE_POOL_MIN_ADDRESS;
3332 device->decoder_ctx.surface_base = INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
3333 device->decoder_ctx.instruction_base =
3334 INSTRUCTION_STATE_POOL_MIN_ADDRESS;
3337 anv_device_set_physical(device, physical_device);
3339 /* XXX(chadv): Can we dup() physicalDevice->fd here? */
3340 device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
3341 if (device->fd == -1) {
3342 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3346 device->vk.command_buffer_ops = &anv_cmd_buffer_ops;
3347 device->vk.check_status = anv_device_check_status;
3348 device->vk.create_sync_for_memory = anv_create_sync_for_memory;
3349 vk_device_set_drm_fd(&device->vk, device->fd);
3351 uint32_t num_queues = 0;
3352 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++)
3353 num_queues += pCreateInfo->pQueueCreateInfos[i].queueCount;
3355 result = anv_device_setup_context(device, pCreateInfo, num_queues);
3356 if (result != VK_SUCCESS)
3360 vk_zalloc(&device->vk.alloc, num_queues * sizeof(*device->queues), 8,
3361 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3362 if (device->queues == NULL) {
3363 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3364 goto fail_context_id;
3367 device->queue_count = 0;
3368 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3369 const VkDeviceQueueCreateInfo *queueCreateInfo =
3370 &pCreateInfo->pQueueCreateInfos[i];
3372 for (uint32_t j = 0; j < queueCreateInfo->queueCount; j++) {
3373 /* When using legacy contexts, we use I915_EXEC_RENDER but, with
3374 * engine-based contexts, the bottom 6 bits of exec_flags are used
3375 * for the engine ID.
3377 uint32_t exec_flags = device->physical->engine_info ?
3378 device->queue_count : I915_EXEC_RENDER;
3380 result = anv_queue_init(device, &device->queues[device->queue_count],
3381 exec_flags, queueCreateInfo, j);
3382 if (result != VK_SUCCESS)
3385 device->queue_count++;
3389 if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
3390 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3394 /* keep the page with address zero out of the allocator */
3395 util_vma_heap_init(&device->vma_lo,
3396 LOW_HEAP_MIN_ADDRESS, LOW_HEAP_SIZE);
3398 util_vma_heap_init(&device->vma_cva, CLIENT_VISIBLE_HEAP_MIN_ADDRESS,
3399 CLIENT_VISIBLE_HEAP_SIZE);
3401 /* Leave the last 4GiB out of the high vma range, so that no state
3402 * base address + size can overflow 48 bits. For more information see
3403 * the comment about Wa32bitGeneralStateOffset in anv_allocator.c
3405 util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
3406 physical_device->gtt_size - (1ull << 32) -
3407 HIGH_HEAP_MIN_ADDRESS);
3409 list_inithead(&device->memory_objects);
3411 device->robust_buffer_access = robust_buffer_access;
3413 if (pthread_mutex_init(&device->mutex, NULL) != 0) {
3414 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3418 pthread_condattr_t condattr;
3419 if (pthread_condattr_init(&condattr) != 0) {
3420 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3423 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
3424 pthread_condattr_destroy(&condattr);
3425 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3428 if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
3429 pthread_condattr_destroy(&condattr);
3430 result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
3433 pthread_condattr_destroy(&condattr);
3435 result = anv_bo_cache_init(&device->bo_cache, device);
3436 if (result != VK_SUCCESS)
3437 goto fail_queue_cond;
3439 anv_bo_pool_init(&device->batch_bo_pool, device, "batch");
3441 /* Because scratch is also relative to General State Base Address, we leave
3442 * the base address 0 and start the pool memory at an offset. This way we
3443 * get the correct offsets in the anv_states that get allocated from it.
3445 result = anv_state_pool_init(&device->general_state_pool, device,
3447 0, GENERAL_STATE_POOL_MIN_ADDRESS, 16384);
3448 if (result != VK_SUCCESS)
3449 goto fail_batch_bo_pool;
3451 result = anv_state_pool_init(&device->dynamic_state_pool, device,
3453 DYNAMIC_STATE_POOL_MIN_ADDRESS, 0, 16384);
3454 if (result != VK_SUCCESS)
3455 goto fail_general_state_pool;
3457 /* The border color pointer is limited to 24 bits, so we need to make
3458 * sure that any such color used at any point in the program doesn't
3459 * exceed that limit.
3460 * We achieve that by reserving all the custom border colors we support
3461 * right off the bat, so they are close to the base address.
3463 anv_state_reserved_pool_init(&device->custom_border_colors,
3464 &device->dynamic_state_pool,
3465 MAX_CUSTOM_BORDER_COLORS,
3466 sizeof(struct gfx8_border_color), 64);
3468 result = anv_state_pool_init(&device->instruction_state_pool, device,
3470 INSTRUCTION_STATE_POOL_MIN_ADDRESS, 0, 16384);
3471 if (result != VK_SUCCESS)
3472 goto fail_dynamic_state_pool;
3474 if (device->info->verx10 >= 125) {
3475 /* Put the scratch surface states at the beginning of the internal
3476 * surface state pool.
3478 result = anv_state_pool_init(&device->scratch_surface_state_pool, device,
3479 "scratch surface state pool",
3480 SCRATCH_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3481 if (result != VK_SUCCESS)
3482 goto fail_instruction_state_pool;
3484 result = anv_state_pool_init(&device->internal_surface_state_pool, device,
3485 "internal surface state pool",
3486 INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
3487 SCRATCH_SURFACE_STATE_POOL_SIZE, 4096);
3489 result = anv_state_pool_init(&device->internal_surface_state_pool, device,
3490 "internal surface state pool",
3491 INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3493 if (result != VK_SUCCESS)
3494 goto fail_scratch_surface_state_pool;
3496 result = anv_state_pool_init(&device->bindless_surface_state_pool, device,
3497 "bindless surface state pool",
3498 BINDLESS_SURFACE_STATE_POOL_MIN_ADDRESS, 0, 4096);
3499 if (result != VK_SUCCESS)
3500 goto fail_internal_surface_state_pool;
3502 if (device->info->verx10 >= 125) {
3503 /* We're using 3DSTATE_BINDING_TABLE_POOL_ALLOC to give the binding
3504 * table its own base address separately from surface state base.
3506 result = anv_state_pool_init(&device->binding_table_pool, device,
3507 "binding table pool",
3508 BINDING_TABLE_POOL_MIN_ADDRESS, 0,
3509 BINDING_TABLE_POOL_BLOCK_SIZE);
3511 int64_t bt_pool_offset = (int64_t)BINDING_TABLE_POOL_MIN_ADDRESS -
3512 (int64_t)INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS;
3513 assert(INT32_MIN < bt_pool_offset && bt_pool_offset < 0);
3514 result = anv_state_pool_init(&device->binding_table_pool, device,
3515 "binding table pool",
3516 INTERNAL_SURFACE_STATE_POOL_MIN_ADDRESS,
3518 BINDING_TABLE_POOL_BLOCK_SIZE);
3520 if (result != VK_SUCCESS)
3521 goto fail_bindless_surface_state_pool;
3523 if (device->info->has_aux_map) {
3524 device->aux_map_ctx = intel_aux_map_init(device, &aux_map_allocator,
3525 &physical_device->info);
3526 if (!device->aux_map_ctx)
3527 goto fail_binding_table_pool;
3530 result = anv_device_alloc_bo(device, "workaround", 4096,
3531 ANV_BO_ALLOC_CAPTURE |
3532 ANV_BO_ALLOC_MAPPED |
3533 (device->info->has_local_mem ?
3534 ANV_BO_ALLOC_WRITE_COMBINE : 0),
3535 0 /* explicit_address */,
3536 &device->workaround_bo);
3537 if (result != VK_SUCCESS)
3538 goto fail_surface_aux_map_pool;
3540 device->workaround_address = (struct anv_address) {
3541 .bo = device->workaround_bo,
3542 .offset = align_u32(
3543 intel_debug_write_identifiers(device->workaround_bo->map,
3544 device->workaround_bo->size,
3548 device->rt_uuid_addr = anv_address_add(device->workaround_address, 8);
3549 memcpy(device->rt_uuid_addr.bo->map + device->rt_uuid_addr.offset,
3550 physical_device->rt_uuid,
3551 sizeof(physical_device->rt_uuid));
3553 device->debug_frame_desc =
3554 intel_debug_get_identifier_block(device->workaround_bo->map,
3555 device->workaround_bo->size,
3556 INTEL_DEBUG_BLOCK_TYPE_FRAME);
3558 if (device->vk.enabled_extensions.KHR_ray_query) {
3559 uint32_t ray_queries_size =
3560 align_u32(brw_rt_ray_queries_hw_stacks_size(device->info), 4096);
3562 result = anv_device_alloc_bo(device, "ray queries",
3565 0 /* explicit_address */,
3566 &device->ray_query_bo);
3567 if (result != VK_SUCCESS)
3568 goto fail_workaround_bo;
3571 result = anv_device_init_trivial_batch(device);
3572 if (result != VK_SUCCESS)
3573 goto fail_ray_query_bo;
3575 /* Emit the CPS states before running the initialization batch as those
3576 * structures are referenced.
3578 if (device->info->ver >= 12) {
3579 uint32_t n_cps_states = 3 * 3; /* All combinaisons of X by Y CP sizes (1, 2, 4) */
3581 if (device->info->has_coarse_pixel_primitive_and_cb)
3582 n_cps_states *= 5 * 5; /* 5 combiners by 2 operators */
3584 n_cps_states += 1; /* Disable CPS */
3586 /* Each of the combinaison must be replicated on all viewports */
3587 n_cps_states *= MAX_VIEWPORTS;
3589 device->cps_states =
3590 anv_state_pool_alloc(&device->dynamic_state_pool,
3591 n_cps_states * CPS_STATE_length(device->info) * 4,
3593 if (device->cps_states.map == NULL)
3594 goto fail_trivial_batch;
3596 anv_genX(device->info, init_cps_device_state)(device);
3599 /* Allocate a null surface state at surface state offset 0. This makes
3600 * NULL descriptor handling trivial because we can just memset structures
3601 * to zero and they have a valid descriptor.
3603 device->null_surface_state =
3604 anv_state_pool_alloc(device->info->verx10 >= 125 ?
3605 &device->scratch_surface_state_pool :
3606 &device->internal_surface_state_pool,
3607 device->isl_dev.ss.size,
3608 device->isl_dev.ss.align);
3609 isl_null_fill_state(&device->isl_dev, device->null_surface_state.map,
3610 .size = isl_extent3d(1, 1, 1) /* This shouldn't matter */);
3611 assert(device->null_surface_state.offset == 0);
3613 anv_scratch_pool_init(device, &device->scratch_pool);
3615 /* TODO(RT): Do we want some sort of data structure for this? */
3616 memset(device->rt_scratch_bos, 0, sizeof(device->rt_scratch_bos));
3618 if (ANV_SUPPORT_RT && device->info->has_ray_tracing) {
3619 /* The docs say to always allocate 128KB per DSS */
3620 const uint32_t btd_fifo_bo_size =
3621 128 * 1024 * intel_device_info_dual_subslice_id_bound(device->info);
3622 result = anv_device_alloc_bo(device,
3625 0 /* alloc_flags */,
3626 0 /* explicit_address */,
3627 &device->btd_fifo_bo);
3628 if (result != VK_SUCCESS)
3629 goto fail_trivial_batch_bo_and_scratch_pool;
3632 result = anv_genX(device->info, init_device_state)(device);
3633 if (result != VK_SUCCESS)
3634 goto fail_btd_fifo_bo;
3636 struct vk_pipeline_cache_create_info pcc_info = { };
3637 device->default_pipeline_cache =
3638 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
3639 if (!device->default_pipeline_cache) {
3640 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3641 goto fail_btd_fifo_bo;
3644 /* Internal shaders need their own pipeline cache because, unlike the rest
3645 * of ANV, it won't work at all without the cache. It depends on it for
3646 * shaders to remain resident while it runs. Therefore, we need a special
3647 * cache just for BLORP/RT that's forced to always be enabled.
3649 pcc_info.force_enable = true;
3650 device->internal_cache =
3651 vk_pipeline_cache_create(&device->vk, &pcc_info, NULL);
3652 if (device->internal_cache == NULL) {
3653 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3654 goto fail_default_pipeline_cache;
3657 /* The device (currently is ICL/TGL) does not have float64 support. */
3658 if (!device->info->has_64bit_float &&
3659 device->physical->instance->fp64_workaround_enabled)
3660 anv_load_fp64_shader(device);
3662 result = anv_device_init_rt_shaders(device);
3663 if (result != VK_SUCCESS) {
3664 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3665 goto fail_internal_cache;
3668 anv_device_init_blorp(device);
3670 anv_device_init_border_colors(device);
3672 anv_device_perf_init(device);
3674 anv_device_utrace_init(device);
3676 *pDevice = anv_device_to_handle(device);
3680 fail_internal_cache:
3681 vk_pipeline_cache_destroy(device->internal_cache, NULL);
3682 fail_default_pipeline_cache:
3683 vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3685 if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
3686 anv_device_release_bo(device, device->btd_fifo_bo);
3687 fail_trivial_batch_bo_and_scratch_pool:
3688 anv_scratch_pool_finish(device, &device->scratch_pool);
3690 anv_device_release_bo(device, device->trivial_batch_bo);
3692 if (device->ray_query_bo)
3693 anv_device_release_bo(device, device->ray_query_bo);
3695 anv_device_release_bo(device, device->workaround_bo);
3696 fail_surface_aux_map_pool:
3697 if (device->info->has_aux_map) {
3698 intel_aux_map_finish(device->aux_map_ctx);
3699 device->aux_map_ctx = NULL;
3701 fail_binding_table_pool:
3702 anv_state_pool_finish(&device->binding_table_pool);
3703 fail_bindless_surface_state_pool:
3704 anv_state_pool_finish(&device->bindless_surface_state_pool);
3705 fail_internal_surface_state_pool:
3706 anv_state_pool_finish(&device->internal_surface_state_pool);
3707 fail_scratch_surface_state_pool:
3708 if (device->info->verx10 >= 125)
3709 anv_state_pool_finish(&device->scratch_surface_state_pool);
3710 fail_instruction_state_pool:
3711 anv_state_pool_finish(&device->instruction_state_pool);
3712 fail_dynamic_state_pool:
3713 anv_state_reserved_pool_finish(&device->custom_border_colors);
3714 anv_state_pool_finish(&device->dynamic_state_pool);
3715 fail_general_state_pool:
3716 anv_state_pool_finish(&device->general_state_pool);
3718 anv_bo_pool_finish(&device->batch_bo_pool);
3719 anv_bo_cache_finish(&device->bo_cache);
3721 pthread_cond_destroy(&device->queue_submit);
3723 pthread_mutex_destroy(&device->mutex);
3725 util_vma_heap_finish(&device->vma_hi);
3726 util_vma_heap_finish(&device->vma_cva);
3727 util_vma_heap_finish(&device->vma_lo);
3729 for (uint32_t i = 0; i < device->queue_count; i++)
3730 anv_queue_finish(&device->queues[i]);
3731 vk_free(&device->vk.alloc, device->queues);
3733 intel_gem_destroy_context(device->fd, device->context_id);
3737 vk_device_finish(&device->vk);
3739 vk_free(&device->vk.alloc, device);
/* vkDestroyDevice: releases everything anv_CreateDevice set up, in roughly
 * the reverse order of creation — utrace/BLORP/RT-shader state, pipeline
 * caches, ray-tracing BOs, state pools, BO pool/cache, VMA heaps, queue
 * objects, and finally the kernel GPU context and the device itself.
 *
 * NOTE(review): this listing is elided (the inline numbering skips), so
 * some statements — e.g. the usual NULL-device early-out and several
 * closing braces — are not visible here.
 */
3744 void anv_DestroyDevice(
3746 const VkAllocationCallbacks* pAllocator)
3748 ANV_FROM_HANDLE(anv_device, device, _device);
3753 anv_device_utrace_finish(device);
3755 anv_device_finish_blorp(device);
3757 anv_device_finish_rt_shaders(device);
/* Both caches were created at device init; the internal (BLORP/RT) cache
 * is destroyed first, matching the reverse of its creation order. */
3759 vk_pipeline_cache_destroy(device->internal_cache, NULL);
3760 vk_pipeline_cache_destroy(device->default_pipeline_cache, NULL);
3762 if (ANV_SUPPORT_RT && device->info->has_ray_tracing)
3763 anv_device_release_bo(device, device->btd_fifo_bo);
3765 #ifdef HAVE_VALGRIND
3766 /* We only need to free these to prevent valgrind errors. The backing
3767 * BO will go away in a couple of lines so we don't actually leak.
3769 anv_state_reserved_pool_finish(&device->custom_border_colors);
3770 anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
3771 anv_state_pool_free(&device->dynamic_state_pool, device->slice_hash);
3772 anv_state_pool_free(&device->dynamic_state_pool, device->cps_states);
/* rt_scratch_bos is zeroed at device creation, so unallocated entries
 * are NULL and skipped here. */
3775 for (unsigned i = 0; i < ARRAY_SIZE(device->rt_scratch_bos); i++) {
3776 if (device->rt_scratch_bos[i] != NULL)
3777 anv_device_release_bo(device, device->rt_scratch_bos[i]);
3780 anv_scratch_pool_finish(device, &device->scratch_pool);
3782 if (device->vk.enabled_extensions.KHR_ray_query) {
3783 for (unsigned i = 0; i < ARRAY_SIZE(device->ray_query_shadow_bos); i++) {
3784 if (device->ray_query_shadow_bos[i] != NULL)
3785 anv_device_release_bo(device, device->ray_query_shadow_bos[i]);
3787 anv_device_release_bo(device, device->ray_query_bo);
3789 anv_device_release_bo(device, device->workaround_bo);
3790 anv_device_release_bo(device, device->trivial_batch_bo);
3792 if (device->info->has_aux_map) {
3793 intel_aux_map_finish(device->aux_map_ctx);
3794 device->aux_map_ctx = NULL;
/* The scratch surface state pool only exists on verx10 >= 125, matching
 * the condition guarding its creation. */
3797 anv_state_pool_finish(&device->binding_table_pool);
3798 if (device->info->verx10 >= 125)
3799 anv_state_pool_finish(&device->scratch_surface_state_pool);
3800 anv_state_pool_finish(&device->internal_surface_state_pool);
3801 anv_state_pool_finish(&device->bindless_surface_state_pool);
3802 anv_state_pool_finish(&device->instruction_state_pool);
3803 anv_state_pool_finish(&device->dynamic_state_pool);
3804 anv_state_pool_finish(&device->general_state_pool);
3806 anv_bo_pool_finish(&device->batch_bo_pool);
3808 anv_bo_cache_finish(&device->bo_cache);
3810 util_vma_heap_finish(&device->vma_hi);
3811 util_vma_heap_finish(&device->vma_cva);
3812 util_vma_heap_finish(&device->vma_lo);
3814 pthread_cond_destroy(&device->queue_submit);
3815 pthread_mutex_destroy(&device->mutex);
3817 for (uint32_t i = 0; i < device->queue_count; i++)
3818 anv_queue_finish(&device->queues[i]);
3819 vk_free(&device->vk.alloc, device->queues);
3821 intel_gem_destroy_context(device->fd, device->context_id);
/* The batch decoder context is only initialized under INTEL_DEBUG=batch. */
3823 if (INTEL_DEBUG(DEBUG_BATCH))
3824 intel_batch_decode_ctx_finish(&device->decoder_ctx);
3828 vk_device_finish(&device->vk);
3829 vk_free(&device->vk.alloc, device);
/* vkEnumerateInstanceLayerProperties: ANV exposes no instance layers.
 * A count query (pProperties == NULL) reports zero; requesting actual
 * properties yields VK_ERROR_LAYER_NOT_PRESENT.
 * NOTE(review): the success return for the count-query branch is not
 * visible in this elided listing. */
3832 VkResult anv_EnumerateInstanceLayerProperties(
3833 uint32_t* pPropertyCount,
3834 VkLayerProperties* pProperties)
3836 if (pProperties == NULL) {
3837 *pPropertyCount = 0;
3841 /* None supported at this time */
3842 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
/* vk_device check_status hook: query kernel reset statistics for our GPU
 * context and mark the device lost if a hang was attributed to us, either
 * while our work was executing ("active") or queued ("pending").
 * NOTE(review): the reset-stats call's trailing arguments, the ret < 0
 * test, and the success return are elided in this listing. */
3846 anv_device_check_status(struct vk_device *vk_device)
3848 struct anv_device *device = container_of(vk_device, struct anv_device, vk);
3850 uint32_t active, pending;
3851 int ret = anv_gem_context_get_reset_stats(device->fd, device->context_id,
3854 /* We don't know the real error. */
3855 return vk_device_set_lost(&device->vk, "get_reset_stats failed: %m");
3859 return vk_device_set_lost(&device->vk, "GPU hung on one of our command buffers");
3860 } else if (pending) {
3861 return vk_device_set_lost(&device->vk, "GPU hung with commands in-flight");
/* Wait for a BO to become idle via the GEM wait ioctl.  ETIME is treated
 * as a (non-fatal) timeout; any other failure marks the device lost.
 * NOTE(review): the return type, the timeout parameter declaration and
 * the success/timeout return statements are elided in this listing. */
3868 anv_device_wait(struct anv_device *device, struct anv_bo *bo,
3871 int ret = anv_gem_wait(device, bo->gem_handle, &timeout);
3872 if (ret == -1 && errno == ETIME) {
3874 } else if (ret == -1) {
3875 /* We don't know the real error. */
3876 return vk_device_set_lost(&device->vk, "gem wait failed: %m");
/* Allocate a GPU virtual-address range of @size/@align from the device
 * VMA heaps, serialized by vma_mutex:
 *   - client-visible allocations come from vma_cva, optionally at the
 *     exact @client_address requested — with no fallback to other heaps;
 *   - otherwise vma_hi, unless a 32-bit address is required, in which
 *     case vma_lo.
 * Returns the address in canonical (sign-extended 48-bit) form; 0 means
 * allocation failure from util_vma_heap_alloc.
 * NOTE(review): the `addr` declaration and some else-branches are elided
 * in this listing. */
3883 anv_vma_alloc(struct anv_device *device,
3884 uint64_t size, uint64_t align,
3885 enum anv_bo_alloc_flags alloc_flags,
3886 uint64_t client_address)
3888 pthread_mutex_lock(&device->vma_mutex);
3892 if (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) {
3893 if (client_address) {
3894 if (util_vma_heap_alloc_addr(&device->vma_cva,
3895 client_address, size)) {
3896 addr = client_address;
3899 addr = util_vma_heap_alloc(&device->vma_cva, size, align);
3901 /* We don't want to fall back to other heaps */
/* A non-zero client_address is only legal for client-visible requests. */
3905 assert(client_address == 0);
3907 if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS))
3908 addr = util_vma_heap_alloc(&device->vma_hi, size, align);
3911 addr = util_vma_heap_alloc(&device->vma_lo, size, align);
3914 pthread_mutex_unlock(&device->vma_mutex);
3916 assert(addr == intel_48b_address(addr));
3917 return intel_canonical_address(addr);
/* Return a VMA range previously handed out by anv_vma_alloc.  The
 * canonical address is converted back to 48-bit form and routed to the
 * heap whose address window contains it: vma_lo, vma_cva, or (asserted
 * by range) vma_hi.  Serialized by vma_mutex like the allocator. */
3921 anv_vma_free(struct anv_device *device,
3922 uint64_t address, uint64_t size)
3924 const uint64_t addr_48b = intel_48b_address(address);
3926 pthread_mutex_lock(&device->vma_mutex);
3928 if (addr_48b >= LOW_HEAP_MIN_ADDRESS &&
3929 addr_48b <= LOW_HEAP_MAX_ADDRESS) {
3930 util_vma_heap_free(&device->vma_lo, addr_48b, size);
3931 } else if (addr_48b >= CLIENT_VISIBLE_HEAP_MIN_ADDRESS &&
3932 addr_48b <= CLIENT_VISIBLE_HEAP_MAX_ADDRESS) {
3933 util_vma_heap_free(&device->vma_cva, addr_48b, size);
3935 assert(addr_48b >= HIGH_HEAP_MIN_ADDRESS);
3936 util_vma_heap_free(&device->vma_hi, addr_48b, size);
3939 pthread_mutex_unlock(&device->vma_mutex);
/* vkAllocateMemory: allocate or import the BO backing a VkDeviceMemory.
 *
 * Visible flow:
 *   1. validate size against MAX_MEMORY_ALLOCATION_SIZE and the heap
 *      budget (page-aligned size, atomic heap-usage read);
 *   2. walk the pNext chain for export info, AHardwareBuffer / fd /
 *      host-pointer imports, allocate flags, dedicated info, and an
 *      opaque-capture address;
 *   3. derive anv_bo_alloc_flags from the memory type and chain;
 *   4. import (AHB, fd, host pointer) or perform a fresh allocation;
 *   5. apply legacy-scanout tiling for dedicated images, update heap
 *      accounting, and link the object into the device's memory list.
 *
 * NOTE(review): numerous error-path lines (goto targets, braces, `break`
 * statements) are elided in this listing.
 */
3942 VkResult anv_AllocateMemory(
3944 const VkMemoryAllocateInfo* pAllocateInfo,
3945 const VkAllocationCallbacks* pAllocator,
3946 VkDeviceMemory* pMem)
3948 ANV_FROM_HANDLE(anv_device, device, _device);
3949 struct anv_physical_device *pdevice = device->physical;
3950 struct anv_device_memory *mem;
3951 VkResult result = VK_SUCCESS;
3953 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3955 /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
3956 assert(pAllocateInfo->allocationSize > 0);
3958 VkDeviceSize aligned_alloc_size =
3959 align_u64(pAllocateInfo->allocationSize, 4096);
3961 if (aligned_alloc_size > MAX_MEMORY_ALLOCATION_SIZE)
3962 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3964 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.type_count);
3965 struct anv_memory_type *mem_type =
3966 &pdevice->memory.types[pAllocateInfo->memoryTypeIndex];
3967 assert(mem_type->heapIndex < pdevice->memory.heap_count);
3968 struct anv_memory_heap *mem_heap =
3969 &pdevice->memory.heaps[mem_type->heapIndex];
/* Soft budget check up front; the authoritative accounting happens after
 * the BO exists (see the p_atomic_add_return below). */
3971 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3972 if (mem_heap_used + aligned_alloc_size > mem_heap->size)
3973 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3975 mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
3976 VK_OBJECT_TYPE_DEVICE_MEMORY);
3978 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3980 mem->type = mem_type;
3985 mem->host_ptr = NULL;
3987 enum anv_bo_alloc_flags alloc_flags = 0;
/* Collect the pNext structures we understand; anything else (except the
 * internal WSI struct) is reported as ignored. */
3989 const VkExportMemoryAllocateInfo *export_info = NULL;
3990 const VkImportAndroidHardwareBufferInfoANDROID *ahw_import_info = NULL;
3991 const VkImportMemoryFdInfoKHR *fd_info = NULL;
3992 const VkImportMemoryHostPointerInfoEXT *host_ptr_info = NULL;
3993 const VkMemoryDedicatedAllocateInfo *dedicated_info = NULL;
3994 VkMemoryAllocateFlags vk_flags = 0;
3995 uint64_t client_address = 0;
3997 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
3998 switch (ext->sType) {
3999 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
4000 export_info = (void *)ext;
4003 case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
4004 ahw_import_info = (void *)ext;
4007 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
4008 fd_info = (void *)ext;
4011 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT:
4012 host_ptr_info = (void *)ext;
4015 case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO: {
4016 const VkMemoryAllocateFlagsInfo *flags_info = (void *)ext;
4017 vk_flags = flags_info->flags;
4021 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
4022 dedicated_info = (void *)ext;
4025 case VK_STRUCTURE_TYPE_MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO: {
4026 const VkMemoryOpaqueCaptureAddressAllocateInfo *addr_info =
4027 (const VkMemoryOpaqueCaptureAddressAllocateInfo *)ext;
4028 client_address = addr_info->opaqueCaptureAddress;
4033 if (ext->sType != VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA)
4034 /* this isn't a real enum value,
4035 * so use conditional to avoid compiler warn
4037 anv_debug_ignored_stype(ext->sType);
4042 /* By default, we want all VkDeviceMemory objects to support CCS */
4043 if (device->physical->has_implicit_ccs && device->info->has_aux_map)
4044 alloc_flags |= ANV_BO_ALLOC_IMPLICIT_CCS;
4046 /* If i915 reported a mappable/non_mappable vram regions and the
4047 * application want lmem mappable, then we need to use the
4048 * I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag to create our BO.
4050 if (pdevice->vram_mappable.size > 0 &&
4051 pdevice->vram_non_mappable.size > 0 &&
4052 (mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
4053 (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
4054 alloc_flags |= ANV_BO_ALLOC_LOCAL_MEM_CPU_VISIBLE;
4056 if (!(mem_type->propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT))
4057 alloc_flags |= ANV_BO_ALLOC_NO_LOCAL_MEM;
4059 /* If the allocated buffer might end up in local memory and it's host
4060 * visible, make CPU writes are combined, it should be faster.
4062 if (!(alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) &&
4063 (mem_type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT))
4064 alloc_flags |= ANV_BO_ALLOC_WRITE_COMBINE;
4066 if (vk_flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)
4067 alloc_flags |= ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS;
4069 if ((export_info && export_info->handleTypes) ||
4070 (fd_info && fd_info->handleType) ||
4071 (host_ptr_info && host_ptr_info->handleType)) {
4072 /* Anything imported or exported is EXTERNAL */
4073 alloc_flags |= ANV_BO_ALLOC_EXTERNAL;
4076 /* Check if we need to support Android HW buffer export. If so,
4077 * create AHardwareBuffer and import memory from it.
4079 bool android_export = false;
4080 if (export_info && export_info->handleTypes &
4081 VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)
4082 android_export = true;
4084 if (ahw_import_info) {
4085 result = anv_import_ahw_memory(_device, mem, ahw_import_info);
4086 if (result != VK_SUCCESS)
4090 } else if (android_export) {
4091 result = anv_create_ahw_memory(_device, mem, pAllocateInfo);
4092 if (result != VK_SUCCESS)
4098 /* The Vulkan spec permits handleType to be 0, in which case the struct is
4101 if (fd_info && fd_info->handleType) {
4102 /* At the moment, we support only the below handle types. */
4103 assert(fd_info->handleType ==
4104 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4105 fd_info->handleType ==
4106 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4108 result = anv_device_import_bo(device, fd_info->fd, alloc_flags,
4109 client_address, &mem->bo);
4110 if (result != VK_SUCCESS)
4113 /* For security purposes, we reject importing the bo if it's smaller
4114 * than the requested allocation size. This prevents a malicious client
4115 * from passing a buffer to a trusted client, lying about the size, and
4116 * telling the trusted client to try and texture from an image that goes
4117 * out-of-bounds. This sort of thing could lead to GPU hangs or worse
4118 * in the trusted client. The trusted client can protect itself against
4119 * this sort of attack but only if it can trust the buffer size.
4121 if (mem->bo->size < aligned_alloc_size) {
4122 result = vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
4123 "aligned allocationSize too large for "
4124 "VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT: "
4125 "%"PRIu64"B > %"PRIu64"B",
4126 aligned_alloc_size, mem->bo->size);
4127 anv_device_release_bo(device, mem->bo);
4131 /* From the Vulkan spec:
4133 * "Importing memory from a file descriptor transfers ownership of
4134 * the file descriptor from the application to the Vulkan
4135 * implementation. The application must not perform any operations on
4136 * the file descriptor after a successful import."
4138 * If the import fails, we leave the file descriptor open.
4144 if (host_ptr_info && host_ptr_info->handleType) {
/* Foreign-memory host pointers are not supported, only plain host
 * allocations. */
4145 if (host_ptr_info->handleType ==
4146 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_MAPPED_FOREIGN_MEMORY_BIT_EXT) {
4147 result = vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
4151 assert(host_ptr_info->handleType ==
4152 VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
4154 result = anv_device_import_bo_from_host_ptr(device,
4155 host_ptr_info->pHostPointer,
4156 pAllocateInfo->allocationSize,
4160 if (result != VK_SUCCESS)
4163 mem->host_ptr = host_ptr_info->pHostPointer;
4167 /* Regular allocate (not importing memory). */
4169 result = anv_device_alloc_bo(device, "user", pAllocateInfo->allocationSize,
4170 alloc_flags, client_address, &mem->bo);
4171 if (result != VK_SUCCESS)
4174 if (dedicated_info && dedicated_info->image != VK_NULL_HANDLE) {
4175 ANV_FROM_HANDLE(anv_image, image, dedicated_info->image);
4177 /* Some legacy (non-modifiers) consumers need the tiling to be set on
4178 * the BO. In this case, we have a dedicated allocation.
4180 if (image->vk.wsi_legacy_scanout) {
4181 const struct isl_surf *surf = &image->planes[0].primary_surface.isl;
4182 result = anv_device_set_bo_tiling(device, mem->bo,
4185 if (result != VK_SUCCESS) {
4186 anv_device_release_bo(device, mem->bo);
/* Authoritative budget accounting: add the real BO size and back it out
 * again if that pushed us over the heap limit. */
4193 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
4194 if (mem_heap_used > mem_heap->size) {
4195 p_atomic_add(&mem_heap->used, -mem->bo->size);
4196 anv_device_release_bo(device, mem->bo);
4197 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
4198 "Out of heap memory");
4202 pthread_mutex_lock(&device->mutex);
4203 list_addtail(&mem->link, &device->memory_objects);
4204 pthread_mutex_unlock(&device->mutex);
4206 *pMem = anv_device_memory_to_handle(mem);
/* Shared error path: free the wrapper object allocated above. */
4211 vk_object_free(&device->vk, pAllocator, mem);
/* vkGetMemoryFdKHR: export a VkDeviceMemory's BO as an opaque-fd or
 * dma-buf file descriptor.  Only those two handle types are accepted.
 * NOTE(review): the VkDevice and int *pFd parameter lines are elided in
 * this listing. */
4216 VkResult anv_GetMemoryFdKHR(
4218 const VkMemoryGetFdInfoKHR* pGetFdInfo,
4221 ANV_FROM_HANDLE(anv_device, dev, device_h);
4222 ANV_FROM_HANDLE(anv_device_memory, mem, pGetFdInfo->memory);
4224 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
4226 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
4227 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
4229 return anv_device_export_bo(dev, mem->bo, pFd);
/* vkGetMemoryFdPropertiesKHR: report the memory types an imported fd may
 * be bound to.  dma-bufs are importable into every type; all other handle
 * types (including opaque fds, per the spec's valid-usage text) fail with
 * VK_ERROR_INVALID_EXTERNAL_HANDLE. */
4232 VkResult anv_GetMemoryFdPropertiesKHR(
4234 VkExternalMemoryHandleTypeFlagBits handleType,
4236 VkMemoryFdPropertiesKHR* pMemoryFdProperties)
4238 ANV_FROM_HANDLE(anv_device, device, _device);
4240 switch (handleType) {
4241 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
4242 /* dma-buf can be imported as any memory type */
4243 pMemoryFdProperties->memoryTypeBits =
4244 (1 << device->physical->memory.type_count) - 1;
4248 /* The valid usage section for this function says:
4250 * "handleType must not be one of the handle types defined as
4253 * So opaque handle types fall into the default "unsupported" case.
4255 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
/* vkGetMemoryHostPointerPropertiesEXT: host allocations may be imported
 * into any memory type; every other handle type is invalid.
 * NOTE(review): the success return of the host-allocation branch is
 * elided in this listing. */
4259 VkResult anv_GetMemoryHostPointerPropertiesEXT(
4261 VkExternalMemoryHandleTypeFlagBits handleType,
4262 const void* pHostPointer,
4263 VkMemoryHostPointerPropertiesEXT* pMemoryHostPointerProperties)
4265 ANV_FROM_HANDLE(anv_device, device, _device);
4267 assert(pMemoryHostPointerProperties->sType ==
4268 VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT);
4270 switch (handleType) {
4271 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT:
4272 /* Host memory can be imported as any memory type. */
4273 pMemoryHostPointerProperties->memoryTypeBits =
4274 (1ull << device->physical->memory.type_count) - 1;
4279 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
/* vkFreeMemory: unlink the object from the device's memory list, unmap
 * any live CPU mapping, subtract the BO size from heap accounting,
 * release the BO (and the AHardwareBuffer on Android) and free the
 * wrapper.
 * NOTE(review): the NULL-memory early-out and the negated-size argument
 * of the heap-accounting call are elided in this listing. */
4283 void anv_FreeMemory(
4285 VkDeviceMemory _mem,
4286 const VkAllocationCallbacks* pAllocator)
4288 ANV_FROM_HANDLE(anv_device, device, _device);
4289 ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
4294 pthread_mutex_lock(&device->mutex);
4295 list_del(&mem->link);
4296 pthread_mutex_unlock(&device->mutex);
4299 anv_UnmapMemory(_device, _mem);
4301 p_atomic_add(&device->physical->memory.heaps[mem->type->heapIndex].used,
4304 anv_device_release_bo(device, mem->bo);
4306 #if defined(ANDROID) && ANDROID_API_LEVEL >= 26
4308 AHardwareBuffer_release(mem->ahw);
4311 vk_object_free(&device->vk, pAllocator, mem);
/* vkMapMemory: expose a CPU pointer to [offset, offset+size) of the
 * memory object.
 *
 * Host-pointer imports short-circuit to simple pointer arithmetic.
 * Otherwise the BO is mmapped on whole 4k pages: map_offset is the
 * page-rounded-down offset, map_delta the remainder, and the user
 * pointer is map + map_delta.  Non-LLC platforms map coherent types
 * write-combined (I915_MMAP_WC).
 * NOTE(review): several lines (mem == NULL early-out, the has_mmap_offset
 * else-branch, the `void *map` declaration, return statements) are elided
 * in this listing. */
4314 VkResult anv_MapMemory(
4316 VkDeviceMemory _memory,
4317 VkDeviceSize offset,
4319 VkMemoryMapFlags flags,
4322 ANV_FROM_HANDLE(anv_device, device, _device);
4323 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4330 if (mem->host_ptr) {
4331 *ppData = mem->host_ptr + offset;
4335 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
4337 * * memory must have been created with a memory type that reports
4338 * VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
4340 if (!(mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
4341 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4342 "Memory object not mappable.");
4345 if (size == VK_WHOLE_SIZE)
4346 size = mem->bo->size - offset;
4348 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
4350 * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
4351 * assert(size != 0);
4352 * * If size is not equal to VK_WHOLE_SIZE, size must be less than or
4353 * equal to the size of the memory minus offset
4356 assert(offset + size <= mem->bo->size);
/* Guard against a 64-bit VkDeviceSize that cannot be represented in the
 * host's size_t (32-bit builds). */
4358 if (size != (size_t)size) {
4359 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4360 "requested size 0x%"PRIx64" does not fit in %u bits",
4361 size, (unsigned)(sizeof(size_t) * 8));
4364 /* From the Vulkan 1.2.194 spec:
4366 * "memory must not be currently host mapped"
4368 if (mem->map != NULL) {
4369 return vk_errorf(device, VK_ERROR_MEMORY_MAP_FAILED,
4370 "Memory object already mapped.");
4373 uint32_t gem_flags = 0;
4375 if (!device->info->has_llc &&
4376 (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT))
4377 gem_flags |= I915_MMAP_WC;
4379 /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
4380 uint64_t map_offset;
4381 if (!device->physical->info.has_mmap_offset)
4382 map_offset = offset & ~4095ull;
4385 assert(offset >= map_offset);
4386 uint64_t map_size = (offset + size) - map_offset;
4388 /* Let's map whole pages */
4389 map_size = align_u64(map_size, 4096);
4392 VkResult result = anv_device_map_bo(device, mem->bo, map_offset,
4393 map_size, gem_flags, &map);
4394 if (result != VK_SUCCESS)
4398 mem->map_size = map_size;
/* map_delta is re-added to user-supplied offsets in the flush/invalidate
 * entry points below. */
4399 mem->map_delta = (offset - map_offset);
4400 *ppData = mem->map + mem->map_delta;
/* vkUnmapMemory: tear down the CPU mapping created by anv_MapMemory.
 * Host-pointer imports were never mapped by us, so they are skipped.
 * NOTE(review): the trailing map/map_size reset statements are elided in
 * this listing. */
4405 void anv_UnmapMemory(
4407 VkDeviceMemory _memory)
4409 ANV_FROM_HANDLE(anv_device, device, _device);
4410 ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
4412 if (mem == NULL || mem->host_ptr)
4415 anv_device_unmap_bo(device, mem->bo, mem->map, mem->map_size);
/* vkFlushMappedMemoryRanges: push CPU writes in the given ranges out to
 * memory with clflush.  Only needed on integrated GPUs that require
 * manual flushing (need_clflush); coherent memory types are skipped.
 * map_delta compensates for the page rounding applied in anv_MapMemory.
 * NOTE(review): the early-success returns and closing braces are elided
 * in this listing. */
4422 VkResult anv_FlushMappedMemoryRanges(
4424 uint32_t memoryRangeCount,
4425 const VkMappedMemoryRange* pMemoryRanges)
4427 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
4428 ANV_FROM_HANDLE(anv_device, device, _device);
4430 if (!device->physical->memory.need_clflush)
4433 /* Make sure the writes we're flushing have landed. */
4434 __builtin_ia32_mfence();
4436 for (uint32_t i = 0; i < memoryRangeCount; i++) {
4437 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
4438 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4441 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
4442 if (map_offset >= mem->map_size)
/* Clamp the flush to what is actually mapped. */
4445 intel_clflush_range(mem->map + map_offset,
4446 MIN2(pMemoryRanges[i].size,
4447 mem->map_size - map_offset));
/* vkInvalidateMappedMemoryRanges: mirror of the flush path — discard
 * stale CPU cache lines for the given ranges so subsequent reads see GPU
 * writes.  Same need_clflush / coherent-type / map_delta handling as
 * anv_FlushMappedMemoryRanges, with the fence after the loop so no read
 * is hoisted above the invalidate.
 * NOTE(review): the early-success returns and closing braces are elided
 * in this listing. */
4453 VkResult anv_InvalidateMappedMemoryRanges(
4455 uint32_t memoryRangeCount,
4456 const VkMappedMemoryRange* pMemoryRanges)
4458 #ifdef SUPPORT_INTEL_INTEGRATED_GPUS
4459 ANV_FROM_HANDLE(anv_device, device, _device);
4461 if (!device->physical->memory.need_clflush)
4464 for (uint32_t i = 0; i < memoryRangeCount; i++) {
4465 ANV_FROM_HANDLE(anv_device_memory, mem, pMemoryRanges[i].memory);
4466 if (mem->type->propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
4469 uint64_t map_offset = pMemoryRanges[i].offset + mem->map_delta;
4470 if (map_offset >= mem->map_size)
4473 intel_invalidate_range(mem->map + map_offset,
4474 MIN2(pMemoryRanges[i].size,
4475 mem->map_size - map_offset));
4478 /* Make sure no reads get moved up above the invalidate. */
4479 __builtin_ia32_mfence();
/* vkGetDeviceMemoryCommitment: ANV does not use lazily-allocated memory,
 * so the committed size is always reported as zero.
 * NOTE(review): the VkDevice parameter line is elided in this listing. */
4484 void anv_GetDeviceMemoryCommitment(
4486 VkDeviceMemory memory,
4487 VkDeviceSize* pCommittedMemoryInBytes)
4489 *pCommittedMemoryInBytes = 0;
/* Bind one buffer to device memory by recording its anv_address
 * (BO + offset); a NULL memory handle resets the buffer to
 * ANV_NULL_ADDRESS.  The asserts check the spec's requirement that the
 * bound range fits inside the memory object.
 * NOTE(review): the mem != NULL condition, the .bo initializer, and the
 * else-branch structure are elided in this listing. */
4493 anv_bind_buffer_memory(const VkBindBufferMemoryInfo *pBindInfo)
4495 ANV_FROM_HANDLE(anv_device_memory, mem, pBindInfo->memory);
4496 ANV_FROM_HANDLE(anv_buffer, buffer, pBindInfo->buffer);
4498 assert(pBindInfo->sType == VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO);
4501 assert(pBindInfo->memoryOffset < mem->bo->size);
4502 assert(mem->bo->size - pBindInfo->memoryOffset >= buffer->vk.size);
4503 buffer->address = (struct anv_address) {
4505 .offset = pBindInfo->memoryOffset,
4508 buffer->address = ANV_NULL_ADDRESS;
/* vkBindBufferMemory2: apply each bind info in turn via
 * anv_bind_buffer_memory.
 * NOTE(review): the final VK_SUCCESS return is elided in this listing. */
4512 VkResult anv_BindBufferMemory2(
4514 uint32_t bindInfoCount,
4515 const VkBindBufferMemoryInfo* pBindInfos)
4517 for (uint32_t i = 0; i < bindInfoCount; i++)
4518 anv_bind_buffer_memory(&pBindInfos[i]);
/* vkQueueBindSparse: sparse binding is not supported by this driver, so
 * the call fails with VK_ERROR_FEATURE_NOT_PRESENT — unless the device
 * is already lost, which takes precedence. */
4523 VkResult anv_QueueBindSparse(
4525 uint32_t bindInfoCount,
4526 const VkBindSparseInfo* pBindInfo,
4529 ANV_FROM_HANDLE(anv_queue, queue, _queue);
4530 if (vk_device_is_lost(&queue->device->vk))
4531 return VK_ERROR_DEVICE_LOST;
4533 return vk_error(queue, VK_ERROR_FEATURE_NOT_PRESENT);
/* vkCreateEvent: an event is a 64-bit word in the dynamic state pool,
 * initialized to VK_EVENT_RESET, so the GPU can read/write its status
 * directly.
 * NOTE(review): the allocation-failure check and the final VK_SUCCESS
 * return are elided in this listing. */
4538 VkResult anv_CreateEvent(
4540 const VkEventCreateInfo* pCreateInfo,
4541 const VkAllocationCallbacks* pAllocator,
4544 ANV_FROM_HANDLE(anv_device, device, _device);
4545 struct anv_event *event;
4547 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
4549 event = vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
4550 VK_OBJECT_TYPE_EVENT);
4552 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4554 event->state = anv_state_pool_alloc(&device->dynamic_state_pool,
4555 sizeof(uint64_t), 8);
4556 *(uint64_t *)event->state.map = VK_EVENT_RESET;
4558 *pEvent = anv_event_to_handle(event);
/* vkDestroyEvent: return the event's state-pool slot and free the
 * wrapper object.
 * NOTE(review): the NULL-event early-out is elided in this listing. */
4563 void anv_DestroyEvent(
4566 const VkAllocationCallbacks* pAllocator)
4568 ANV_FROM_HANDLE(anv_device, device, _device);
4569 ANV_FROM_HANDLE(anv_event, event, _event);
4574 anv_state_pool_free(&device->dynamic_state_pool, event->state);
4576 vk_object_free(&device->vk, pAllocator, event);
/* vkGetEventStatus: the stored 64-bit word is itself the VkResult
 * (VK_EVENT_SET / VK_EVENT_RESET) and is returned directly; a lost
 * device takes precedence. */
4579 VkResult anv_GetEventStatus(
4583 ANV_FROM_HANDLE(anv_device, device, _device);
4584 ANV_FROM_HANDLE(anv_event, event, _event);
4586 if (vk_device_is_lost(&device->vk))
4587 return VK_ERROR_DEVICE_LOST;
4589 return *(uint64_t *)event->state.map;
/* vkSetEvent (host side): store VK_EVENT_SET into the event's word.
 * NOTE(review): the VK_SUCCESS return is elided in this listing. */
4592 VkResult anv_SetEvent(
4596 ANV_FROM_HANDLE(anv_event, event, _event);
4598 *(uint64_t *)event->state.map = VK_EVENT_SET;
/* vkResetEvent (host side): store VK_EVENT_RESET into the event's word.
 * NOTE(review): the VK_SUCCESS return is elided in this listing. */
4603 VkResult anv_ResetEvent(
4607 ANV_FROM_HANDLE(anv_event, event, _event);
4609 *(uint64_t *)event->state.map = VK_EVENT_RESET;
/* Shared helper for the buffer memory-requirement queries: fill
 * pMemoryRequirements from a size and usage mask.  Alignment starts at a
 * 16-byte baseline and is raised to ANV_UBO_ALIGNMENT for uniform
 * buffers; with robust buffer access the size of UBO/SSBO-capable
 * buffers is rounded up to a DWord multiple.  All memory types are
 * reported as compatible, and dedicated allocation is never requested. */
4617 anv_get_buffer_memory_requirements(struct anv_device *device,
4619 VkBufferUsageFlags usage,
4620 VkMemoryRequirements2* pMemoryRequirements)
4622 /* The Vulkan spec (git aaed022) says:
4624 * memoryTypeBits is a bitfield and contains one bit set for every
4625 * supported memory type for the resource. The bit `1<<i` is set if and
4626 * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
4627 * structure for the physical device is supported.
4629 uint32_t memory_types = (1ull << device->physical->memory.type_count) - 1;
4631 /* Base alignment requirement of a cache line */
4632 uint32_t alignment = 16;
4634 if (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
4635 alignment = MAX2(alignment, ANV_UBO_ALIGNMENT);
4637 pMemoryRequirements->memoryRequirements.size = size;
4638 pMemoryRequirements->memoryRequirements.alignment = alignment;
4640 /* Storage and Uniform buffers should have their size aligned to
4641 * 32-bits to avoid boundary checks when last DWord is not complete.
4642 * This would ensure that not internal padding would be needed for
4645 if (device->robust_buffer_access &&
4646 (usage & VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ||
4647 usage & VK_BUFFER_USAGE_STORAGE_BUFFER_BIT))
4648 pMemoryRequirements->memoryRequirements.size = align_u64(size, 4);
4650 pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
4652 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
4653 switch (ext->sType) {
4654 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
4655 VkMemoryDedicatedRequirements *requirements = (void *)ext;
4656 requirements->prefersDedicatedAllocation = false;
4657 requirements->requiresDedicatedAllocation = false;
4662 anv_debug_ignored_stype(ext->sType);
/* vkGetBufferMemoryRequirements2: delegate to the shared helper using
 * the existing buffer's size and usage.
 * NOTE(review): the size/usage argument lines of the helper call are
 * elided in this listing. */
4668 void anv_GetBufferMemoryRequirements2(
4670 const VkBufferMemoryRequirementsInfo2* pInfo,
4671 VkMemoryRequirements2* pMemoryRequirements)
4673 ANV_FROM_HANDLE(anv_device, device, _device);
4674 ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4676 anv_get_buffer_memory_requirements(device,
4679 pMemoryRequirements);
/* vkGetDeviceBufferMemoryRequirements(KHR): same as the buffer variant
 * above but computed straight from the VkBufferCreateInfo, without
 * creating a buffer. */
4682 void anv_GetDeviceBufferMemoryRequirementsKHR(
4684 const VkDeviceBufferMemoryRequirements* pInfo,
4685 VkMemoryRequirements2* pMemoryRequirements)
4687 ANV_FROM_HANDLE(anv_device, device, _device);
4689 anv_get_buffer_memory_requirements(device,
4690 pInfo->pCreateInfo->size,
4691 pInfo->pCreateInfo->usage,
4692 pMemoryRequirements);
/* vkCreateBuffer: allocate the wrapper object; no backing memory exists
 * until vkBindBufferMemory, so the address starts as ANV_NULL_ADDRESS.
 * Sizes beyond the GTT are rejected up front.
 * NOTE(review): the vk_buffer_create failure check and final VK_SUCCESS
 * return are elided in this listing. */
4695 VkResult anv_CreateBuffer(
4697 const VkBufferCreateInfo* pCreateInfo,
4698 const VkAllocationCallbacks* pAllocator,
4701 ANV_FROM_HANDLE(anv_device, device, _device);
4702 struct anv_buffer *buffer;
4704 /* Don't allow creating buffers bigger than our address space. The real
4705 * issue here is that we may align up the buffer size and we don't want
4706 * doing so to cause roll-over. However, no one has any business
4707 * allocating a buffer larger than our GTT size.
4709 if (pCreateInfo->size > device->physical->gtt_size)
4710 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4712 buffer = vk_buffer_create(&device->vk, pCreateInfo,
4713 pAllocator, sizeof(*buffer));
4715 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4717 buffer->address = ANV_NULL_ADDRESS;
4719 *pBuffer = anv_buffer_to_handle(buffer);
4724 void anv_DestroyBuffer(
4727 const VkAllocationCallbacks* pAllocator)
4729 ANV_FROM_HANDLE(anv_device, device, _device);
4730 ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
4735 vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
4738 VkDeviceAddress anv_GetBufferDeviceAddress(
4740 const VkBufferDeviceAddressInfo* pInfo)
4742 ANV_FROM_HANDLE(anv_buffer, buffer, pInfo->buffer);
4744 assert(!anv_address_is_null(buffer->address));
4746 return anv_address_physical(buffer->address);
4749 uint64_t anv_GetBufferOpaqueCaptureAddress(
4751 const VkBufferDeviceAddressInfo* pInfo)
4756 uint64_t anv_GetDeviceMemoryOpaqueCaptureAddress(
4758 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
4760 ANV_FROM_HANDLE(anv_device_memory, memory, pInfo->memory);
4762 assert(memory->bo->has_client_visible_address);
4764 return intel_48b_address(memory->bo->offset);
/* Fill an ISL buffer surface state at `state.map`.
 *
 * device  - owning device (provides the ISL device for encoding).
 * state   - pre-allocated surface-state slot to write into.
 * format  - ISL format for the buffer view.
 * swizzle - channel swizzle applied to the view.
 * usage   - ISL usage flags; also used to pick the MOCS value.
 * address - GPU address of the buffer data (bo may be NULL for null views).
 * range   - size of the view in bytes.
 * stride  - element stride in bytes.
 */
void
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
                              enum isl_format format,
                              struct isl_swizzle swizzle,
                              isl_surf_usage_flags_t usage,
                              struct anv_address address,
                              uint32_t range, uint32_t stride)
{
   isl_buffer_fill_state(&device->isl_dev, state.map,
                         .address = anv_address_physical(address),
                         /* External BOs may need different cacheability. */
                         .mocs = isl_mocs(&device->isl_dev, usage,
                                          address.bo && address.bo->is_external),
                         .size_B = range,
                         .format = format,
                         .swizzle = swizzle,
                         .stride_B = stride);
}
4785 void anv_DestroySampler(
4788 const VkAllocationCallbacks* pAllocator)
4790 ANV_FROM_HANDLE(anv_device, device, _device);
4791 ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
4796 if (sampler->bindless_state.map) {
4797 anv_state_pool_free(&device->dynamic_state_pool,
4798 sampler->bindless_state);
4801 if (sampler->custom_border_color.map) {
4802 anv_state_reserved_pool_free(&device->custom_border_colors,
4803 sampler->custom_border_color);
4806 vk_object_free(&device->vk, pAllocator, sampler);
/* Time domains reported by vkGetPhysicalDeviceCalibrateableTimeDomainsEXT.
 * CLOCK_MONOTONIC_RAW is only advertised on platforms that define it.
 */
static const VkTimeDomainEXT anv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};
4817 VkResult anv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
4818 VkPhysicalDevice physicalDevice,
4819 uint32_t *pTimeDomainCount,
4820 VkTimeDomainEXT *pTimeDomains)
4823 VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);
4825 for (d = 0; d < ARRAY_SIZE(anv_time_domains); d++) {
4826 vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
4827 *i = anv_time_domains[d];
4831 return vk_outarray_status(&out);
/* vkGetCalibratedTimestampsEXT entrypoint.
 *
 * Samples every requested time domain and reports the maximum deviation
 * between them.  The host clock is read once before (`begin`) and once
 * after (`end`) the per-domain sampling loop so the deviation bound
 * covers the whole sampling window.
 *
 * Returns VK_SUCCESS, or marks the device lost if the GPU TIMESTAMP
 * register cannot be read.
 */
VkResult anv_GetCalibratedTimestampsEXT(
   VkDevice                                     _device,
   uint32_t                                     timestampCount,
   const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
   uint64_t                                     *pTimestamps,
   uint64_t                                     *pMaxDeviation)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   uint64_t timestamp_frequency = device->info->timestamp_frequency;
   int d;
   uint64_t begin, end;
   /* Coarsest tick period (ns) among all sampled clocks. */
   uint64_t max_clock_period = 0;

   /* Bracket the sampling with the raw monotonic clock when available. */
#ifdef CLOCK_MONOTONIC_RAW
   begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = vk_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         if (!intel_gem_read_render_timestamp(device->fd, &pTimestamps[d])) {
            return vk_device_set_lost(&device->vk, "Failed to read the "
                                                   "TIMESTAMP register: %m");
         }
         /* Device tick period in ns, rounded up. */
         uint64_t device_period = DIV_ROUND_UP(1000000000, timestamp_frequency);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         /* Reuse the `begin` sample taken above for this domain. */
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = vk_clock_gettime(CLOCK_MONOTONIC);
#endif

   *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);

   return VK_SUCCESS;
}
4890 void anv_GetPhysicalDeviceMultisamplePropertiesEXT(
4891 VkPhysicalDevice physicalDevice,
4892 VkSampleCountFlagBits samples,
4893 VkMultisamplePropertiesEXT* pMultisampleProperties)
4895 ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
4897 assert(pMultisampleProperties->sType ==
4898 VK_STRUCTURE_TYPE_MULTISAMPLE_PROPERTIES_EXT);
4900 VkExtent2D grid_size;
4901 if (samples & isl_device_get_sample_counts(&physical_device->isl_dev)) {
4902 grid_size.width = 1;
4903 grid_size.height = 1;
4905 grid_size.width = 0;
4906 grid_size.height = 0;
4908 pMultisampleProperties->maxSampleLocationGridSize = grid_size;
4910 vk_foreach_struct(ext, pMultisampleProperties->pNext)
4911 anv_debug_ignored_stype(ext->sType);
/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions.  We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr().  The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it is
    *         linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA.  The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR().  The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion().  The ICD must
    *         statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
    *          because the loader no longer does so.
    *
    *    - Loader interface v4 differs from v3 in:
    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    *
    *    - Loader interface v5 differs from v4 in:
    *        - The ICD must support Vulkan API version 1.1 and must not return
    *          VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
    *          Vulkan Loader with interface v4 or smaller is being used and the
    *          application provides an API version that is greater than 1.0.
    */
   /* Negotiate down to the highest version we support (v5). */
   *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
   return VK_SUCCESS;
}
/* vkGetPhysicalDeviceFragmentShadingRatesKHR entrypoint.
 *
 * Enumerates the supported coarse-pixel (fragment shading rate) sizes,
 * from 4x4 down to 1x1, together with the sample counts allowed for each
 * size.  Uses the standard Vulkan two-call outarray idiom.
 */
VkResult anv_GetPhysicalDeviceFragmentShadingRatesKHR(
    VkPhysicalDevice                            physicalDevice,
    uint32_t*                                   pFragmentShadingRateCount,
    VkPhysicalDeviceFragmentShadingRateKHR*     pFragmentShadingRates)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

   /* Append one rate entry with the given sample mask and fragment size. */
#define append_rate(_samples, _width, _height)                                      \
   do {                                                                             \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, __r) { \
         __r->sampleCounts = _samples;                                              \
         __r->fragmentSize = (VkExtent2D) {                                         \
            .width = _width,                                                        \
            .height = _height,                                                      \
         };                                                                         \
      }                                                                             \
   } while (0)

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&physical_device->isl_dev);

   /* BSpec 47003: There are a number of restrictions on the sample count
    * based off the coarse pixel size.  Indexed by coarse pixel area
    * (width * height).
    */
   static const VkSampleCountFlags cp_size_sample_limits[] = {
      [1]  = ISL_SAMPLE_COUNT_16_BIT | ISL_SAMPLE_COUNT_8_BIT |
             ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [2]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [4]  = ISL_SAMPLE_COUNT_4_BIT | ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [8]  = ISL_SAMPLE_COUNT_2_BIT | ISL_SAMPLE_COUNT_1_BIT,
      [16] = ISL_SAMPLE_COUNT_1_BIT,
   };

   /* Enumerate sizes largest-first, as required by the extension spec. */
   for (uint32_t x = 4; x >= 1; x /= 2) {
       for (uint32_t y = 4; y >= 1; y /= 2) {
          if (physical_device->info.has_coarse_pixel_primitive_and_cb) {
             /* BSpec 47003:
              *   "CPsize 1x4 and 4x1 are not supported"
              */
             if ((x == 1 && y == 4) || (x == 4 && y == 1))
                continue;

             /* For size {1, 1}, the sample count must be ~0.
              *
              * 4x2 is also a special case.
              */
             if (x == 1 && y == 1)
                append_rate(~0, x, y);
             else if (x == 4 && y == 2)
                append_rate(ISL_SAMPLE_COUNT_1_BIT, x, y);
             else
                append_rate(cp_size_sample_limits[x * y], x, y);
          } else {
             /* For size {1, 1}, the sample count must be ~0 */
             if (x == 1 && y == 1)
                append_rate(~0, x, y);
             else
                append_rate(sample_counts, x, y);
          }
       }
   }

#undef append_rate

   return vk_outarray_status(&out);
}