From a252a9dc24d4c9eaf40bd20d3cab4009aebf660d Mon Sep 17 00:00:00 2001 From: Constantine Shablia Date: Sat, 29 Apr 2023 02:09:16 +0300 Subject: [PATCH] anv: move get_features after get_device_extensions (ugly diff) Reviewed-by: Faith Ekstrand Part-of: --- src/intel/vulkan/anv_device.c | 2144 ++++++++++++++++++++--------------------- 1 file changed, 1071 insertions(+), 1073 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index dbb42f5..1c45d7f 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -185,10 +185,6 @@ static const struct vk_instance_extension_table instance_extensions = { }; static void -get_features(const struct anv_physical_device *pdevice, - struct vk_features *features); - -static void get_device_extensions(const struct anv_physical_device *device, struct vk_device_extension_table *ext) { @@ -381,1249 +377,1251 @@ get_device_extensions(const struct anv_physical_device *device, }; } -static uint64_t -anv_compute_sys_heap_size(struct anv_physical_device *device, - uint64_t total_ram) +static void +get_features(const struct anv_physical_device *pdevice, + struct vk_features *features) { - /* We don't want to burn too much ram with the GPU. If the user has 4GiB - * or less, we use at most half. If they have more than 4GiB, we use 3/4. - */ - uint64_t available_ram; - if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull) - available_ram = total_ram / 2; - else - available_ram = total_ram * 3 / 4; + struct vk_app_info *app_info = &pdevice->instance->vk.app_info; - /* We also want to leave some padding for things we allocate in the driver, - * so don't go over 3/4 of the GTT either. - */ - available_ram = MIN2(available_ram, device->gtt_size * 3 / 4); + /* Just pick one; they're all the same */ + const bool has_astc_ldr = + isl_format_supports_sampling(&pdevice->info, + ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16); - if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) { - /* When running with an overridden PCI ID, we may get a GTT size from - * the kernel that is greater than 2 GiB but the execbuf check for 48bit - * address support can still fail. Just clamp the address space size to - * 2 GiB if we don't have 48-bit support. 
- */ - mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but " - "not support for 48-bit addresses", - __FILE__, __LINE__); - available_ram = 2ull << 30; - } + const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing; - return available_ram; -} + const bool mesh_shader = + pdevice->vk.supported_extensions.EXT_mesh_shader || + pdevice->vk.supported_extensions.NV_mesh_shader; -static VkResult MUST_CHECK -anv_init_meminfo(struct anv_physical_device *device, int fd) -{ - const struct intel_device_info *devinfo = &device->info; + *features = (struct vk_features) { + /* Vulkan 1.0 */ + .robustBufferAccess = true, + .fullDrawIndexUint32 = true, + .imageCubeArray = true, + .independentBlend = true, + .geometryShader = true, + .tessellationShader = true, + .sampleRateShading = true, + .dualSrcBlend = true, + .logicOp = true, + .multiDrawIndirect = true, + .drawIndirectFirstInstance = true, + .depthClamp = true, + .depthBiasClamp = true, + .fillModeNonSolid = true, + .depthBounds = pdevice->info.ver >= 12, + .wideLines = true, + .largePoints = true, + .alphaToOne = true, + .multiViewport = true, + .samplerAnisotropy = true, + .textureCompressionETC2 = true, + .textureCompressionASTC_LDR = has_astc_ldr, + .textureCompressionBC = true, + .occlusionQueryPrecise = true, + .pipelineStatisticsQuery = true, + /* We can't do image stores in vec4 shaders */ + .vertexPipelineStoresAndAtomics = + pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] && + pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY], + .fragmentStoresAndAtomics = true, + .shaderTessellationAndGeometryPointSize = true, + .shaderImageGatherExtended = true, + .shaderStorageImageExtendedFormats = true, + .shaderStorageImageMultisample = false, + /* Gfx12.5 has all the required format supported in HW for typed + * read/writes + */ + .shaderStorageImageReadWithoutFormat = pdevice->info.verx10 >= 125, + .shaderStorageImageWriteWithoutFormat = true, + .shaderUniformBufferArrayDynamicIndexing = true, + .shaderSampledImageArrayDynamicIndexing = true, + .shaderStorageBufferArrayDynamicIndexing = true, + .shaderStorageImageArrayDynamicIndexing = true, + .shaderClipDistance = true, + .shaderCullDistance = true, + .shaderFloat64 = pdevice->info.has_64bit_float, + .shaderInt64 = true, + .shaderInt16 = true, + .shaderResourceMinLod = true, + .variableMultisampleRate = true, + .inheritedQueries = true, - device->sys.region = &devinfo->mem.sram.mem; - device->sys.size = - anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size); - device->sys.available = devinfo->mem.sram.mappable.free; + /* Vulkan 1.1 */ + .storageBuffer16BitAccess = !pdevice->instance->no_16bit, + .uniformAndStorageBuffer16BitAccess = !pdevice->instance->no_16bit, + .storagePushConstant16 = true, + .storageInputOutput16 = false, + .multiview = true, + .multiviewGeometryShader = true, + .multiviewTessellationShader = true, + .variablePointersStorageBuffer = true, + .variablePointers = true, + .protectedMemory = false, + .samplerYcbcrConversion = true, + .shaderDrawParameters = true, - device->vram_mappable.region = &devinfo->mem.vram.mem; - device->vram_mappable.size = devinfo->mem.vram.mappable.size; - device->vram_mappable.available = devinfo->mem.vram.mappable.free; + /* Vulkan 1.2 */ + .samplerMirrorClampToEdge = true, + .drawIndirectCount = true, + .storageBuffer8BitAccess = true, + .uniformAndStorageBuffer8BitAccess = true, + .storagePushConstant8 = true, + .shaderBufferInt64Atomics = true, + .shaderSharedInt64Atomics = false, + 
.shaderFloat16 = !pdevice->instance->no_16bit, + .shaderInt8 = !pdevice->instance->no_16bit, - device->vram_non_mappable.region = &devinfo->mem.vram.mem; - device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size; - device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free; + .descriptorIndexing = true, + .shaderInputAttachmentArrayDynamicIndexing = false, + .shaderUniformTexelBufferArrayDynamicIndexing = true, + .shaderStorageTexelBufferArrayDynamicIndexing = true, + .shaderUniformBufferArrayNonUniformIndexing = true, + .shaderSampledImageArrayNonUniformIndexing = true, + .shaderStorageBufferArrayNonUniformIndexing = true, + .shaderStorageImageArrayNonUniformIndexing = true, + .shaderInputAttachmentArrayNonUniformIndexing = false, + .shaderUniformTexelBufferArrayNonUniformIndexing = true, + .shaderStorageTexelBufferArrayNonUniformIndexing = true, + .descriptorBindingUniformBufferUpdateAfterBind = true, + .descriptorBindingSampledImageUpdateAfterBind = true, + .descriptorBindingStorageImageUpdateAfterBind = true, + .descriptorBindingStorageBufferUpdateAfterBind = true, + .descriptorBindingUniformTexelBufferUpdateAfterBind = true, + .descriptorBindingStorageTexelBufferUpdateAfterBind = true, + .descriptorBindingUpdateUnusedWhilePending = true, + .descriptorBindingPartiallyBound = true, + .descriptorBindingVariableDescriptorCount = true, + .runtimeDescriptorArray = true, - return VK_SUCCESS; -} + .samplerFilterMinmax = true, + .scalarBlockLayout = true, + .imagelessFramebuffer = true, + .uniformBufferStandardLayout = true, + .shaderSubgroupExtendedTypes = true, + .separateDepthStencilLayouts = true, + .hostQueryReset = true, + .timelineSemaphore = true, + .bufferDeviceAddress = true, + .bufferDeviceAddressCaptureReplay = true, + .bufferDeviceAddressMultiDevice = false, + .vulkanMemoryModel = true, + .vulkanMemoryModelDeviceScope = true, + .vulkanMemoryModelAvailabilityVisibilityChains = true, + .shaderOutputViewportIndex = true, + .shaderOutputLayer = true, + .subgroupBroadcastDynamicId = true, -static void -anv_update_meminfo(struct anv_physical_device *device, int fd) -{ - if (!intel_device_info_update_memory_info(&device->info, fd)) - return; + /* Vulkan 1.3 */ + .robustImageAccess = true, + .inlineUniformBlock = true, + .descriptorBindingInlineUniformBlockUpdateAfterBind = true, + .pipelineCreationCacheControl = true, + .privateData = true, + .shaderDemoteToHelperInvocation = true, + .shaderTerminateInvocation = true, + .subgroupSizeControl = true, + .computeFullSubgroups = true, + .synchronization2 = true, + .textureCompressionASTC_HDR = false, + .shaderZeroInitializeWorkgroupMemory = true, + .dynamicRendering = true, + .shaderIntegerDotProduct = true, + .maintenance4 = true, - const struct intel_device_info *devinfo = &device->info; - device->sys.available = devinfo->mem.sram.mappable.free; - device->vram_mappable.available = devinfo->mem.vram.mappable.free; - device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free; -} + /* VK_EXT_4444_formats */ + .formatA4R4G4B4 = true, + .formatA4B4G4R4 = false, + /* VK_KHR_acceleration_structure */ + .accelerationStructure = rt_enabled, + .accelerationStructureCaptureReplay = false, /* TODO */ + .accelerationStructureIndirectBuild = false, /* TODO */ + .accelerationStructureHostCommands = false, + .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled, -static VkResult -anv_physical_device_init_heaps(struct anv_physical_device *device, int fd) -{ - VkResult result = anv_init_meminfo(device, fd); 
- if (result != VK_SUCCESS) - return result; + /* VK_EXT_border_color_swizzle */ + .borderColorSwizzle = true, + .borderColorSwizzleFromImage = true, - assert(device->sys.size != 0); + /* VK_EXT_color_write_enable */ + .colorWriteEnable = true, - if (anv_physical_device_has_vram(device)) { - /* We can create 2 or 3 different heaps when we have local memory - * support, first heap with local memory size and second with system - * memory size and the third is added only if part of the vram is - * mappable to the host. - */ - device->memory.heap_count = 2; - device->memory.heaps[0] = (struct anv_memory_heap) { - /* If there is a vram_non_mappable, use that for the device only - * heap. Otherwise use the vram_mappable. - */ - .size = device->vram_non_mappable.size != 0 ? - device->vram_non_mappable.size : device->vram_mappable.size, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - .is_local_mem = true, - }; - device->memory.heaps[1] = (struct anv_memory_heap) { - .size = device->sys.size, - .flags = 0, - .is_local_mem = false, - }; - /* Add an additional smaller vram mappable heap if we can't map all the - * vram to the host. - */ - if (device->vram_non_mappable.size > 0) { - device->memory.heap_count++; - device->memory.heaps[2] = (struct anv_memory_heap) { - .size = device->vram_mappable.size, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - .is_local_mem = true, - }; - } + /* VK_EXT_image_2d_view_of_3d */ + .image2DViewOf3D = true, + .sampler2DViewOf3D = true, - device->memory.type_count = 3; - device->memory.types[0] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - .heapIndex = 0, - }; - device->memory.types[1] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 1, - }; - device->memory.types[2] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - /* This memory type either comes from heaps[0] if there is only - * mappable vram region, or from heaps[2] if there is both mappable & - * non-mappable vram regions. - */ - .heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0, - }; - } else if (device->info.has_llc) { - device->memory.heap_count = 1; - device->memory.heaps[0] = (struct anv_memory_heap) { - .size = device->sys.size, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - .is_local_mem = false, - }; - - /* Big core GPUs share LLC with the CPU and thus one memory type can be - * both cached and coherent at the same time. - * - * But some game engines can't handle single type well - * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438 - * - * The second memory type w/out HOST_CACHED_BIT will get write-combining. - * See anv_AllocateMemory()). - * - * The Intel Vulkan driver for Windows also advertises these memory types. 
- */ - device->memory.type_count = 3; - device->memory.types[0] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - .heapIndex = 0, - }; - device->memory.types[1] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = 0, - }; - device->memory.types[2] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, - }; - } else { - device->memory.heap_count = 1; - device->memory.heaps[0] = (struct anv_memory_heap) { - .size = device->sys.size, - .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, - .is_local_mem = false, - }; - - /* The spec requires that we expose a host-visible, coherent memory - * type, but Atom GPUs don't share LLC. Thus we offer two memory types - * to give the application a choice between cached, but not coherent and - * coherent but uncached (WC though). - */ - device->memory.type_count = 2; - device->memory.types[0] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT, - .heapIndex = 0, - }; - device->memory.types[1] = (struct anv_memory_type) { - .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | - VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, - .heapIndex = 0, - }; - } - - for (unsigned i = 0; i < device->memory.type_count; i++) { - VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags; - if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && - !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) -#ifdef SUPPORT_INTEL_INTEGRATED_GPUS - device->memory.need_clflush = true; -#else - return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, - "Memory configuration requires flushing, but it's not implemented for this architecture"); -#endif - } - - return VK_SUCCESS; -} - -static VkResult -anv_physical_device_init_uuids(struct anv_physical_device *device) -{ - const struct build_id_note *note = - build_id_find_nhdr_for_addr(anv_physical_device_init_uuids); - if (!note) { - return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, - "Failed to find build-id"); - } - - unsigned build_id_len = build_id_length(note); - if (build_id_len < 20) { - return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, - "build-id too short. It needs to be a SHA"); - } - - memcpy(device->driver_build_sha1, build_id_data(note), 20); - - struct mesa_sha1 sha1_ctx; - uint8_t sha1[20]; - STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1)); + /* VK_EXT_image_sliced_view_of_3d */ + .imageSlicedViewOf3D = true, - /* The pipeline cache UUID is used for determining when a pipeline cache is - * invalid. It needs both a driver build and the PCI ID of the device. 
- */ - _mesa_sha1_init(&sha1_ctx); - _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len); - _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id, - sizeof(device->info.pci_device_id)); - _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless, - sizeof(device->always_use_bindless)); - _mesa_sha1_final(&sha1_ctx, sha1); - memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE); + /* VK_NV_compute_shader_derivatives */ + .computeDerivativeGroupQuads = true, + .computeDerivativeGroupLinear = true, - intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE); - intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE); + /* VK_EXT_conditional_rendering */ + .conditionalRendering = true, + .inheritedConditionalRendering = true, - return VK_SUCCESS; -} + /* VK_EXT_custom_border_color */ + .customBorderColors = true, + .customBorderColorWithoutFormat = true, -static void -anv_physical_device_init_disk_cache(struct anv_physical_device *device) -{ -#ifdef ENABLE_SHADER_CACHE - char renderer[10]; - ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x", - device->info.pci_device_id); - assert(len == sizeof(renderer) - 2); + /* VK_EXT_depth_clamp_zero_one */ + .depthClampZeroOne = true, - char timestamp[41]; - _mesa_sha1_format(timestamp, device->driver_build_sha1); + /* VK_EXT_depth_clip_enable */ + .depthClipEnable = true, - const uint64_t driver_flags = - brw_get_compiler_config_value(device->compiler); - device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags); -#endif -} + /* VK_EXT_fragment_shader_interlock */ + .fragmentShaderSampleInterlock = true, + .fragmentShaderPixelInterlock = true, + .fragmentShaderShadingRateInterlock = false, -static void -anv_physical_device_free_disk_cache(struct anv_physical_device *device) -{ -#ifdef ENABLE_SHADER_CACHE - if (device->vk.disk_cache) { - disk_cache_destroy(device->vk.disk_cache); - device->vk.disk_cache = NULL; - } -#else - assert(device->vk.disk_cache == NULL); -#endif -} + /* VK_EXT_global_priority_query */ + .globalPriorityQuery = true, -/* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of - * queue overrides. - * - * To override the number queues: - * * "gc" is for graphics queues with compute support - * * "g" is for graphics queues with no compute support - * * "c" is for compute queues with no graphics support - * * "v" is for video queues with no graphics support - * - * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of - * advertised queues to be 2 queues with graphics+compute support, and 1 queue - * with compute-only support. - * - * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to - * include 1 queue with compute-only support, but it will not change the - * number of graphics+compute queues. - * - * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues - * to include 1 queue with compute-only support, and it would override the - * number of graphics+compute queues to be 0. 
- */ -static void -anv_override_engine_counts(int *gc_count, int *g_count, int *c_count, int *v_count) -{ - int gc_override = -1; - int g_override = -1; - int c_override = -1; - int v_override = -1; - char *env = getenv("ANV_QUEUE_OVERRIDE"); + /* VK_EXT_graphics_pipeline_library */ + .graphicsPipelineLibrary = pdevice->gpl_enabled, - if (env == NULL) - return; + /* VK_KHR_fragment_shading_rate */ + .pipelineFragmentShadingRate = true, + .primitiveFragmentShadingRate = + pdevice->info.has_coarse_pixel_primitive_and_cb, + .attachmentFragmentShadingRate = + pdevice->info.has_coarse_pixel_primitive_and_cb, - env = strdup(env); - char *save = NULL; - char *next = strtok_r(env, ",", &save); - while (next != NULL) { - if (strncmp(next, "gc=", 3) == 0) { - gc_override = strtol(next + 3, NULL, 0); - } else if (strncmp(next, "g=", 2) == 0) { - g_override = strtol(next + 2, NULL, 0); - } else if (strncmp(next, "c=", 2) == 0) { - c_override = strtol(next + 2, NULL, 0); - } else if (strncmp(next, "v=", 2) == 0) { - v_override = strtol(next + 2, NULL, 0); - } else { - mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next); - } - next = strtok_r(NULL, ",", &save); - } - free(env); - if (gc_override >= 0) - *gc_count = gc_override; - if (g_override >= 0) - *g_count = g_override; - if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0)) - mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the " - "Vulkan specification"); - if (c_override >= 0) - *c_count = c_override; - if (v_override >= 0) - *v_count = v_override; -} + /* VK_EXT_image_view_min_lod */ + .minLod = true, -static void -anv_physical_device_init_queue_families(struct anv_physical_device *pdevice) -{ - uint32_t family_count = 0; + /* VK_EXT_index_type_uint8 */ + .indexTypeUint8 = true, - if (pdevice->engine_info) { - int gc_count = - intel_engines_count(pdevice->engine_info, - INTEL_ENGINE_CLASS_RENDER); - int v_count = - intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO); - int g_count = 0; - int c_count = 0; - if (debug_get_bool_option("INTEL_COMPUTE_CLASS", false)) - c_count = intel_engines_count(pdevice->engine_info, - INTEL_ENGINE_CLASS_COMPUTE); - enum intel_engine_class compute_class = - c_count < 1 ? INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE; + /* VK_EXT_line_rasterization */ + /* Rectangular lines must use the strict algorithm, which is not + * supported for wide lines prior to ICL. See rasterization_mode for + * details and how the HW states are programmed. + */ + .rectangularLines = pdevice->info.ver >= 10, + .bresenhamLines = true, + /* Support for Smooth lines with MSAA was removed on gfx11. From the + * BSpec section "Multisample ModesState" table for "AA Line Support + * Requirements": + * + * GFX10:BUG:######## NUM_MULTISAMPLES == 1 + * + * Fortunately, this isn't a case most people care about. 
+ */ + .smoothLines = pdevice->info.ver < 10, + .stippledRectangularLines = false, + .stippledBresenhamLines = true, + .stippledSmoothLines = false, - anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count); + /* VK_NV_mesh_shader */ + .taskShaderNV = mesh_shader, + .meshShaderNV = mesh_shader, - if (gc_count > 0) { - pdevice->queue.families[family_count++] = (struct anv_queue_family) { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = gc_count, - .engine_class = INTEL_ENGINE_CLASS_RENDER, - }; - } - if (g_count > 0) { - pdevice->queue.families[family_count++] = (struct anv_queue_family) { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = g_count, - .engine_class = INTEL_ENGINE_CLASS_RENDER, - }; - } - if (c_count > 0) { - pdevice->queue.families[family_count++] = (struct anv_queue_family) { - .queueFlags = VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = c_count, - .engine_class = compute_class, - }; - } - if (v_count > 0 && pdevice->video_decode_enabled) { - pdevice->queue.families[family_count++] = (struct anv_queue_family) { - .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, - .queueCount = v_count, - .engine_class = INTEL_ENGINE_CLASS_VIDEO, - }; - } - /* Increase count below when other families are added as a reminder to - * increase the ANV_MAX_QUEUE_FAMILIES value. - */ - STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 4); - } else { - /* Default to a single render queue */ - pdevice->queue.families[family_count++] = (struct anv_queue_family) { - .queueFlags = VK_QUEUE_GRAPHICS_BIT | - VK_QUEUE_COMPUTE_BIT | - VK_QUEUE_TRANSFER_BIT, - .queueCount = 1, - .engine_class = INTEL_ENGINE_CLASS_RENDER, - }; - family_count = 1; - } - assert(family_count <= ANV_MAX_QUEUE_FAMILIES); - pdevice->queue.family_count = family_count; -} + /* VK_EXT_mesh_shader */ + .taskShader = mesh_shader, + .meshShader = mesh_shader, + .multiviewMeshShader = false, + .primitiveFragmentShadingRateMeshShader = mesh_shader, + .meshShaderQueries = false, -static VkResult -anv_physical_device_get_parameters(struct anv_physical_device *device) -{ - switch (device->info.kmd_type) { - case INTEL_KMD_TYPE_I915: - return anv_i915_physical_device_get_parameters(device); - case INTEL_KMD_TYPE_XE: - return anv_xe_physical_device_get_parameters(device); - default: - unreachable("Missing"); - return VK_ERROR_UNKNOWN; - } -} + /* VK_EXT_mutable_descriptor_type */ + .mutableDescriptorType = true, -static VkResult -anv_physical_device_try_create(struct vk_instance *vk_instance, - struct _drmDevice *drm_device, - struct vk_physical_device **out) -{ - struct anv_instance *instance = - container_of(vk_instance, struct anv_instance, vk); + /* VK_KHR_performance_query */ + .performanceCounterQueryPools = true, + /* HW only supports a single configuration at a time. 
*/ + .performanceCounterMultipleQueryPools = false, - if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) || - drm_device->bustype != DRM_BUS_PCI || - drm_device->deviceinfo.pci->vendor_id != 0x8086) - return VK_ERROR_INCOMPATIBLE_DRIVER; + /* VK_KHR_pipeline_executable_properties */ + .pipelineExecutableInfo = true, - const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY]; - const char *path = drm_device->nodes[DRM_NODE_RENDER]; - VkResult result; - int fd; - int master_fd = -1; + /* VK_EXT_primitives_generated_query */ + .primitivesGeneratedQuery = true, + .primitivesGeneratedQueryWithRasterizerDiscard = false, + .primitivesGeneratedQueryWithNonZeroStreams = false, - brw_process_intel_debug_variable(); + /* VK_EXT_pipeline_library_group_handles */ + .pipelineLibraryGroupHandles = true, - fd = open(path, O_RDWR | O_CLOEXEC); - if (fd < 0) { - if (errno == ENOMEM) { - return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY, - "Unable to open device %s: out of memory", path); - } - return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "Unable to open device %s: %m", path); - } + /* VK_EXT_provoking_vertex */ + .provokingVertexLast = true, + .transformFeedbackPreservesProvokingVertex = true, - struct intel_device_info devinfo; - if (!intel_get_device_info_from_fd(fd, &devinfo)) { - result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); - goto fail_fd; - } + /* VK_KHR_ray_query */ + .rayQuery = rt_enabled, - if (devinfo.ver > 12) { - result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, - "Vulkan not yet supported on %s", devinfo.name); - goto fail_fd; - } else if (devinfo.ver < 9) { - /* Silently fail here, hasvk should pick up this device. */ - result = VK_ERROR_INCOMPATIBLE_DRIVER; - goto fail_fd; - } + /* VK_KHR_ray_tracing_maintenance1 */ + .rayTracingMaintenance1 = rt_enabled, + .rayTracingPipelineTraceRaysIndirect2 = rt_enabled, - struct anv_physical_device *device = - vk_zalloc(&instance->vk.alloc, sizeof(*device), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (device == NULL) { - result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_fd; - } + /* VK_KHR_ray_tracing_pipeline */ + .rayTracingPipeline = rt_enabled, + .rayTracingPipelineShaderGroupHandleCaptureReplay = false, + .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false, + .rayTracingPipelineTraceRaysIndirect = rt_enabled, + .rayTraversalPrimitiveCulling = rt_enabled, - struct vk_physical_device_dispatch_table dispatch_table; - vk_physical_device_dispatch_table_from_entrypoints( - &dispatch_table, &anv_physical_device_entrypoints, true); - vk_physical_device_dispatch_table_from_entrypoints( - &dispatch_table, &wsi_physical_device_entrypoints, false); + /* VK_EXT_robustness2 */ + .robustBufferAccess2 = true, + .robustImageAccess2 = true, + .nullDescriptor = true, - result = vk_physical_device_init(&device->vk, &instance->vk, - NULL, NULL, /* We set up extensions later */ - &dispatch_table); - if (result != VK_SUCCESS) { - vk_error(instance, result); - goto fail_alloc; - } - device->instance = instance; + /* VK_EXT_shader_atomic_float */ + .shaderBufferFloat32Atomics = true, + .shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc, + .shaderBufferFloat64Atomics = + pdevice->info.has_64bit_float && pdevice->info.has_lsc, + .shaderBufferFloat64AtomicAdd = false, + .shaderSharedFloat32Atomics = true, + .shaderSharedFloat32AtomicAdd = false, + .shaderSharedFloat64Atomics = false, + .shaderSharedFloat64AtomicAdd = false, + .shaderImageFloat32Atomics = true, + 
.shaderImageFloat32AtomicAdd = false, + .sparseImageFloat32Atomics = false, + .sparseImageFloat32AtomicAdd = false, - assert(strlen(path) < ARRAY_SIZE(device->path)); - snprintf(device->path, ARRAY_SIZE(device->path), "%s", path); + /* VK_EXT_shader_atomic_float2 */ + .shaderBufferFloat16Atomics = pdevice->info.has_lsc, + .shaderBufferFloat16AtomicAdd = false, + .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc, + .shaderBufferFloat32AtomicMinMax = true, + .shaderBufferFloat64AtomicMinMax = + pdevice->info.has_64bit_float && pdevice->info.has_lsc, + .shaderSharedFloat16Atomics = pdevice->info.has_lsc, + .shaderSharedFloat16AtomicAdd = false, + .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc, + .shaderSharedFloat32AtomicMinMax = true, + .shaderSharedFloat64AtomicMinMax = false, + .shaderImageFloat32AtomicMinMax = false, + .sparseImageFloat32AtomicMinMax = false, - device->info = devinfo; + /* VK_KHR_shader_clock */ + .shaderSubgroupClock = true, + .shaderDeviceClock = false, - device->local_fd = fd; - result = anv_physical_device_get_parameters(device); - if (result != VK_SUCCESS) - goto fail_base; + /* VK_INTEL_shader_integer_functions2 */ + .shaderIntegerFunctions2 = true, - device->gtt_size = device->info.gtt_size ? device->info.gtt_size : - device->info.aperture_bytes; + /* VK_EXT_shader_module_identifier */ + .shaderModuleIdentifier = true, - /* We only allow 48-bit addresses with softpin because knowing the actual - * address is required for the vertex cache flush workaround. - */ - device->supports_48bit_addresses = - device->gtt_size > (4ULL << 30 /* GiB */); + /* VK_KHR_shader_subgroup_uniform_control_flow */ + .shaderSubgroupUniformControlFlow = true, - /* We currently only have the right bits for instructions in Gen12+. If the - * kernel ever starts supporting that feature on previous generations, - * we'll need to edit genxml prior to enabling here. - */ - device->has_protected_contexts = device->info.ver >= 12 && - intel_gem_supports_protected_context(fd, device->info.kmd_type); + /* VK_EXT_texel_buffer_alignment */ + .texelBufferAlignment = true, - result = anv_physical_device_init_heaps(device, fd); - if (result != VK_SUCCESS) - goto fail_base; + /* VK_EXT_transform_feedback */ + .transformFeedback = true, + .geometryStreams = true, - if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false)) - device->has_exec_timeline = false; + /* VK_EXT_vertex_attribute_divisor */ + .vertexAttributeInstanceRateDivisor = true, + .vertexAttributeInstanceRateZeroDivisor = true, + /* VK_KHR_workgroup_memory_explicit_layout */ + .workgroupMemoryExplicitLayout = true, + .workgroupMemoryExplicitLayoutScalarBlockLayout = true, + .workgroupMemoryExplicitLayout8BitAccess = true, + .workgroupMemoryExplicitLayout16BitAccess = true, - device->generated_indirect_draws = - debug_get_bool_option("ANV_ENABLE_GENERATED_INDIRECT_DRAWS", - true); + /* VK_EXT_ycbcr_image_arrays */ + .ycbcrImageArrays = true, - /* The GPL implementation is new, and may have issues in conjunction with - * mesh shading. Enable it by default for zink for performance reasons (where - * mesh shading is unused anyway), and have an env var for testing in CI or - * by end users. 
- * */ - if (debug_get_bool_option("ANV_GPL", - instance->vk.app_info.engine_name != NULL && - (strcmp(instance->vk.app_info.engine_name, "mesa zink") == 0 || - strcmp(instance->vk.app_info.engine_name, "DXVK") == 0))) { - device->gpl_enabled = true; - } + /* VK_EXT_extended_dynamic_state */ + .extendedDynamicState = true, + + /* VK_EXT_extended_dynamic_state2 */ + .extendedDynamicState2 = true, + .extendedDynamicState2LogicOp = true, + .extendedDynamicState2PatchControlPoints = false, + + /* VK_EXT_extended_dynamic_state3 */ + .extendedDynamicState3PolygonMode = true, + .extendedDynamicState3TessellationDomainOrigin = true, + .extendedDynamicState3RasterizationStream = true, + .extendedDynamicState3LineStippleEnable = true, + .extendedDynamicState3LineRasterizationMode = true, + .extendedDynamicState3LogicOpEnable = true, + .extendedDynamicState3AlphaToOneEnable = true, + .extendedDynamicState3DepthClipEnable = true, + .extendedDynamicState3DepthClampEnable = true, + .extendedDynamicState3DepthClipNegativeOneToOne = true, + .extendedDynamicState3ProvokingVertexMode = true, + .extendedDynamicState3ColorBlendEnable = true, + .extendedDynamicState3ColorWriteMask = true, + .extendedDynamicState3ColorBlendEquation = true, + .extendedDynamicState3SampleLocationsEnable = true, + .extendedDynamicState3SampleMask = true, + + .extendedDynamicState3RasterizationSamples = false, + .extendedDynamicState3AlphaToCoverageEnable = false, + .extendedDynamicState3ConservativeRasterizationMode = false, + .extendedDynamicState3ExtraPrimitiveOverestimationSize = false, + .extendedDynamicState3ViewportWScalingEnable = false, + .extendedDynamicState3ViewportSwizzle = false, + .extendedDynamicState3ShadingRateImageEnable = false, + .extendedDynamicState3CoverageToColorEnable = false, + .extendedDynamicState3CoverageToColorLocation = false, + .extendedDynamicState3CoverageModulationMode = false, + .extendedDynamicState3CoverageModulationTableEnable = false, + .extendedDynamicState3CoverageModulationTable = false, + .extendedDynamicState3CoverageReductionMode = false, + .extendedDynamicState3RepresentativeFragmentTestEnable = false, + .extendedDynamicState3ColorBlendAdvanced = false, + + /* VK_EXT_multi_draw */ + .multiDraw = true, + + /* VK_EXT_non_seamless_cube_map */ + .nonSeamlessCubeMap = true, + + /* VK_EXT_primitive_topology_list_restart */ + .primitiveTopologyListRestart = true, + .primitiveTopologyPatchListRestart = true, + + /* VK_EXT_depth_clip_control */ + .depthClipControl = true, + + /* VK_KHR_present_id */ + .presentId = pdevice->vk.supported_extensions.KHR_present_id, + + /* VK_KHR_present_wait */ + .presentWait = pdevice->vk.supported_extensions.KHR_present_wait, - unsigned st_idx = 0; + /* VK_EXT_vertex_input_dynamic_state */ + .vertexInputDynamicState = true, + }; - device->sync_syncobj_type = vk_drm_syncobj_get_type(fd); - if (!device->has_exec_timeline) - device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE; - device->sync_types[st_idx++] = &device->sync_syncobj_type; + /* The new DOOM and Wolfenstein games require depthBounds without + * checking for it. They seem to run fine without it so just claim it's + * there and accept the consequences. 
+ */ + if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0) + features->depthBounds = true; +} - /* anv_bo_sync_type is only supported with i915 for now */ - if (device->info.kmd_type == INTEL_KMD_TYPE_I915) { - if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT)) - device->sync_types[st_idx++] = &anv_bo_sync_type; +static uint64_t +anv_compute_sys_heap_size(struct anv_physical_device *device, + uint64_t total_ram) +{ + /* We don't want to burn too much ram with the GPU. If the user has 4GiB + * or less, we use at most half. If they have more than 4GiB, we use 3/4. + */ + uint64_t available_ram; + if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull) + available_ram = total_ram / 2; + else + available_ram = total_ram * 3 / 4; - if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) { - device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type); - device->sync_types[st_idx++] = &device->sync_timeline_type.sync; - } - } else { - assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE); - assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT); - } + /* We also want to leave some padding for things we allocate in the driver, + * so don't go over 3/4 of the GTT either. + */ + available_ram = MIN2(available_ram, device->gtt_size * 3 / 4); - device->sync_types[st_idx++] = NULL; - assert(st_idx <= ARRAY_SIZE(device->sync_types)); - device->vk.supported_sync_types = device->sync_types; + if (available_ram > (2ull << 30) && !device->supports_48bit_addresses) { + /* When running with an overridden PCI ID, we may get a GTT size from + * the kernel that is greater than 2 GiB but the execbuf check for 48bit + * address support can still fail. Just clamp the address space size to + * 2 GiB if we don't have 48-bit support. 
+ */ + mesa_logw("%s:%d: The kernel reported a GTT size larger than 2 GiB but " + "not support for 48-bit addresses", + __FILE__, __LINE__); + available_ram = 2ull << 30; + } - device->vk.pipeline_cache_import_ops = anv_cache_import_ops; + return available_ram; +} - device->always_use_bindless = - debug_get_bool_option("ANV_ALWAYS_BINDLESS", false); +static VkResult MUST_CHECK +anv_init_meminfo(struct anv_physical_device *device, int fd) +{ + const struct intel_device_info *devinfo = &device->info; - device->use_call_secondary = - !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false); + device->sys.region = &devinfo->mem.sram.mem; + device->sys.size = + anv_compute_sys_heap_size(device, devinfo->mem.sram.mappable.size); + device->sys.available = devinfo->mem.sram.mappable.free; - device->has_implicit_ccs = device->info.has_aux_map || - device->info.verx10 >= 125; + device->vram_mappable.region = &devinfo->mem.vram.mem; + device->vram_mappable.size = devinfo->mem.vram.mappable.size; + device->vram_mappable.available = devinfo->mem.vram.mappable.free; - device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false); + device->vram_non_mappable.region = &devinfo->mem.vram.mem; + device->vram_non_mappable.size = devinfo->mem.vram.unmappable.size; + device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free; - /* Check if we can read the GPU timestamp register from the CPU */ - uint64_t u64_ignore; - device->has_reg_timestamp = intel_gem_read_render_timestamp(fd, - device->info.kmd_type, - &u64_ignore); + return VK_SUCCESS; +} - device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) || - driQueryOptionb(&instance->dri_options, "always_flush_cache"); +static void +anv_update_meminfo(struct anv_physical_device *device, int fd) +{ + if (!intel_device_info_update_memory_info(&device->info, fd)) + return; - device->compiler = brw_compiler_create(NULL, &device->info); - if (device->compiler == NULL) { - result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); - goto fail_base; - } - device->compiler->shader_debug_log = compiler_debug_log; - device->compiler->shader_perf_log = compiler_perf_log; - device->compiler->constant_buffer_0_is_relative = - !device->info.has_context_isolation; - device->compiler->supports_shader_constants = true; - device->compiler->indirect_ubos_use_sampler = device->info.ver < 12; + const struct intel_device_info *devinfo = &device->info; + device->sys.available = devinfo->mem.sram.mappable.free; + device->vram_mappable.available = devinfo->mem.vram.mappable.free; + device->vram_non_mappable.available = devinfo->mem.vram.unmappable.free; +} - isl_device_init(&device->isl_dev, &device->info); - result = anv_physical_device_init_uuids(device); +static VkResult +anv_physical_device_init_heaps(struct anv_physical_device *device, int fd) +{ + VkResult result = anv_init_meminfo(device, fd); if (result != VK_SUCCESS) - goto fail_compiler; + return result; - anv_physical_device_init_disk_cache(device); + assert(device->sys.size != 0); - if (instance->vk.enabled_extensions.KHR_display) { - master_fd = open(primary_path, O_RDWR | O_CLOEXEC); - if (master_fd >= 0) { - /* fail if we don't have permission to even render on this device */ - if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) { - close(master_fd); - master_fd = -1; - } + if (anv_physical_device_has_vram(device)) { + /* We can create 2 or 3 different heaps when we have local memory + * support, first heap with local memory size and second with system + * memory 
size and the third is added only if part of the vram is + * mappable to the host. + */ + device->memory.heap_count = 2; + device->memory.heaps[0] = (struct anv_memory_heap) { + /* If there is a vram_non_mappable, use that for the device only + * heap. Otherwise use the vram_mappable. + */ + .size = device->vram_non_mappable.size != 0 ? + device->vram_non_mappable.size : device->vram_mappable.size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .is_local_mem = true, + }; + device->memory.heaps[1] = (struct anv_memory_heap) { + .size = device->sys.size, + .flags = 0, + .is_local_mem = false, + }; + /* Add an additional smaller vram mappable heap if we can't map all the + * vram to the host. + */ + if (device->vram_non_mappable.size > 0) { + device->memory.heap_count++; + device->memory.heaps[2] = (struct anv_memory_heap) { + .size = device->vram_mappable.size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .is_local_mem = true, + }; } - } - device->master_fd = master_fd; - device->engine_info = intel_engine_get_info(fd, device->info.kmd_type); - device->info.has_compute_engine = intel_engines_count(device->engine_info, - INTEL_ENGINE_CLASS_COMPUTE); - anv_physical_device_init_queue_families(device); + device->memory.type_count = 3; + device->memory.types[0] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .heapIndex = 0, + }; + device->memory.types[1] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 1, + }; + device->memory.types[2] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + /* This memory type either comes from heaps[0] if there is only + * mappable vram region, or from heaps[2] if there is both mappable & + * non-mappable vram regions. + */ + .heapIndex = device->vram_non_mappable.size > 0 ? 2 : 0, + }; + } else if (device->info.has_llc) { + device->memory.heap_count = 1; + device->memory.heaps[0] = (struct anv_memory_heap) { + .size = device->sys.size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .is_local_mem = false, + }; - anv_physical_device_init_perf(device, fd); + /* Big core GPUs share LLC with the CPU and thus one memory type can be + * both cached and coherent at the same time. + * + * But some game engines can't handle single type well + * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7360#note_1719438 + * + * The second memory type w/out HOST_CACHED_BIT will get write-combining. + * See anv_AllocateMemory()). + * + * The Intel Vulkan driver for Windows also advertises these memory types. 
+ */ + device->memory.type_count = 3; + device->memory.types[0] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .heapIndex = 0, + }; + device->memory.types[1] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + device->memory.types[2] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + } else { + device->memory.heap_count = 1; + device->memory.heaps[0] = (struct anv_memory_heap) { + .size = device->sys.size, + .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, + .is_local_mem = false, + }; - get_device_extensions(device, &device->vk.supported_extensions); - get_features(device, &device->vk.supported_features); + /* The spec requires that we expose a host-visible, coherent memory + * type, but Atom GPUs don't share LLC. Thus we offer two memory types + * to give the application a choice between cached, but not coherent and + * coherent but uncached (WC though). + */ + device->memory.type_count = 2; + device->memory.types[0] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT, + .heapIndex = 0, + }; + device->memory.types[1] = (struct anv_memory_type) { + .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, + .heapIndex = 0, + }; + } + + for (unsigned i = 0; i < device->memory.type_count; i++) { + VkMemoryPropertyFlags props = device->memory.types[i].propertyFlags; + if ((props & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && + !(props & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) +#ifdef SUPPORT_INTEL_INTEGRATED_GPUS + device->memory.need_clflush = true; +#else + return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "Memory configuration requires flushing, but it's not implemented for this architecture"); +#endif + } - /* Gather major/minor before WSI. */ - struct stat st; + return VK_SUCCESS; +} - if (stat(primary_path, &st) == 0) { - device->has_master = true; - device->master_major = major(st.st_rdev); - device->master_minor = minor(st.st_rdev); - } else { - device->has_master = false; - device->master_major = 0; - device->master_minor = 0; +static VkResult +anv_physical_device_init_uuids(struct anv_physical_device *device) +{ + const struct build_id_note *note = + build_id_find_nhdr_for_addr(anv_physical_device_init_uuids); + if (!note) { + return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "Failed to find build-id"); } - if (stat(path, &st) == 0) { - device->has_local = true; - device->local_major = major(st.st_rdev); - device->local_minor = minor(st.st_rdev); - } else { - device->has_local = false; - device->local_major = 0; - device->local_minor = 0; + unsigned build_id_len = build_id_length(note); + if (build_id_len < 20) { + return vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED, + "build-id too short. 
It needs to be a SHA"); } - result = anv_init_wsi(device); - if (result != VK_SUCCESS) - goto fail_perf; + memcpy(device->driver_build_sha1, build_id_data(note), 20); - anv_measure_device_init(device); + struct mesa_sha1 sha1_ctx; + uint8_t sha1[20]; + STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1)); - anv_genX(&device->info, init_physical_device_state)(device); + /* The pipeline cache UUID is used for determining when a pipeline cache is + * invalid. It needs both a driver build and the PCI ID of the device. + */ + _mesa_sha1_init(&sha1_ctx); + _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len); + _mesa_sha1_update(&sha1_ctx, &device->info.pci_device_id, + sizeof(device->info.pci_device_id)); + _mesa_sha1_update(&sha1_ctx, &device->always_use_bindless, + sizeof(device->always_use_bindless)); + _mesa_sha1_final(&sha1_ctx, sha1); + memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE); - *out = &device->vk; + intel_uuid_compute_driver_id(device->driver_uuid, &device->info, VK_UUID_SIZE); + intel_uuid_compute_device_id(device->device_uuid, &device->info, VK_UUID_SIZE); return VK_SUCCESS; - -fail_perf: - ralloc_free(device->perf); - free(device->engine_info); - anv_physical_device_free_disk_cache(device); -fail_compiler: - ralloc_free(device->compiler); -fail_base: - vk_physical_device_finish(&device->vk); -fail_alloc: - vk_free(&instance->vk.alloc, device); -fail_fd: - close(fd); - if (master_fd != -1) - close(master_fd); - return result; } static void -anv_physical_device_destroy(struct vk_physical_device *vk_device) +anv_physical_device_init_disk_cache(struct anv_physical_device *device) { - struct anv_physical_device *device = - container_of(vk_device, struct anv_physical_device, vk); +#ifdef ENABLE_SHADER_CACHE + char renderer[10]; + ASSERTED int len = snprintf(renderer, sizeof(renderer), "anv_%04x", + device->info.pci_device_id); + assert(len == sizeof(renderer) - 2); - anv_finish_wsi(device); - anv_measure_device_destroy(device); - free(device->engine_info); - anv_physical_device_free_disk_cache(device); - ralloc_free(device->compiler); - ralloc_free(device->perf); - close(device->local_fd); - if (device->master_fd >= 0) - close(device->master_fd); - vk_physical_device_finish(&device->vk); - vk_free(&device->instance->vk.alloc, device); + char timestamp[41]; + _mesa_sha1_format(timestamp, device->driver_build_sha1); + + const uint64_t driver_flags = + brw_get_compiler_config_value(device->compiler); + device->vk.disk_cache = disk_cache_create(renderer, timestamp, driver_flags); +#endif } -VkResult anv_EnumerateInstanceExtensionProperties( - const char* pLayerName, - uint32_t* pPropertyCount, - VkExtensionProperties* pProperties) +static void +anv_physical_device_free_disk_cache(struct anv_physical_device *device) { - if (pLayerName) - return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); +#ifdef ENABLE_SHADER_CACHE + if (device->vk.disk_cache) { + disk_cache_destroy(device->vk.disk_cache); + device->vk.disk_cache = NULL; + } +#else + assert(device->vk.disk_cache == NULL); +#endif +} - return vk_enumerate_instance_extension_properties( - &instance_extensions, pPropertyCount, pProperties); +/* The ANV_QUEUE_OVERRIDE environment variable is a comma separated list of + * queue overrides. 
+ * + * To override the number queues: + * * "gc" is for graphics queues with compute support + * * "g" is for graphics queues with no compute support + * * "c" is for compute queues with no graphics support + * * "v" is for video queues with no graphics support + * + * For example, ANV_QUEUE_OVERRIDE=gc=2,c=1 would override the number of + * advertised queues to be 2 queues with graphics+compute support, and 1 queue + * with compute-only support. + * + * ANV_QUEUE_OVERRIDE=c=1 would override the number of advertised queues to + * include 1 queue with compute-only support, but it will not change the + * number of graphics+compute queues. + * + * ANV_QUEUE_OVERRIDE=gc=0,c=1 would override the number of advertised queues + * to include 1 queue with compute-only support, and it would override the + * number of graphics+compute queues to be 0. + */ +static void +anv_override_engine_counts(int *gc_count, int *g_count, int *c_count, int *v_count) +{ + int gc_override = -1; + int g_override = -1; + int c_override = -1; + int v_override = -1; + char *env = getenv("ANV_QUEUE_OVERRIDE"); + + if (env == NULL) + return; + + env = strdup(env); + char *save = NULL; + char *next = strtok_r(env, ",", &save); + while (next != NULL) { + if (strncmp(next, "gc=", 3) == 0) { + gc_override = strtol(next + 3, NULL, 0); + } else if (strncmp(next, "g=", 2) == 0) { + g_override = strtol(next + 2, NULL, 0); + } else if (strncmp(next, "c=", 2) == 0) { + c_override = strtol(next + 2, NULL, 0); + } else if (strncmp(next, "v=", 2) == 0) { + v_override = strtol(next + 2, NULL, 0); + } else { + mesa_logw("Ignoring unsupported ANV_QUEUE_OVERRIDE token: %s", next); + } + next = strtok_r(NULL, ",", &save); + } + free(env); + if (gc_override >= 0) + *gc_count = gc_override; + if (g_override >= 0) + *g_count = g_override; + if (*g_count > 0 && *gc_count <= 0 && (gc_override >= 0 || g_override >= 0)) + mesa_logw("ANV_QUEUE_OVERRIDE: gc=0 with g > 0 violates the " + "Vulkan specification"); + if (c_override >= 0) + *c_count = c_override; + if (v_override >= 0) + *v_count = v_override; } static void -anv_init_dri_options(struct anv_instance *instance) +anv_physical_device_init_queue_families(struct anv_physical_device *pdevice) { - driParseOptionInfo(&instance->available_dri_options, anv_dri_options, - ARRAY_SIZE(anv_dri_options)); - driParseConfigFiles(&instance->dri_options, - &instance->available_dri_options, 0, "anv", NULL, NULL, - instance->vk.app_info.app_name, - instance->vk.app_info.app_version, - instance->vk.app_info.engine_name, - instance->vk.app_info.engine_version); + uint32_t family_count = 0; - instance->assume_full_subgroups = - driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups"); - instance->limit_trig_input_range = - driQueryOptionb(&instance->dri_options, "limit_trig_input_range"); - instance->sample_mask_out_opengl_behaviour = - driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour"); - instance->lower_depth_range_rate = - driQueryOptionf(&instance->dri_options, "lower_depth_range_rate"); - instance->no_16bit = - driQueryOptionb(&instance->dri_options, "no_16bit"); - instance->mesh_conv_prim_attrs_to_vert_attrs = - driQueryOptioni(&instance->dri_options, "anv_mesh_conv_prim_attrs_to_vert_attrs"); - instance->fp64_workaround_enabled = - driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled"); - instance->generated_indirect_threshold = - driQueryOptioni(&instance->dri_options, "generated_indirect_threshold"); - instance->query_clear_with_blorp_threshold = - 
driQueryOptioni(&instance->dri_options, "query_clear_with_blorp_threshold"); + if (pdevice->engine_info) { + int gc_count = + intel_engines_count(pdevice->engine_info, + INTEL_ENGINE_CLASS_RENDER); + int v_count = + intel_engines_count(pdevice->engine_info, INTEL_ENGINE_CLASS_VIDEO); + int g_count = 0; + int c_count = 0; + if (debug_get_bool_option("INTEL_COMPUTE_CLASS", false)) + c_count = intel_engines_count(pdevice->engine_info, + INTEL_ENGINE_CLASS_COMPUTE); + enum intel_engine_class compute_class = + c_count < 1 ? INTEL_ENGINE_CLASS_RENDER : INTEL_ENGINE_CLASS_COMPUTE; + + anv_override_engine_counts(&gc_count, &g_count, &c_count, &v_count); + + if (gc_count > 0) { + pdevice->queue.families[family_count++] = (struct anv_queue_family) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = gc_count, + .engine_class = INTEL_ENGINE_CLASS_RENDER, + }; + } + if (g_count > 0) { + pdevice->queue.families[family_count++] = (struct anv_queue_family) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = g_count, + .engine_class = INTEL_ENGINE_CLASS_RENDER, + }; + } + if (c_count > 0) { + pdevice->queue.families[family_count++] = (struct anv_queue_family) { + .queueFlags = VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = c_count, + .engine_class = compute_class, + }; + } + if (v_count > 0 && pdevice->video_decode_enabled) { + pdevice->queue.families[family_count++] = (struct anv_queue_family) { + .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR, + .queueCount = v_count, + .engine_class = INTEL_ENGINE_CLASS_VIDEO, + }; + } + /* Increase count below when other families are added as a reminder to + * increase the ANV_MAX_QUEUE_FAMILIES value. + */ + STATIC_ASSERT(ANV_MAX_QUEUE_FAMILIES >= 4); + } else { + /* Default to a single render queue */ + pdevice->queue.families[family_count++] = (struct anv_queue_family) { + .queueFlags = VK_QUEUE_GRAPHICS_BIT | + VK_QUEUE_COMPUTE_BIT | + VK_QUEUE_TRANSFER_BIT, + .queueCount = 1, + .engine_class = INTEL_ENGINE_CLASS_RENDER, + }; + family_count = 1; + } + assert(family_count <= ANV_MAX_QUEUE_FAMILIES); + pdevice->queue.family_count = family_count; } -VkResult anv_CreateInstance( - const VkInstanceCreateInfo* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkInstance* pInstance) +static VkResult +anv_physical_device_get_parameters(struct anv_physical_device *device) { - struct anv_instance *instance; - VkResult result; - - assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - - if (pAllocator == NULL) - pAllocator = vk_default_allocator(); - - instance = vk_alloc(pAllocator, sizeof(*instance), 8, - VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); - if (!instance) - return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - - struct vk_instance_dispatch_table dispatch_table; - vk_instance_dispatch_table_from_entrypoints( - &dispatch_table, &anv_instance_entrypoints, true); - vk_instance_dispatch_table_from_entrypoints( - &dispatch_table, &wsi_instance_entrypoints, false); - - result = vk_instance_init(&instance->vk, &instance_extensions, - &dispatch_table, pCreateInfo, pAllocator); - if (result != VK_SUCCESS) { - vk_free(pAllocator, instance); - return vk_error(NULL, result); + switch (device->info.kmd_type) { + case INTEL_KMD_TYPE_I915: + return anv_i915_physical_device_get_parameters(device); + case INTEL_KMD_TYPE_XE: + return anv_xe_physical_device_get_parameters(device); + default: + unreachable("Missing"); + return VK_ERROR_UNKNOWN; } +} - 
instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create; - instance->vk.physical_devices.destroy = anv_physical_device_destroy; - - VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); - - anv_init_dri_options(instance); +static VkResult +anv_physical_device_try_create(struct vk_instance *vk_instance, + struct _drmDevice *drm_device, + struct vk_physical_device **out) +{ + struct anv_instance *instance = + container_of(vk_instance, struct anv_instance, vk); - intel_driver_ds_init(); + if (!(drm_device->available_nodes & (1 << DRM_NODE_RENDER)) || + drm_device->bustype != DRM_BUS_PCI || + drm_device->deviceinfo.pci->vendor_id != 0x8086) + return VK_ERROR_INCOMPATIBLE_DRIVER; - *pInstance = anv_instance_to_handle(instance); + const char *primary_path = drm_device->nodes[DRM_NODE_PRIMARY]; + const char *path = drm_device->nodes[DRM_NODE_RENDER]; + VkResult result; + int fd; + int master_fd = -1; - return VK_SUCCESS; -} + brw_process_intel_debug_variable(); -void anv_DestroyInstance( - VkInstance _instance, - const VkAllocationCallbacks* pAllocator) -{ - ANV_FROM_HANDLE(anv_instance, instance, _instance); + fd = open(path, O_RDWR | O_CLOEXEC); + if (fd < 0) { + if (errno == ENOMEM) { + return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY, + "Unable to open device %s: out of memory", path); + } + return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, + "Unable to open device %s: %m", path); + } - if (!instance) - return; + struct intel_device_info devinfo; + if (!intel_get_device_info_from_fd(fd, &devinfo)) { + result = vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER); + goto fail_fd; + } - VG(VALGRIND_DESTROY_MEMPOOL(instance)); + if (devinfo.ver > 12) { + result = vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, + "Vulkan not yet supported on %s", devinfo.name); + goto fail_fd; + } else if (devinfo.ver < 9) { + /* Silently fail here, hasvk should pick up this device. 
*/ + result = VK_ERROR_INCOMPATIBLE_DRIVER; + goto fail_fd; + } - driDestroyOptionCache(&instance->dri_options); - driDestroyOptionInfo(&instance->available_dri_options); + struct anv_physical_device *device = + vk_zalloc(&instance->vk.alloc, sizeof(*device), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (device == NULL) { + result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_fd; + } - vk_instance_finish(&instance->vk); - vk_free(&instance->vk.alloc, instance); -} + struct vk_physical_device_dispatch_table dispatch_table; + vk_physical_device_dispatch_table_from_entrypoints( + &dispatch_table, &anv_physical_device_entrypoints, true); + vk_physical_device_dispatch_table_from_entrypoints( + &dispatch_table, &wsi_physical_device_entrypoints, false); -static void -get_features(const struct anv_physical_device *pdevice, - struct vk_features *features) -{ - struct vk_app_info *app_info = &pdevice->instance->vk.app_info; + result = vk_physical_device_init(&device->vk, &instance->vk, + NULL, NULL, /* We set up extensions later */ + &dispatch_table); + if (result != VK_SUCCESS) { + vk_error(instance, result); + goto fail_alloc; + } + device->instance = instance; - /* Just pick one; they're all the same */ - const bool has_astc_ldr = - isl_format_supports_sampling(&pdevice->info, - ISL_FORMAT_ASTC_LDR_2D_4X4_FLT16); + assert(strlen(path) < ARRAY_SIZE(device->path)); + snprintf(device->path, ARRAY_SIZE(device->path), "%s", path); - const bool rt_enabled = ANV_SUPPORT_RT && pdevice->info.has_ray_tracing; + device->info = devinfo; - const bool mesh_shader = - pdevice->vk.supported_extensions.EXT_mesh_shader || - pdevice->vk.supported_extensions.NV_mesh_shader; + device->local_fd = fd; + result = anv_physical_device_get_parameters(device); + if (result != VK_SUCCESS) + goto fail_base; - *features = (struct vk_features) { - /* Vulkan 1.0 */ - .robustBufferAccess = true, - .fullDrawIndexUint32 = true, - .imageCubeArray = true, - .independentBlend = true, - .geometryShader = true, - .tessellationShader = true, - .sampleRateShading = true, - .dualSrcBlend = true, - .logicOp = true, - .multiDrawIndirect = true, - .drawIndirectFirstInstance = true, - .depthClamp = true, - .depthBiasClamp = true, - .fillModeNonSolid = true, - .depthBounds = pdevice->info.ver >= 12, - .wideLines = true, - .largePoints = true, - .alphaToOne = true, - .multiViewport = true, - .samplerAnisotropy = true, - .textureCompressionETC2 = true, - .textureCompressionASTC_LDR = has_astc_ldr, - .textureCompressionBC = true, - .occlusionQueryPrecise = true, - .pipelineStatisticsQuery = true, - /* We can't do image stores in vec4 shaders */ - .vertexPipelineStoresAndAtomics = - pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] && - pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY], - .fragmentStoresAndAtomics = true, - .shaderTessellationAndGeometryPointSize = true, - .shaderImageGatherExtended = true, - .shaderStorageImageExtendedFormats = true, - .shaderStorageImageMultisample = false, - /* Gfx12.5 has all the required format supported in HW for typed - * read/writes - */ - .shaderStorageImageReadWithoutFormat = pdevice->info.verx10 >= 125, - .shaderStorageImageWriteWithoutFormat = true, - .shaderUniformBufferArrayDynamicIndexing = true, - .shaderSampledImageArrayDynamicIndexing = true, - .shaderStorageBufferArrayDynamicIndexing = true, - .shaderStorageImageArrayDynamicIndexing = true, - .shaderClipDistance = true, - .shaderCullDistance = true, - .shaderFloat64 = pdevice->info.has_64bit_float, - .shaderInt64 = true, 
- .shaderInt16 = true, - .shaderResourceMinLod = true, - .variableMultisampleRate = true, - .inheritedQueries = true, + device->gtt_size = device->info.gtt_size ? device->info.gtt_size : + device->info.aperture_bytes; - /* Vulkan 1.1 */ - .storageBuffer16BitAccess = !pdevice->instance->no_16bit, - .uniformAndStorageBuffer16BitAccess = !pdevice->instance->no_16bit, - .storagePushConstant16 = true, - .storageInputOutput16 = false, - .multiview = true, - .multiviewGeometryShader = true, - .multiviewTessellationShader = true, - .variablePointersStorageBuffer = true, - .variablePointers = true, - .protectedMemory = false, - .samplerYcbcrConversion = true, - .shaderDrawParameters = true, + /* We only allow 48-bit addresses with softpin because knowing the actual + * address is required for the vertex cache flush workaround. + */ + device->supports_48bit_addresses = + device->gtt_size > (4ULL << 30 /* GiB */); - /* Vulkan 1.2 */ - .samplerMirrorClampToEdge = true, - .drawIndirectCount = true, - .storageBuffer8BitAccess = true, - .uniformAndStorageBuffer8BitAccess = true, - .storagePushConstant8 = true, - .shaderBufferInt64Atomics = true, - .shaderSharedInt64Atomics = false, - .shaderFloat16 = !pdevice->instance->no_16bit, - .shaderInt8 = !pdevice->instance->no_16bit, + /* We currently only have the right bits for instructions in Gen12+. If the + * kernel ever starts supporting that feature on previous generations, + * we'll need to edit genxml prior to enabling here. + */ + device->has_protected_contexts = device->info.ver >= 12 && + intel_gem_supports_protected_context(fd, device->info.kmd_type); - .descriptorIndexing = true, - .shaderInputAttachmentArrayDynamicIndexing = false, - .shaderUniformTexelBufferArrayDynamicIndexing = true, - .shaderStorageTexelBufferArrayDynamicIndexing = true, - .shaderUniformBufferArrayNonUniformIndexing = true, - .shaderSampledImageArrayNonUniformIndexing = true, - .shaderStorageBufferArrayNonUniformIndexing = true, - .shaderStorageImageArrayNonUniformIndexing = true, - .shaderInputAttachmentArrayNonUniformIndexing = false, - .shaderUniformTexelBufferArrayNonUniformIndexing = true, - .shaderStorageTexelBufferArrayNonUniformIndexing = true, - .descriptorBindingUniformBufferUpdateAfterBind = true, - .descriptorBindingSampledImageUpdateAfterBind = true, - .descriptorBindingStorageImageUpdateAfterBind = true, - .descriptorBindingStorageBufferUpdateAfterBind = true, - .descriptorBindingUniformTexelBufferUpdateAfterBind = true, - .descriptorBindingStorageTexelBufferUpdateAfterBind = true, - .descriptorBindingUpdateUnusedWhilePending = true, - .descriptorBindingPartiallyBound = true, - .descriptorBindingVariableDescriptorCount = true, - .runtimeDescriptorArray = true, + result = anv_physical_device_init_heaps(device, fd); + if (result != VK_SUCCESS) + goto fail_base; - .samplerFilterMinmax = true, - .scalarBlockLayout = true, - .imagelessFramebuffer = true, - .uniformBufferStandardLayout = true, - .shaderSubgroupExtendedTypes = true, - .separateDepthStencilLayouts = true, - .hostQueryReset = true, - .timelineSemaphore = true, - .bufferDeviceAddress = true, - .bufferDeviceAddressCaptureReplay = true, - .bufferDeviceAddressMultiDevice = false, - .vulkanMemoryModel = true, - .vulkanMemoryModelDeviceScope = true, - .vulkanMemoryModelAvailabilityVisibilityChains = true, - .shaderOutputViewportIndex = true, - .shaderOutputLayer = true, - .subgroupBroadcastDynamicId = true, + if (debug_get_bool_option("ANV_QUEUE_THREAD_DISABLE", false)) + device->has_exec_timeline = false; - /* 
Vulkan 1.3 */ - .robustImageAccess = true, - .inlineUniformBlock = true, - .descriptorBindingInlineUniformBlockUpdateAfterBind = true, - .pipelineCreationCacheControl = true, - .privateData = true, - .shaderDemoteToHelperInvocation = true, - .shaderTerminateInvocation = true, - .subgroupSizeControl = true, - .computeFullSubgroups = true, - .synchronization2 = true, - .textureCompressionASTC_HDR = false, - .shaderZeroInitializeWorkgroupMemory = true, - .dynamicRendering = true, - .shaderIntegerDotProduct = true, - .maintenance4 = true, - /* VK_EXT_4444_formats */ - .formatA4R4G4B4 = true, - .formatA4B4G4R4 = false, + device->generated_indirect_draws = + debug_get_bool_option("ANV_ENABLE_GENERATED_INDIRECT_DRAWS", + true); - /* VK_KHR_acceleration_structure */ - .accelerationStructure = rt_enabled, - .accelerationStructureCaptureReplay = false, /* TODO */ - .accelerationStructureIndirectBuild = false, /* TODO */ - .accelerationStructureHostCommands = false, - .descriptorBindingAccelerationStructureUpdateAfterBind = rt_enabled, + /* The GPL implementation is new, and may have issues in conjunction with + * mesh shading. Enable it by default for zink for performance reasons (where + * mesh shading is unused anyway), and have an env var for testing in CI or + * by end users. + * */ + if (debug_get_bool_option("ANV_GPL", + instance->vk.app_info.engine_name != NULL && + (strcmp(instance->vk.app_info.engine_name, "mesa zink") == 0 || + strcmp(instance->vk.app_info.engine_name, "DXVK") == 0))) { + device->gpl_enabled = true; + } - /* VK_EXT_border_color_swizzle */ - .borderColorSwizzle = true, - .borderColorSwizzleFromImage = true, + unsigned st_idx = 0; - /* VK_EXT_color_write_enable */ - .colorWriteEnable = true, + device->sync_syncobj_type = vk_drm_syncobj_get_type(fd); + if (!device->has_exec_timeline) + device->sync_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE; + device->sync_types[st_idx++] = &device->sync_syncobj_type; - /* VK_EXT_image_2d_view_of_3d */ - .image2DViewOf3D = true, - .sampler2DViewOf3D = true, + /* anv_bo_sync_type is only supported with i915 for now */ + if (device->info.kmd_type == INTEL_KMD_TYPE_I915) { + if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT)) + device->sync_types[st_idx++] = &anv_bo_sync_type; - /* VK_EXT_image_sliced_view_of_3d */ - .imageSlicedViewOf3D = true, + if (!(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE)) { + device->sync_timeline_type = vk_sync_timeline_get_type(&anv_bo_sync_type); + device->sync_types[st_idx++] = &device->sync_timeline_type.sync; + } + } else { + assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_TIMELINE); + assert(device->sync_syncobj_type.features & VK_SYNC_FEATURE_CPU_WAIT); + } - /* VK_NV_compute_shader_derivatives */ - .computeDerivativeGroupQuads = true, - .computeDerivativeGroupLinear = true, + device->sync_types[st_idx++] = NULL; + assert(st_idx <= ARRAY_SIZE(device->sync_types)); + device->vk.supported_sync_types = device->sync_types; - /* VK_EXT_conditional_rendering */ - .conditionalRendering = true, - .inheritedConditionalRendering = true, + device->vk.pipeline_cache_import_ops = anv_cache_import_ops; - /* VK_EXT_custom_border_color */ - .customBorderColors = true, - .customBorderColorWithoutFormat = true, + device->always_use_bindless = + debug_get_bool_option("ANV_ALWAYS_BINDLESS", false); - /* VK_EXT_depth_clamp_zero_one */ - .depthClampZeroOne = true, + device->use_call_secondary = + !debug_get_bool_option("ANV_DISABLE_SECONDARY_CMD_BUFFER_CALLS", false); - /* 
VK_EXT_depth_clip_enable */ - .depthClipEnable = true, + device->has_implicit_ccs = device->info.has_aux_map || + device->info.verx10 >= 125; - /* VK_EXT_fragment_shader_interlock */ - .fragmentShaderSampleInterlock = true, - .fragmentShaderPixelInterlock = true, - .fragmentShaderShadingRateInterlock = false, + device->video_decode_enabled = debug_get_bool_option("ANV_VIDEO_DECODE", false); - /* VK_EXT_global_priority_query */ - .globalPriorityQuery = true, - .graphicsPipelineLibrary = pdevice->gpl_enabled, + /* Check if we can read the GPU timestamp register from the CPU */ + uint64_t u64_ignore; + device->has_reg_timestamp = intel_gem_read_render_timestamp(fd, + device->info.kmd_type, + &u64_ignore); - /* VK_KHR_fragment_shading_rate */ - .pipelineFragmentShadingRate = true, - .primitiveFragmentShadingRate = - pdevice->info.has_coarse_pixel_primitive_and_cb, - .attachmentFragmentShadingRate = - pdevice->info.has_coarse_pixel_primitive_and_cb, + device->always_flush_cache = INTEL_DEBUG(DEBUG_STALL) || + driQueryOptionb(&instance->dri_options, "always_flush_cache"); - /* VK_EXT_image_view_min_lod */ - .minLod = true, + device->compiler = brw_compiler_create(NULL, &device->info); + if (device->compiler == NULL) { + result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY); + goto fail_base; + } + device->compiler->shader_debug_log = compiler_debug_log; + device->compiler->shader_perf_log = compiler_perf_log; + device->compiler->constant_buffer_0_is_relative = + !device->info.has_context_isolation; + device->compiler->supports_shader_constants = true; + device->compiler->indirect_ubos_use_sampler = device->info.ver < 12; - /* VK_EXT_index_type_uint8 */ - .indexTypeUint8 = true, + isl_device_init(&device->isl_dev, &device->info); - /* VK_EXT_line_rasterization */ - /* Rectangular lines must use the strict algorithm, which is not - * supported for wide lines prior to ICL. See rasterization_mode for - * details and how the HW states are programmed. - */ - .rectangularLines = pdevice->info.ver >= 10, - .bresenhamLines = true, - /* Support for Smooth lines with MSAA was removed on gfx11. From the - * BSpec section "Multisample ModesState" table for "AA Line Support - * Requirements": - * - * GFX10:BUG:######## NUM_MULTISAMPLES == 1 - * - * Fortunately, this isn't a case most people care about. 
- */ - .smoothLines = pdevice->info.ver < 10, - .stippledRectangularLines = false, - .stippledBresenhamLines = true, - .stippledSmoothLines = false, + result = anv_physical_device_init_uuids(device); + if (result != VK_SUCCESS) + goto fail_compiler; - /* VK_NV_mesh_shader */ - .taskShaderNV = mesh_shader, - .meshShaderNV = mesh_shader, + anv_physical_device_init_disk_cache(device); - /* VK_EXT_mesh_shader */ - .taskShader = mesh_shader, - .meshShader = mesh_shader, - .multiviewMeshShader = false, - .primitiveFragmentShadingRateMeshShader = mesh_shader, - .meshShaderQueries = false, + if (instance->vk.enabled_extensions.KHR_display) { + master_fd = open(primary_path, O_RDWR | O_CLOEXEC); + if (master_fd >= 0) { + /* fail if we don't have permission to even render on this device */ + if (!intel_gem_can_render_on_fd(master_fd, device->info.kmd_type)) { + close(master_fd); + master_fd = -1; + } + } + } + device->master_fd = master_fd; - /* VK_EXT_mutable_descriptor_type */ - .mutableDescriptorType = true, + device->engine_info = intel_engine_get_info(fd, device->info.kmd_type); + device->info.has_compute_engine = intel_engines_count(device->engine_info, + INTEL_ENGINE_CLASS_COMPUTE); + anv_physical_device_init_queue_families(device); - /* VK_KHR_performance_query */ - .performanceCounterQueryPools = true, - /* HW only supports a single configuration at a time. */ - .performanceCounterMultipleQueryPools = false, + anv_physical_device_init_perf(device, fd); - /* VK_KHR_pipeline_executable_properties */ - .pipelineExecutableInfo = true, + get_device_extensions(device, &device->vk.supported_extensions); + get_features(device, &device->vk.supported_features); - /* VK_EXT_primitives_generated_query */ - .primitivesGeneratedQuery = true, - .primitivesGeneratedQueryWithRasterizerDiscard = false, - .primitivesGeneratedQueryWithNonZeroStreams = false, + /* Gather major/minor before WSI. 
*/ + struct stat st; - /* VK_EXT_pipeline_library_group_handles */ - .pipelineLibraryGroupHandles = true, + if (stat(primary_path, &st) == 0) { + device->has_master = true; + device->master_major = major(st.st_rdev); + device->master_minor = minor(st.st_rdev); + } else { + device->has_master = false; + device->master_major = 0; + device->master_minor = 0; + } + + if (stat(path, &st) == 0) { + device->has_local = true; + device->local_major = major(st.st_rdev); + device->local_minor = minor(st.st_rdev); + } else { + device->has_local = false; + device->local_major = 0; + device->local_minor = 0; + } - /* VK_EXT_provoking_vertex */ - .provokingVertexLast = true, - .transformFeedbackPreservesProvokingVertex = true, + result = anv_init_wsi(device); + if (result != VK_SUCCESS) + goto fail_perf; - /* VK_KHR_ray_query */ - .rayQuery = rt_enabled, + anv_measure_device_init(device); - /* VK_KHR_ray_tracing_maintenance1 */ - .rayTracingMaintenance1 = rt_enabled, - .rayTracingPipelineTraceRaysIndirect2 = rt_enabled, + anv_genX(&device->info, init_physical_device_state)(device); - /* VK_KHR_ray_tracing_pipeline */ - .rayTracingPipeline = rt_enabled, - .rayTracingPipelineShaderGroupHandleCaptureReplay = false, - .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false, - .rayTracingPipelineTraceRaysIndirect = rt_enabled, - .rayTraversalPrimitiveCulling = rt_enabled, + *out = &device->vk; - /* VK_EXT_robustness2 */ - .robustBufferAccess2 = true, - .robustImageAccess2 = true, - .nullDescriptor = true, + return VK_SUCCESS; - /* VK_EXT_shader_atomic_float */ - .shaderBufferFloat32Atomics = true, - .shaderBufferFloat32AtomicAdd = pdevice->info.has_lsc, - .shaderBufferFloat64Atomics = - pdevice->info.has_64bit_float && pdevice->info.has_lsc, - .shaderBufferFloat64AtomicAdd = false, - .shaderSharedFloat32Atomics = true, - .shaderSharedFloat32AtomicAdd = false, - .shaderSharedFloat64Atomics = false, - .shaderSharedFloat64AtomicAdd = false, - .shaderImageFloat32Atomics = true, - .shaderImageFloat32AtomicAdd = false, - .sparseImageFloat32Atomics = false, - .sparseImageFloat32AtomicAdd = false, +fail_perf: + ralloc_free(device->perf); + free(device->engine_info); + anv_physical_device_free_disk_cache(device); +fail_compiler: + ralloc_free(device->compiler); +fail_base: + vk_physical_device_finish(&device->vk); +fail_alloc: + vk_free(&instance->vk.alloc, device); +fail_fd: + close(fd); + if (master_fd != -1) + close(master_fd); + return result; +} - /* VK_EXT_shader_atomic_float2 */ - .shaderBufferFloat16Atomics = pdevice->info.has_lsc, - .shaderBufferFloat16AtomicAdd = false, - .shaderBufferFloat16AtomicMinMax = pdevice->info.has_lsc, - .shaderBufferFloat32AtomicMinMax = true, - .shaderBufferFloat64AtomicMinMax = - pdevice->info.has_64bit_float && pdevice->info.has_lsc, - .shaderSharedFloat16Atomics = pdevice->info.has_lsc, - .shaderSharedFloat16AtomicAdd = false, - .shaderSharedFloat16AtomicMinMax = pdevice->info.has_lsc, - .shaderSharedFloat32AtomicMinMax = true, - .shaderSharedFloat64AtomicMinMax = false, - .shaderImageFloat32AtomicMinMax = false, - .sparseImageFloat32AtomicMinMax = false, +static void +anv_physical_device_destroy(struct vk_physical_device *vk_device) +{ + struct anv_physical_device *device = + container_of(vk_device, struct anv_physical_device, vk); - /* VK_KHR_shader_clock */ - .shaderSubgroupClock = true, - .shaderDeviceClock = false, + anv_finish_wsi(device); + anv_measure_device_destroy(device); + free(device->engine_info); + anv_physical_device_free_disk_cache(device); + 
ralloc_free(device->compiler); + ralloc_free(device->perf); + close(device->local_fd); + if (device->master_fd >= 0) + close(device->master_fd); + vk_physical_device_finish(&device->vk); + vk_free(&device->instance->vk.alloc, device); +} - /* VK_INTEL_shader_integer_functions2 */ - .shaderIntegerFunctions2 = true, +VkResult anv_EnumerateInstanceExtensionProperties( + const char* pLayerName, + uint32_t* pPropertyCount, + VkExtensionProperties* pProperties) +{ + if (pLayerName) + return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT); - /* VK_EXT_shader_module_identifier */ - .shaderModuleIdentifier = true, + return vk_enumerate_instance_extension_properties( + &instance_extensions, pPropertyCount, pProperties); +} - /* VK_KHR_shader_subgroup_uniform_control_flow */ - .shaderSubgroupUniformControlFlow = true, +static void +anv_init_dri_options(struct anv_instance *instance) +{ + driParseOptionInfo(&instance->available_dri_options, anv_dri_options, + ARRAY_SIZE(anv_dri_options)); + driParseConfigFiles(&instance->dri_options, + &instance->available_dri_options, 0, "anv", NULL, NULL, + instance->vk.app_info.app_name, + instance->vk.app_info.app_version, + instance->vk.app_info.engine_name, + instance->vk.app_info.engine_version); - /* VK_EXT_texel_buffer_alignment */ - .texelBufferAlignment = true, + instance->assume_full_subgroups = + driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups"); + instance->limit_trig_input_range = + driQueryOptionb(&instance->dri_options, "limit_trig_input_range"); + instance->sample_mask_out_opengl_behaviour = + driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour"); + instance->lower_depth_range_rate = + driQueryOptionf(&instance->dri_options, "lower_depth_range_rate"); + instance->no_16bit = + driQueryOptionb(&instance->dri_options, "no_16bit"); + instance->mesh_conv_prim_attrs_to_vert_attrs = + driQueryOptioni(&instance->dri_options, "anv_mesh_conv_prim_attrs_to_vert_attrs"); + instance->fp64_workaround_enabled = + driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled"); + instance->generated_indirect_threshold = + driQueryOptioni(&instance->dri_options, "generated_indirect_threshold"); + instance->query_clear_with_blorp_threshold = + driQueryOptioni(&instance->dri_options, "query_clear_with_blorp_threshold"); +} - /* VK_EXT_transform_feedback */ - .transformFeedback = true, - .geometryStreams = true, +VkResult anv_CreateInstance( + const VkInstanceCreateInfo* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkInstance* pInstance) +{ + struct anv_instance *instance; + VkResult result; - /* VK_EXT_vertex_attribute_divisor */ - .vertexAttributeInstanceRateDivisor = true, - .vertexAttributeInstanceRateZeroDivisor = true, + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); - /* VK_KHR_workgroup_memory_explicit_layout */ - .workgroupMemoryExplicitLayout = true, - .workgroupMemoryExplicitLayoutScalarBlockLayout = true, - .workgroupMemoryExplicitLayout8BitAccess = true, - .workgroupMemoryExplicitLayout16BitAccess = true, + if (pAllocator == NULL) + pAllocator = vk_default_allocator(); - /* VK_EXT_ycbcr_image_arrays */ - .ycbcrImageArrays = true, + instance = vk_alloc(pAllocator, sizeof(*instance), 8, + VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); + if (!instance) + return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); - /* VK_EXT_extended_dynamic_state */ - .extendedDynamicState = true, + struct vk_instance_dispatch_table dispatch_table; + vk_instance_dispatch_table_from_entrypoints( + &dispatch_table, 
&anv_instance_entrypoints, true); + vk_instance_dispatch_table_from_entrypoints( + &dispatch_table, &wsi_instance_entrypoints, false); - /* VK_EXT_extended_dynamic_state2 */ - .extendedDynamicState2 = true, - .extendedDynamicState2LogicOp = true, - .extendedDynamicState2PatchControlPoints = false, + result = vk_instance_init(&instance->vk, &instance_extensions, + &dispatch_table, pCreateInfo, pAllocator); + if (result != VK_SUCCESS) { + vk_free(pAllocator, instance); + return vk_error(NULL, result); + } - /* VK_EXT_extended_dynamic_state3 */ - .extendedDynamicState3PolygonMode = true, - .extendedDynamicState3TessellationDomainOrigin = true, - .extendedDynamicState3RasterizationStream = true, - .extendedDynamicState3LineStippleEnable = true, - .extendedDynamicState3LineRasterizationMode = true, - .extendedDynamicState3LogicOpEnable = true, - .extendedDynamicState3AlphaToOneEnable = true, - .extendedDynamicState3DepthClipEnable = true, - .extendedDynamicState3DepthClampEnable = true, - .extendedDynamicState3DepthClipNegativeOneToOne = true, - .extendedDynamicState3ProvokingVertexMode = true, - .extendedDynamicState3ColorBlendEnable = true, - .extendedDynamicState3ColorWriteMask = true, - .extendedDynamicState3ColorBlendEquation = true, - .extendedDynamicState3SampleLocationsEnable = true, - .extendedDynamicState3SampleMask = true, + instance->vk.physical_devices.try_create_for_drm = anv_physical_device_try_create; + instance->vk.physical_devices.destroy = anv_physical_device_destroy; - .extendedDynamicState3RasterizationSamples = false, - .extendedDynamicState3AlphaToCoverageEnable = false, - .extendedDynamicState3ConservativeRasterizationMode = false, - .extendedDynamicState3ExtraPrimitiveOverestimationSize = false, - .extendedDynamicState3ViewportWScalingEnable = false, - .extendedDynamicState3ViewportSwizzle = false, - .extendedDynamicState3ShadingRateImageEnable = false, - .extendedDynamicState3CoverageToColorEnable = false, - .extendedDynamicState3CoverageToColorLocation = false, - .extendedDynamicState3CoverageModulationMode = false, - .extendedDynamicState3CoverageModulationTableEnable = false, - .extendedDynamicState3CoverageModulationTable = false, - .extendedDynamicState3CoverageReductionMode = false, - .extendedDynamicState3RepresentativeFragmentTestEnable = false, - .extendedDynamicState3ColorBlendAdvanced = false, + VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); - /* VK_EXT_multi_draw */ - .multiDraw = true, + anv_init_dri_options(instance); - /* VK_EXT_non_seamless_cube_map */ - .nonSeamlessCubeMap = true, + intel_driver_ds_init(); - /* VK_EXT_primitive_topology_list_restart */ - .primitiveTopologyListRestart = true, - .primitiveTopologyPatchListRestart = true, + *pInstance = anv_instance_to_handle(instance); - /* VK_EXT_depth_clip_control */ - .depthClipControl = true, + return VK_SUCCESS; +} - /* VK_KHR_present_id */ - .presentId = pdevice->vk.supported_extensions.KHR_present_id, +void anv_DestroyInstance( + VkInstance _instance, + const VkAllocationCallbacks* pAllocator) +{ + ANV_FROM_HANDLE(anv_instance, instance, _instance); - /* VK_KHR_present_wait */ - .presentWait = pdevice->vk.supported_extensions.KHR_present_wait, + if (!instance) + return; - /* VK_EXT_vertex_input_dynamic_state */ - .vertexInputDynamicState = true, - }; + VG(VALGRIND_DESTROY_MEMPOOL(instance)); - /* The new DOOM and Wolfenstein games require depthBounds without - * checking for it. They seem to run fine without it so just claim it's - * there and accept the consequences. 
- */ - if (app_info->engine_name && strcmp(app_info->engine_name, "idTech") == 0) - features->depthBounds = true; + driDestroyOptionCache(&instance->dri_options); + driDestroyOptionInfo(&instance->available_dri_options); + + vk_instance_finish(&instance->vk); + vk_free(&instance->vk.alloc, instance); } #define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64 -- 2.7.4
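(Closing aside, not part of the patch: the engine-name matching above — the driconf lookup in anv_init_dri_options(), the zink/DXVK default for ANV_GPL, and the idTech depthBounds claim in get_features() — keys off the VkApplicationInfo the application supplies at instance creation. A minimal sketch of the application side, using only core Vulkan 1.0; the application name "demo" is arbitrary.)

#include <vulkan/vulkan.h>

/* Create an instance whose pEngineName a driver can match against,
 * e.g. "idTech", "mesa zink" or "DXVK" in the checks above. */
static VkInstance
create_instance_with_engine_name(const char *engine_name)
{
   const VkApplicationInfo app_info = {
      .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
      .pApplicationName = "demo",
      .pEngineName = engine_name,
      .apiVersion = VK_API_VERSION_1_0,
   };
   const VkInstanceCreateInfo create_info = {
      .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
      .pApplicationInfo = &app_info,
   };

   VkInstance instance = VK_NULL_HANDLE;
   if (vkCreateInstance(&create_info, NULL, &instance) != VK_SUCCESS)
      return VK_NULL_HANDLE;
   return instance;
}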