From 8b75b726135dcc8fc604e32a4e4be47caf374d61 Mon Sep 17 00:00:00 2001 From: Emma Anholt Date: Tue, 7 Mar 2023 10:44:01 -0800 Subject: [PATCH] anv+hasvk: Use driconf to disable 16-bit for zink. The HW can technically execute 16-bit operations, but the restrictions on 16-bit ALU ops are so great that it ends up not being a win for GLES-on-Vulkan to lower mediump to 16-bit operations, at least with the current state of the Intel compiler. This brings zink-on-anv in line with iris and angle-on-anv for mediump behavior (ANGLE uses RelaxedPrecision, which we ignore). Perf on some angle traces on my brya (ADL) and i9-9900K (CFL): ADL zink pubg_mobile_battle_royale: +13.4574% +/- 5.2046% (n=5) CFL zink pubg_mobile_battle_royale: +29.5332% +/- 0.646585% (n=6) ADL zink aztec_ruins_high: +5.78027% +/- 4.80645% (n=4) CFL zink aztec_ruins_high: -1.10641% +/- 0.140562% (n=12) ADL zink trex_200: +5.86956% +/- 2.09633% (n=10) CFL zink trex_200: +9.72136% +/- 0.749261% (n=10) Reviewed-by: Ivan Briano Part-of: --- src/intel/vulkan/anv_device.c | 16 ++++++++++------ src/intel/vulkan/anv_private.h | 3 +++ src/intel/vulkan_hasvk/anv_device.c | 15 +++++++++------ src/intel/vulkan_hasvk/anv_private.h | 3 +++ src/util/00-mesa-defaults.conf | 9 +++++++++ src/util/driconf.h | 4 ++++ 6 files changed, 38 insertions(+), 12 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index cbd51ab..9db9343 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -74,6 +74,7 @@ static const driOptionDescription anv_dri_options[] = { DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false) DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false) DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4) + DRI_CONF_NO_16BIT(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG @@ -193,7 +194,7 @@ get_device_extensions(const struct anv_physical_device *device, *ext = (struct vk_device_extension_table) { .KHR_8bit_storage = true, - .KHR_16bit_storage = true, + .KHR_16bit_storage = !device->instance->no_16bit, .KHR_acceleration_structure = rt_enabled, .KHR_bind_memory2 = true, .KHR_buffer_device_address = true, @@ -255,7 +256,7 @@ get_device_extensions(const struct anv_physical_device *device, .KHR_shader_atomic_int64 = true, .KHR_shader_clock = true, .KHR_shader_draw_parameters = true, - .KHR_shader_float16_int8 = true, + .KHR_shader_float16_int8 = !device->instance->no_16bit, .KHR_shader_float_controls = true, .KHR_shader_integer_dot_product = true, .KHR_shader_non_semantic_info = true, @@ -1087,6 +1088,9 @@ anv_init_dri_options(struct anv_instance *instance) driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour"); instance->lower_depth_range_rate = driQueryOptionf(&instance->dri_options, "lower_depth_range_rate"); + instance->no_16bit = + driQueryOptionb(&instance->dri_options, "no_16bit"); + instance->fp64_workaround_enabled = driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled"); instance->generated_indirect_threshold = @@ -1235,8 +1239,8 @@ anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice, { assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES); - f->storageBuffer16BitAccess = true; - f->uniformAndStorageBuffer16BitAccess = true; + f->storageBuffer16BitAccess = !pdevice->instance->no_16bit; + f->uniformAndStorageBuffer16BitAccess = !pdevice->instance->no_16bit; f->storagePushConstant16 = true; f->storageInputOutput16 = false; f->multiview = true; @@ -1262,8 +1266,8 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice, f->storagePushConstant8 = true; f->shaderBufferInt64Atomics = true; f->shaderSharedInt64Atomics = false; - f->shaderFloat16 = true; - f->shaderInt8 = true; + f->shaderFloat16 = !pdevice->instance->no_16bit; + f->shaderInt8 = !pdevice->instance->no_16bit; f->descriptorIndexing = true; f->shaderInputAttachmentArrayDynamicIndexing = false; diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index 970dfbd..71ba1de 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -1045,6 +1045,9 @@ struct anv_instance { bool fp64_workaround_enabled; float lower_depth_range_rate; unsigned generated_indirect_threshold; + + /* HW workarounds */ + bool no_16bit; }; VkResult anv_init_wsi(struct anv_physical_device *physical_device); diff --git a/src/intel/vulkan_hasvk/anv_device.c b/src/intel/vulkan_hasvk/anv_device.c index fb47b2c..5775a0d 100644 --- a/src/intel/vulkan_hasvk/anv_device.c +++ b/src/intel/vulkan_hasvk/anv_device.c @@ -69,6 +69,7 @@ static const driOptionDescription anv_dri_options[] = { DRI_CONF_VK_XWAYLAND_WAIT_READY(true) DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false) DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false) + DRI_CONF_NO_16BIT(false) DRI_CONF_SECTION_END DRI_CONF_SECTION_DEBUG @@ -191,7 +192,7 @@ get_device_extensions(const struct anv_physical_device *device, *ext = (struct vk_device_extension_table) { .KHR_8bit_storage = device->info.ver >= 8, - .KHR_16bit_storage = device->info.ver >= 8, + .KHR_16bit_storage = device->info.ver >= 8 && !device->instance->no_16bit, .KHR_bind_memory2 = true, .KHR_buffer_device_address = device->has_a64_buffer_access, .KHR_copy_commands2 = true, @@ -235,7 +236,7 @@ get_device_extensions(const struct anv_physical_device *device, .KHR_separate_depth_stencil_layouts = true, .KHR_shader_clock = true, .KHR_shader_draw_parameters = true, - .KHR_shader_float16_int8 = device->info.ver >= 8, + .KHR_shader_float16_int8 = device->info.ver >= 8 && !device->instance->no_16bit, .KHR_shader_float_controls = true, .KHR_shader_integer_dot_product = true, .KHR_shader_non_semantic_info = true, @@ -1016,6 +1017,8 @@ anv_init_dri_options(struct anv_instance *instance) driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour"); instance->lower_depth_range_rate = driQueryOptionf(&instance->dri_options, "lower_depth_range_rate"); + instance->no_16bit = + driQueryOptionb(&instance->dri_options, "no_16bit"); } VkResult anv_CreateInstance( @@ -1162,8 +1165,8 @@ anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice, { assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES); - f->storageBuffer16BitAccess = pdevice->info.ver >= 8; - f->uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8; + f->storageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit; + f->uniformAndStorageBuffer16BitAccess = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit; f->storagePushConstant16 = pdevice->info.ver >= 8; f->storageInputOutput16 = false; f->multiview = true; @@ -1189,8 +1192,8 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice, f->storagePushConstant8 = pdevice->info.ver >= 8; f->shaderBufferInt64Atomics = false; f->shaderSharedInt64Atomics = false; - f->shaderFloat16 = pdevice->info.ver >= 8; - f->shaderInt8 = pdevice->info.ver >= 8; + f->shaderFloat16 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit; + f->shaderInt8 = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit; f->descriptorIndexing = false; f->shaderInputAttachmentArrayDynamicIndexing = false; diff --git a/src/intel/vulkan_hasvk/anv_private.h b/src/intel/vulkan_hasvk/anv_private.h index a7bc815..73c0c86 100644 --- a/src/intel/vulkan_hasvk/anv_private.h +++ b/src/intel/vulkan_hasvk/anv_private.h @@ -955,6 +955,9 @@ struct anv_instance { bool limit_trig_input_range; bool sample_mask_out_opengl_behaviour; float lower_depth_range_rate; + + /* HW workarounds */ + bool no_16bit; }; VkResult anv_init_wsi(struct anv_physical_device *physical_device); diff --git a/src/util/00-mesa-defaults.conf b/src/util/00-mesa-defaults.conf index 7c94d47..386ffa6 100644 --- a/src/util/00-mesa-defaults.conf +++ b/src/util/00-mesa-defaults.conf @@ -1012,6 +1012,15 @@ TODO: document the other workarounds. + + + diff --git a/src/util/driconf.h b/src/util/driconf.h index 00f9512..eb5393f 100644 --- a/src/util/driconf.h +++ b/src/util/driconf.h @@ -302,6 +302,10 @@ DRI_CONF_OPT_B(limit_trig_input_range, def, \ "Limit trig input range to [-2p : 2p] to improve sin/cos calculation precision on Intel") +#define DRI_CONF_NO_16BIT(def) \ + DRI_CONF_OPT_B(no_16bit, def, \ + "Disable 16-bit instructions") + /** * \brief Image quality-related options */ -- 2.7.4