anv+hasvk: Use driconf to disable 16-bit for zink.
authorEmma Anholt <emma@anholt.net>
Tue, 7 Mar 2023 18:44:01 +0000 (10:44 -0800)
committerMarge Bot <emma+marge@anholt.net>
Thu, 9 Mar 2023 02:27:01 +0000 (02:27 +0000)
The HW can technically execute 16-bit operations, but the restrictions on
16-bit ALU ops are so great that it ends up not being a win for
GLES-on-Vulkan to lower mediump to 16-bit operations, at least with the
current state of the Intel compiler.  This brings zink-on-anv in line with
iris and angle-on-anv for mediump behavior (ANGLE uses RelaxedPrecision,
which we ignore).

Perf on some angle traces on my brya (ADL) and i9-9900K (CFL):

ADL zink pubg_mobile_battle_royale:  +13.4574% +/- 5.2046% (n=5)
CFL zink pubg_mobile_battle_royale:  +29.5332% +/- 0.646585% (n=6)
ADL zink aztec_ruins_high:           +5.78027% +/- 4.80645% (n=4)
CFL zink aztec_ruins_high:           -1.10641% +/- 0.140562% (n=12)
ADL zink trex_200:                   +5.86956% +/- 2.09633% (n=10)
CFL zink trex_200:                   +9.72136% +/- 0.749261% (n=10)

Reviewed-by: Ivan Briano <ivan.briano@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21775>

src/intel/vulkan/anv_device.c
src/intel/vulkan/anv_private.h
src/intel/vulkan_hasvk/anv_device.c
src/intel/vulkan_hasvk/anv_private.h
src/util/00-mesa-defaults.conf
src/util/driconf.h

index cbd51ab..9db9343 100644 (file)
@@ -74,6 +74,7 @@ static const driOptionDescription anv_dri_options[] = {
       DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
       DRI_CONF_ANV_FP64_WORKAROUND_ENABLED(false)
       DRI_CONF_ANV_GENERATED_INDIRECT_THRESHOLD(4)
+      DRI_CONF_NO_16BIT(false)
    DRI_CONF_SECTION_END
 
    DRI_CONF_SECTION_DEBUG
@@ -193,7 +194,7 @@ get_device_extensions(const struct anv_physical_device *device,
 
    *ext = (struct vk_device_extension_table) {
       .KHR_8bit_storage                      = true,
-      .KHR_16bit_storage                     = true,
+      .KHR_16bit_storage                     = !device->instance->no_16bit,
       .KHR_acceleration_structure            = rt_enabled,
       .KHR_bind_memory2                      = true,
       .KHR_buffer_device_address             = true,
@@ -255,7 +256,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .KHR_shader_atomic_int64               = true,
       .KHR_shader_clock                      = true,
       .KHR_shader_draw_parameters            = true,
-      .KHR_shader_float16_int8               = true,
+      .KHR_shader_float16_int8               = !device->instance->no_16bit,
       .KHR_shader_float_controls             = true,
       .KHR_shader_integer_dot_product        = true,
       .KHR_shader_non_semantic_info          = true,
@@ -1087,6 +1088,9 @@ anv_init_dri_options(struct anv_instance *instance)
             driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
     instance->lower_depth_range_rate =
             driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
+    instance->no_16bit =
+            driQueryOptionb(&instance->dri_options, "no_16bit");
+
     instance->fp64_workaround_enabled =
             driQueryOptionb(&instance->dri_options, "fp64_workaround_enabled");
     instance->generated_indirect_threshold =
@@ -1235,8 +1239,8 @@ anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
 {
    assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
 
-   f->storageBuffer16BitAccess            = true;
-   f->uniformAndStorageBuffer16BitAccess  = true;
+   f->storageBuffer16BitAccess            = !pdevice->instance->no_16bit;
+   f->uniformAndStorageBuffer16BitAccess  = !pdevice->instance->no_16bit;
    f->storagePushConstant16               = true;
    f->storageInputOutput16                = false;
    f->multiview                           = true;
@@ -1262,8 +1266,8 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
    f->storagePushConstant8                = true;
    f->shaderBufferInt64Atomics            = true;
    f->shaderSharedInt64Atomics            = false;
-   f->shaderFloat16                       = true;
-   f->shaderInt8                          = true;
+   f->shaderFloat16                       = !pdevice->instance->no_16bit;
+   f->shaderInt8                          = !pdevice->instance->no_16bit;
 
    f->descriptorIndexing                                 = true;
    f->shaderInputAttachmentArrayDynamicIndexing          = false;
index 970dfbd..71ba1de 100644 (file)
@@ -1045,6 +1045,9 @@ struct anv_instance {
     bool                                        fp64_workaround_enabled;
     float                                       lower_depth_range_rate;
     unsigned                                    generated_indirect_threshold;
+
+    /* HW workarounds */
+    bool                                        no_16bit;
 };
 
 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
index fb47b2c..5775a0d 100644 (file)
@@ -69,6 +69,7 @@ static const driOptionDescription anv_dri_options[] = {
       DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
       DRI_CONF_ANV_ASSUME_FULL_SUBGROUPS(false)
       DRI_CONF_ANV_SAMPLE_MASK_OUT_OPENGL_BEHAVIOUR(false)
+      DRI_CONF_NO_16BIT(false)
    DRI_CONF_SECTION_END
 
    DRI_CONF_SECTION_DEBUG
@@ -191,7 +192,7 @@ get_device_extensions(const struct anv_physical_device *device,
 
    *ext = (struct vk_device_extension_table) {
       .KHR_8bit_storage                      = device->info.ver >= 8,
-      .KHR_16bit_storage                     = device->info.ver >= 8,
+      .KHR_16bit_storage                     = device->info.ver >= 8 && !device->instance->no_16bit,
       .KHR_bind_memory2                      = true,
       .KHR_buffer_device_address             = device->has_a64_buffer_access,
       .KHR_copy_commands2                    = true,
@@ -235,7 +236,7 @@ get_device_extensions(const struct anv_physical_device *device,
       .KHR_separate_depth_stencil_layouts    = true,
       .KHR_shader_clock                      = true,
       .KHR_shader_draw_parameters            = true,
-      .KHR_shader_float16_int8               = device->info.ver >= 8,
+      .KHR_shader_float16_int8               = device->info.ver >= 8 && !device->instance->no_16bit,
       .KHR_shader_float_controls             = true,
       .KHR_shader_integer_dot_product        = true,
       .KHR_shader_non_semantic_info          = true,
@@ -1016,6 +1017,8 @@ anv_init_dri_options(struct anv_instance *instance)
             driQueryOptionb(&instance->dri_options, "anv_sample_mask_out_opengl_behaviour");
     instance->lower_depth_range_rate =
             driQueryOptionf(&instance->dri_options, "lower_depth_range_rate");
+    instance->no_16bit =
+            driQueryOptionb(&instance->dri_options, "no_16bit");
 }
 
 VkResult anv_CreateInstance(
@@ -1162,8 +1165,8 @@ anv_get_physical_device_features_1_1(struct anv_physical_device *pdevice,
 {
    assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);
 
-   f->storageBuffer16BitAccess            = pdevice->info.ver >= 8;
-   f->uniformAndStorageBuffer16BitAccess  = pdevice->info.ver >= 8;
+   f->storageBuffer16BitAccess            = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit;
+   f->uniformAndStorageBuffer16BitAccess  = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit;
    f->storagePushConstant16               = pdevice->info.ver >= 8;
    f->storageInputOutput16                = false;
    f->multiview                           = true;
@@ -1189,8 +1192,8 @@ anv_get_physical_device_features_1_2(struct anv_physical_device *pdevice,
    f->storagePushConstant8                = pdevice->info.ver >= 8;
    f->shaderBufferInt64Atomics            = false;
    f->shaderSharedInt64Atomics            = false;
-   f->shaderFloat16                       = pdevice->info.ver >= 8;
-   f->shaderInt8                          = pdevice->info.ver >= 8;
+   f->shaderFloat16                       = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit;
+   f->shaderInt8                          = pdevice->info.ver >= 8 && !pdevice->instance->no_16bit;
 
    f->descriptorIndexing                                 = false;
    f->shaderInputAttachmentArrayDynamicIndexing          = false;
index a7bc815..73c0c86 100644 (file)
@@ -955,6 +955,9 @@ struct anv_instance {
     bool                                        limit_trig_input_range;
     bool                                        sample_mask_out_opengl_behaviour;
     float                                       lower_depth_range_rate;
+
+    /* HW workarounds */
+    bool                                        no_16bit;
 };
 
 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
index 7c94d47..386ffa6 100644 (file)
@@ -1012,6 +1012,15 @@ TODO: document the other workarounds.
         <application name="Rise of the Tomb Raider" executable="ROTTR.exe">
             <option name="limit_trig_input_range" value="true" />
         </application>
+        <!--
+        Disable 16-bit features on zink so that GLES mediump doesn't
+        lower to our inefficient 16-bit shader support.  No need to do so for
+        ANGLE, since it uses RelaxedPrecision decorations, which the Intel
+        compiler ignores.
+        -->
+        <engine engine_name_match="mesa zink">
+            <option name="no_16bit" value="true" />
+        </engine>
     </device>
     <device driver="dzn">
         <application name="No Man's Sky" executable="NMS.exe">
index 00f9512..eb5393f 100644 (file)
    DRI_CONF_OPT_B(limit_trig_input_range, def, \
                   "Limit trig input range to [-2p : 2p] to improve sin/cos calculation precision on Intel")
 
+#define DRI_CONF_NO_16BIT(def) \
+   DRI_CONF_OPT_B(no_16bit, def, \
+                  "Disable 16-bit instructions")
+
 /**
  * \brief Image quality-related options
  */