radv: Enable NV_mesh_shader with a perftest flag.
author Timur Kristóf <timur.kristof@gmail.com>
Thu, 28 Oct 2021 19:25:48 +0000 (21:25 +0200)
committer Marge Bot <emma+marge@anholt.net>
Fri, 31 Dec 2021 13:05:09 +0000 (13:05 +0000)
We don't plan to support NV_mesh_shader officially on RADV,
because it performs poorly on AMD hardware. However, we are
implementing this extension to get some experience with mesh
shader technology.

Users should not rely on this support because we are going
to remove it if/when a potential cross-vendor extension appears.

Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Acked-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13580>

docs/envvars.rst
src/amd/vulkan/radv_debug.h
src/amd/vulkan/radv_device.c

index a79a785..268dad2 100644 (file)
@@ -686,6 +686,8 @@ RADV driver environment variables
       enable local BOs
    ``nosam``
       disable optimizations that get enabled when all VRAM is CPU visible.
+   ``nv_ms``
+      enable unofficial experimental support for NV_mesh_shader.
    ``pswave32``
       enable wave32 for pixel shaders (GFX10+)
    ``nggc``
index 5c0dd14..491efc4 100644 (file)
@@ -78,6 +78,7 @@ enum {
    RADV_PERFTEST_RT = 1u << 8,
    RADV_PERFTEST_NGGC = 1u << 9,
    RADV_PERFTEST_FORCE_EMULATE_RT = 1u << 10,
+   RADV_PERFTEST_NV_MS = 1u << 11,
 };
 
 bool radv_init_trace(struct radv_device *device);
index 1537f11..0e1a56b 100644 (file)
@@ -568,6 +568,8 @@ radv_physical_device_get_supported_extensions(const struct radv_physical_device
       .GOOGLE_hlsl_functionality1 = true,
       .GOOGLE_user_type = true,
       .NV_compute_shader_derivatives = true,
+      .NV_mesh_shader = device->use_ngg && device->rad_info.chip_class >= GFX10_3 &&
+                        device->instance->perftest_flags & RADV_PERFTEST_NV_MS && !device->use_llvm,
       .VALVE_mutable_descriptor_type = true,
    };
 }
@@ -896,6 +898,7 @@ static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_P
                                                              {"rt", RADV_PERFTEST_RT},
                                                              {"nggc", RADV_PERFTEST_NGGC},
                                                              {"force_emulate_rt", RADV_PERFTEST_FORCE_EMULATE_RT},
+                                                             {"nv_ms", RADV_PERFTEST_NV_MS},
                                                              {NULL, 0}};
 
 const char *
@@ -1674,6 +1677,13 @@ radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
          features->dynamicRendering = true;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_NV: {
+         VkPhysicalDeviceMeshShaderFeaturesNV *features =
+            (VkPhysicalDeviceMeshShaderFeaturesNV *)ext;
+         features->meshShader = true;
+         features->taskShader = false; /* TODO */
+         break;
+      }
       default:
          break;
       }
@@ -2323,6 +2333,37 @@ radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
          properties->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
          break;
       }
+      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_PROPERTIES_NV: {
+         VkPhysicalDeviceMeshShaderPropertiesNV *properties =
+            (VkPhysicalDeviceMeshShaderPropertiesNV *)ext;
+
+         /* Task shader limitations:
+          * Same as compute, because TS are compiled to CS.
+          */
+         properties->maxDrawMeshTasksCount = 65535;
+         properties->maxTaskTotalMemorySize = 65536;
+         properties->maxTaskWorkGroupInvocations = 1024;
+         properties->maxTaskWorkGroupSize[0] = 1024;
+         properties->maxTaskWorkGroupSize[1] = 1024;
+         properties->maxTaskWorkGroupSize[2] = 1024;
+         properties->maxTaskOutputCount = 1024;
+
+         /* Mesh shader limitations:
+          * Same as NGG, because MS are compiled to NGG.
+          */
+         properties->maxMeshMultiviewViewCount = MAX_VIEWS;
+         properties->maxMeshOutputPrimitives = 256;
+         properties->maxMeshOutputVertices = 256;
+         properties->maxMeshTotalMemorySize = 31 * 1024; /* Reserve 1K for prim indices, etc. */
+         properties->maxMeshWorkGroupInvocations = 256;
+         properties->maxMeshWorkGroupSize[0] = 256;
+         properties->maxMeshWorkGroupSize[1] = 256;
+         properties->maxMeshWorkGroupSize[2] = 256;
+         properties->meshOutputPerPrimitiveGranularity = 1;
+         properties->meshOutputPerVertexGranularity = 1;
+
+         break;
+      }
       default:
          break;
       }