From fcb72ffd0c61e2b3226306fae37b85ab4982a39e Mon Sep 17 00:00:00 2001 From: Jordan Justen Date: Tue, 18 Apr 2023 20:11:41 -0400 Subject: [PATCH] intel/compiler/gfx12.5+: Lower 64-bit cluster_broadcast with 32-bit ops MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit For MTL (verx10 == 125), float64 is supported, but int64 is not. Therefore we need to lower cluster broadcast using 32-bit int ops. For gfx12.5+ platforms that support int64, the register regions used by cluster broadcast aren't supported by the 64-bit pipeline. On MTL, dEQP-VK.subgroups.clustered.*_double* and dEQP-VK.subgroups.clustered.*_dvec* were failing to validate the compiled shader in debug mode, and reportedly gpu-hanging in release mode. With this change dEQP-VK.subgroups.clustered.*_double* passed all 48 tests and dEQP-VK.subgroups.clustered.*_dvec* passed all 140 tests on MTL. Rework: * Move from generator to brw_fs_lower_regioning.cpp. (Suggested by Francisco) * Apply to verx10 >= 125. (Suggested by Francisco) Cc: 23.1 Signed-off-by: Jordan Justen Reviewed-by: Marcin Ślusarz (v1) Reviewed-by: Francisco Jerez Part-of: --- src/intel/compiler/brw_fs_lower_regioning.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/intel/compiler/brw_fs_lower_regioning.cpp b/src/intel/compiler/brw_fs_lower_regioning.cpp index a30d183..a86e092 100644 --- a/src/intel/compiler/brw_fs_lower_regioning.cpp +++ b/src/intel/compiler/brw_fs_lower_regioning.cpp @@ -174,10 +174,17 @@ namespace { * integer DWord multiply, indirect addressing must not be * used." * + * For MTL (verx10 == 125), float64 is supported, but int64 is not. + * Therefore we need to lower cluster broadcast using 32-bit int ops. + * + * For gfx12.5+ platforms that support int64, the register regions + * used by cluster broadcast aren't supported by the 64-bit pipeline. + * * Work around the above and handle platforms that don't * support 64-bit types at all. 
*/ - if ((!has_64bit || devinfo->platform == INTEL_PLATFORM_CHV || + if ((!has_64bit || devinfo->verx10 >= 125 || + devinfo->platform == INTEL_PLATFORM_CHV || intel_device_info_is_9lp(devinfo)) && type_sz(t) > 4) return BRW_REGISTER_TYPE_UD; else -- 2.7.4