From 19330eeb1d65c631ce11a0be1cb13437c6c28491 Mon Sep 17 00:00:00 2001
From: Ian Romanick
Date: Wed, 16 Mar 2022 12:56:54 -0700
Subject: [PATCH] intel/fs: Force destination types on DP4A instructions

Most of the time, this doesn't matter. On the versions with _sat, if
the destination type is incorrect, the clamping will not happen
correctly.

Fixes the following CTS tests:

dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_packed_ss_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_packed_su_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_packed_us_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_packed_uu_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_ss_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_su_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_us_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_uu_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_ss_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_su_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_us_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_uu_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_ss_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_su_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_us_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_uu_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_ss_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_su_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_us_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_uu_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_ss_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_su_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_us_v4i8_out32
dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_uu_v4i8_out32

v2: Update anv-tgl-fails.txt.

Reviewed-by: Ivan Briano
Fixes: 0f809dbf404 ("intel/compiler: Basic support for DP4A instruction")
Part-of:
---
 src/intel/ci/anv-tgl-fails.txt    | 13 -------------
 src/intel/compiler/brw_fs_nir.cpp |  6 +++---
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/src/intel/ci/anv-tgl-fails.txt b/src/intel/ci/anv-tgl-fails.txt
index 1cfb087..b6d4df1 100644
--- a/src/intel/ci/anv-tgl-fails.txt
+++ b/src/intel/ci/anv-tgl-fails.txt
@@ -1036,19 +1036,6 @@ dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_8_32.samples_8.d32_sfloat_s8_
 dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_8_32.samples_8.d32_sfloat_s8_uint_separate_layouts.depth_zero_stencil_zero_testing_stencil_samplemask,Crash
 dEQP-VK.renderpass2.depth_stencil_resolve.image_2d_8_32.samples_8.d32_sfloat_s8_uint_separate_layouts.depth_zero_stencil_zero_unused_resolve_testing_stencil_samplemask,Crash
 
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_ss_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_su_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.all_us_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_su_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_us_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_packed_uu_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_us_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.limits_uu_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_ss_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_packed_su_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_ss_v4i8_out32,Fail
-dEQP-VK.spirv_assembly.instruction.compute.opudotaccsatkhr.small_su_v4i8_out32,Fail
-
 # https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/3505
 dEQP-VK.subgroups.multiple_dispatches.uniform_subgroup_size,Fail
 
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index 011e0eb..cf5126e 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -1889,7 +1889,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
 
    case nir_op_sdot_4x8_iadd:
    case nir_op_sdot_4x8_iadd_sat:
-      inst = bld.DP4A(result,
+      inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_D),
                       retype(op[2], BRW_REGISTER_TYPE_D),
                       retype(op[0], BRW_REGISTER_TYPE_D),
                       retype(op[1], BRW_REGISTER_TYPE_D));
@@ -1900,7 +1900,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
 
    case nir_op_udot_4x8_uadd:
    case nir_op_udot_4x8_uadd_sat:
-      inst = bld.DP4A(result,
+      inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_UD),
                       retype(op[2], BRW_REGISTER_TYPE_UD),
                       retype(op[0], BRW_REGISTER_TYPE_UD),
                       retype(op[1], BRW_REGISTER_TYPE_UD));
@@ -1911,7 +1911,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
 
    case nir_op_sudot_4x8_iadd:
    case nir_op_sudot_4x8_iadd_sat:
-      inst = bld.DP4A(result,
+      inst = bld.DP4A(retype(result, BRW_REGISTER_TYPE_D),
                       retype(op[2], BRW_REGISTER_TYPE_D),
                       retype(op[0], BRW_REGISTER_TYPE_D),
                       retype(op[1], BRW_REGISTER_TYPE_UD));
-- 
2.7.4