From 2ca13abccebfaafe567f78ad40a04d1129e63942 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Thu, 9 Dec 2021 15:25:20 -0800 Subject: [PATCH] intel/fs: Use HF as destination type for F32TOF16 in fquantize2f16 Having an integer destination type instead of a float destination type confuses the SWSB code. This causes problems on some Intel GPUs. Fix this by using the correct type in the destination of the F32TOF16 opcode. Gfx7 doesn't have the HF type, so continue to emit W on that platform. The assertions in brw_F32TO16 (brw_eu_emit.c) are updated to reflect this. In scalar mode, UD is never emitted as a destination type for this opcode, so remove it from the allowed types in the assertion. I also condidered doing something like de55fd358fa ("intel/fs/xehp: Teach SWSB pass about the exec pipeline of FS_OPCODE_PACK_HALF_2x16_SPLIT."), but Curro recommended that just using the correct types is a better fix. I agree. v2: Add missing changes to fs_generator::generate_pack_half_2x16_split. I'm not sure how I (and the Intel CI) missed that the first time. :( v3: Fix copy-and-paste issue in the v2 fix. Noticed by Tapani. Reviewed-by: Francisco Jerez [v1] Part-of: --- src/intel/compiler/brw_eu_emit.c | 10 ++++++---- src/intel/compiler/brw_fs_generator.cpp | 4 +++- src/intel/compiler/brw_fs_nir.cpp | 3 ++- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c index db379a1..1cb0136 100644 --- a/src/intel/compiler/brw_eu_emit.c +++ b/src/intel/compiler/brw_eu_emit.c @@ -1262,10 +1262,12 @@ brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src) if (align16) { assert(dst.type == BRW_REGISTER_TYPE_UD); } else { - assert(dst.type == BRW_REGISTER_TYPE_UD || - dst.type == BRW_REGISTER_TYPE_W || - dst.type == BRW_REGISTER_TYPE_UW || - dst.type == BRW_REGISTER_TYPE_HF); + if (devinfo->ver <= 7) { + assert(dst.type == BRW_REGISTER_TYPE_W || + dst.type == BRW_REGISTER_TYPE_UW); + } else { + assert(dst.type == BRW_REGISTER_TYPE_HF); + } } brw_push_insn_state(p); diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index b97aed2..926568a 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1816,7 +1816,9 @@ fs_generator::generate_pack_half_2x16_split(fs_inst *, * (HorzStride) of 2. The 16-bit result is stored in the lower word of * each destination channel and the upper word is not modified. */ - struct brw_reg dst_w = spread(retype(dst, BRW_REGISTER_TYPE_W), 2); + const enum brw_reg_type t = devinfo->ver > 7 + ? BRW_REGISTER_TYPE_HF : BRW_REGISTER_TYPE_W; + struct brw_reg dst_w = spread(retype(dst, t), 2); /* Give each 32-bit channel of dst the form below, where "." means * unchanged. diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index f8e2b77..68e5a89 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -1656,7 +1656,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr, fs_reg zero = bld.vgrf(BRW_REGISTER_TYPE_F); /* The destination stride must be at least as big as the source stride. */ - tmp16.type = BRW_REGISTER_TYPE_W; + tmp16.type = devinfo->ver > 7 + ? BRW_REGISTER_TYPE_HF : BRW_REGISTER_TYPE_W; tmp16.stride = 2; /* Check for denormal */ -- 2.7.4