aco/isel: Fix large inputs being truncated in int32->f16 conversions

author Tony Wasserka <tony.wasserka@gmx.de>

Tue, 16 Mar 2021 09:52:39 +0000 (10:52 +0100)

committer Marge Bot <eric+marge@anholt.net>

Fri, 26 Mar 2021 14:39:23 +0000 (14:39 +0000)
author Tony Wasserka <tony.wasserka@gmx.de>
Tue, 16 Mar 2021 09:52:39 +0000 (10:52 +0100)
committer Marge Bot <eric+marge@anholt.net>
Fri, 26 Mar 2021 14:39:23 +0000 (14:39 +0000)
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp

index 38794e4..89b1202 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -2453,10 +2453,17 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
           src = convert_int(ctx, bld, src, 64, 32, false);
        }
  
-      if (ctx->program->chip_class >= GFX8) {
+      if (ctx->program->chip_class >= GFX8 && input_size <= 16) {
           bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
        } else {
-         /* GFX7 and earlier do not support direct f16⟷i16 conversions */
+         /* Convert to f32 and then down to f16. This is needed to handle
+          * inputs slightly outside the range [INT16_MIN, INT16_MAX],
+          * which are representable via f16 but wouldn't be converted
+          * correctly by v_cvt_f16_i16.
+          *
+          * This is also the fallback path taken on GFX7 and earlier, which
+          * do not support direct f16⟷i16 conversions.
+          */
           src = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), src);
           bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
        }
@@ -2522,6 +2529,8 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
        }
  
        if (ctx->program->chip_class >= GFX8) {
+         /* Only inputs in [0, 65519] round to a finite float16. Converting from
+          * larger inputs is UB, so we just need to consider the lower 16 bits. */
           bld.vop1(aco_opcode::v_cvt_f16_u16, Definition(dst), src);
        } else {
           /* GFX7 and earlier do not support direct f16⟷u16 conversions */
author	Tony Wasserka <tony.wasserka@gmx.de>
	Tue, 16 Mar 2021 09:52:39 +0000 (10:52 +0100)
committer	Marge Bot <eric+marge@anholt.net>
	Fri, 26 Mar 2021 14:39:23 +0000 (14:39 +0000)