src = convert_int(ctx, bld, src, 64, 32, false);
}
- if (ctx->program->chip_class >= GFX8) {
+ if (ctx->program->chip_class >= GFX8 && input_size <= 16) {
bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src);
} else {
- /* GFX7 and earlier do not support direct f16⟷i16 conversions */
+ /* Convert to f32 and then down to f16. This is needed to handle
+ * inputs slightly outside the range [INT16_MIN, INT16_MAX],
+ * which are representable via f16 but wouldn't be converted
+ * correctly by v_cvt_f16_i16.
+ *
+ * This is also the fallback-path taken on GFX7 and earlier, which
+ * do not support direct f16⟷i16 conversions.
+ */
src = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), src);
bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src);
}
}
if (ctx->program->chip_class >= GFX8) {
+ /* Only unsigned inputs in [0, 65519] round to a finite float16. Converting
+ * larger inputs is UB, so we only need to consider the lower 16 bits */
bld.vop1(aco_opcode::v_cvt_f16_u16, Definition(dst), src);
} else {
/* GFX7 and earlier do not support direct f16⟷u16 conversions */