Instruction* op1_instr, bool swap, const char *shuffle_str,
Operand operands[3], bool neg[3], bool abs[3], uint8_t *opsel,
bool *op1_clamp, uint8_t *op1_omod,
- bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel)
+ bool *inbetween_neg, bool *inbetween_abs, bool *inbetween_opsel,
+ bool *precise)
{
/* checks */
if (op1_instr->opcode != op1)
else if (op1_vop3 && op1_vop3->opsel & (1 << swap))
return false;
+ *precise = op1_instr->definitions[0].isPrecise() ||
+ op2_instr->definitions[0].isPrecise();
+
int shuffle[3];
shuffle[shuffle_str[0] - '0'] = 0;
shuffle[shuffle_str[1] - '0'] = 1;
continue;
Operand operands[3];
- bool neg[3], abs[3], clamp;
+ bool neg[3], abs[3], clamp, precise;
uint8_t opsel = 0, omod = 0;
if (match_op3_for_vop3(ctx, instr->opcode, op2,
instr.get(), swap, shuffle,
operands, neg, abs, &opsel,
- &clamp, &omod, NULL, NULL, NULL)) {
+ &clamp, &omod, NULL, NULL, NULL, &precise)) {
ctx.uses[instr->operands[swap].tempId()]--;
create_vop3_for_op3(ctx, new_op, instr, operands, neg, abs, opsel, clamp, omod);
return true;
* max(-min(a, b), c) -> max3(-a, -b, c) */
for (unsigned swap = 0; swap < 2; swap++) {
Operand operands[3];
- bool neg[3], abs[3], clamp;
+ bool neg[3], abs[3], clamp, precise;
uint8_t opsel = 0, omod = 0;
bool inbetween_neg;
if (match_op3_for_vop3(ctx, instr->opcode, opposite,
instr.get(), swap, "012",
operands, neg, abs, &opsel,
- &clamp, &omod, &inbetween_neg, NULL, NULL) &&
+ &clamp, &omod, &inbetween_neg, NULL, NULL, &precise) &&
inbetween_neg) {
ctx.uses[instr->operands[swap].tempId()]--;
neg[1] = true;
for (unsigned swap = 0; swap < 2; swap++) {
Operand operands[3];
- bool neg[3], abs[3], clamp;
+ bool neg[3], abs[3], clamp, precise;
uint8_t opsel = 0, omod = 0;
if (match_op3_for_vop3(ctx, instr->opcode, other_op, instr.get(), swap,
"012", operands, neg, abs, &opsel,
- &clamp, &omod, NULL, NULL, NULL)) {
+ &clamp, &omod, NULL, NULL, NULL, &precise)) {
+ /* max(min(src, upper), lower) returns upper if src is NaN, but
+ * med3(src, lower, upper) returns lower.
+ */
+ if (precise && instr->opcode != min)
+ continue;
+
int const0_idx = -1, const1_idx = -1;
uint32_t const0 = 0, const1 = 0;
for (int i = 0; i < 3; i++) {
finish_opt_test();
}
END_TEST
+
+BEGIN_TEST(optimize.clamp)
+ /* Checks how the optimizer treats a v_min_f32/v_max_f32 pair that clamps an
+ * input between the constants 0 and 4.0 (0x40800000): cases 0-2 expect a
+ * single v_med3_f32, case 3 expects the pair to be left alone. The lines
+ * starting with "//>>" and "//!" are the expected-output patterns for the
+ * test and must stay in the same order as the instructions built below.
+ */
+ //>> v1: %a, v1: %b, v1: %c, s2: %_:exec = p_startpgm
+ if (!setup_cs("v1 v1 v1", GFX9))
+ return;
+
+ /* case 0: min(4.0, max(0, a)), nothing marked precise -> med3 expected */
+ //! v1: %res0 = v_med3_f32 4.0, 0, %a
+ //! p_unit_test 0, %res0
+ writeout(0, bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u),
+ bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), inputs[0])));
+
+ /* case 1: max(0, min(4.0, a)), nothing marked precise -> med3 expected */
+ //! v1: %res1 = v_med3_f32 0, 4.0, %a
+ //! p_unit_test 1, %res1
+ writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u),
+ bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), inputs[0])));
+
+ /* correct NaN behaviour with precise */
+
+ /* case 2: the inner max is marked precise and the outer instruction is a
+ * min; the pattern below still expects the pair to become a med3.
+ */
+ //! v1: %res2 = v_med3_f32 4.0, 0, %a
+ //! p_unit_test 2, %res2
+ Builder::Result max = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), inputs[0]);
+ max.def(0).setPrecise(true);
+ Builder::Result min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), max);
+ /* NOTE(review): this repeats the setPrecise call made on max two statements
+ * earlier; possibly min.def(0) was intended. Confirm before changing it, as
+ * the expected %res2 pattern above may then need updating too.
+ */
+ max.def(0).setPrecise(true);
+ writeout(2, min);
+
+ /* case 3: the inner min is marked precise and the outer instruction is a
+ * max; the pattern below expects both instructions to survive, i.e. no
+ * med3 is formed.
+ */
+ //! v1: (precise)%res3_tmp = v_min_f32 4.0, %a
+ //! v1: %res3 = v_max_f32 0, %res3_tmp
+ //! p_unit_test 3, %res3
+ min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0x40800000u), inputs[0]);
+ min.def(0).setPrecise(true);
+ writeout(3, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), min));
+
+ finish_opt_test();
+END_TEST