From d7ca0319d72af55597d8ac4146a5e8a3d55322f5 Mon Sep 17 00:00:00 2001 From: Jesse Natalie Date: Fri, 30 Apr 2021 11:46:09 -0700 Subject: [PATCH] nir: Add relaxed 24bit opcodes These are equivalent to the 32bit opcodes if there are no more efficient 24bit opcodes available, but inputs are guaranteed to already be 24bit, so the 24bit opcodes can be used instead if they exist and are efficient. Reviewed-by: Jason Ekstrand Reviewed-by: Karol Herbst Part-of: --- src/compiler/nir/nir_opcodes.py | 5 +++++ src/compiler/nir/nir_opt_algebraic.py | 8 ++++++++ 2 files changed, 13 insertions(+) diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py index 8a977d0..bb8fc5b 100644 --- a/src/compiler/nir/nir_opcodes.py +++ b/src/compiler/nir/nir_opcodes.py @@ -1258,6 +1258,11 @@ triop("umad24", tuint32, _2src_commutative, binop("umul24", tint32, _2src_commutative + associative, "(((uint32_t)src0 << 8) >> 8) * (((uint32_t)src1 << 8) >> 8)") +# relaxed versions of the above, which assume input is in the 24bit range (no clamping) +binop("imul24_relaxed", tint32, _2src_commutative + associative, "src0 * src1") +triop("umad24_relaxed", tuint32, _2src_commutative, "src0 * src1 + src2") +binop("umul24_relaxed", tuint32, _2src_commutative + associative, "src0 * src1") + unop_convert("fisnormal", tbool1, tfloat, "isnormal(src0)") unop_convert("fisfinite", tbool1, tfloat, "isfinite(src0)") diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py index 48b1848..7a8b610 100644 --- a/src/compiler/nir/nir_opt_algebraic.py +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -1653,6 +1653,14 @@ optimizations.extend([ ('iadd', ('imul', ('iand', a, 0xffffff), ('iand', b, 0xffffff)), c), '!options->has_umad24'), + # Relaxed 24bit ops + (('imul24_relaxed', a, b), ('imul24', a, b), 'options->has_imul24'), + (('imul24_relaxed', a, b), ('imul', a, b), '!options->has_imul24'), + (('umad24_relaxed', a, b, c), ('umad24', a, b, c), 'options->has_umad24'), + (('umad24_relaxed', a, b, c), ('iadd', ('umul24_relaxed', a, b), c), '!options->has_umad24'), + (('umul24_relaxed', a, b), ('umul24', a, b), 'options->has_umul24'), + (('umul24_relaxed', a, b), ('imul', a, b), '!options->has_umul24'), + (('imad24_ir3', a, b, 0), ('imul24', a, b)), (('imad24_ir3', a, 0, c), (c)), (('imad24_ir3', a, 1, c), ('iadd', a, c)), -- 2.7.4