unop("ineg", tint, "-src0")
unop("fneg", tfloat, "-src0")
-unop("inot", tint, "~src0") # invert every bit of the integer
+unop("inot", tint, "~src0", description = "Invert every bit of the integer")
-# nir_op_fsign roughly implements the OpenGL / Vulkan rules for sign(float).
-# The GLSL.std.450 FSign instruction is defined as:
-#
-# Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
-#
-# If the source is equal to zero, there is a preference for the result to have
-# the same sign, but this is not required (it is required by OpenCL). If the
-# source is not a number, there is a preference for the result to be +0.0, but
-# this is not required (it is required by OpenCL). If the source is not a
-# number, and the result is not +0.0, the result should definitely **not** be
-# NaN.
-#
-# The values returned for constant folding match the behavior required by
-# OpenCL.
unop("fsign", tfloat, ("bit_size == 64 ? " +
"(isnan(src0) ? 0.0 : ((src0 == 0.0 ) ? src0 : (src0 > 0.0 ) ? 1.0 : -1.0 )) : " +
- "(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"))
+ "(isnan(src0) ? 0.0f : ((src0 == 0.0f) ? src0 : (src0 > 0.0f) ? 1.0f : -1.0f))"),
+ description = """
+Roughly implements the OpenGL / Vulkan rules for ``sign(float)``.
+The ``GLSL.std.450 FSign`` instruction is defined as:
+
+ Result is 1.0 if x > 0, 0.0 if x = 0, or -1.0 if x < 0.
+
+If the source is equal to zero, there is a preference for the result to have
+the same sign, but this is not required (it is required by OpenCL). If the
+source is not a number, there is a preference for the result to be +0.0, but
+this is not required (it is required by OpenCL). If the source is not a
+number, and the result is not +0.0, the result should definitely **not** be
+NaN.
+
+The values returned for constant folding match the behavior required by
+OpenCL.
+ """)
+
unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)")
unop("iabs", tint, "(src0 < 0) ? -src0 : src0")
unop("fabs", tfloat, "fabs(src0)")
dst_bit_size),
dst_t + str(dst_bit_size), src_t, conv_expr)
-# Special opcode that is the same as f2f16, i2i16, u2u16 except that it is safe
-# to remove it if the result is immediately converted back to 32 bits again.
-# This is generated as part of the precision lowering pass. mp stands for medium
-# precision.
-unop_numeric_convert("f2fmp", tfloat16, tfloat32, opcodes["f2f16"].const_expr)
-unop_numeric_convert("i2imp", tint16, tint32, opcodes["i2i16"].const_expr)
+def unop_numeric_convert_mp(base, src_t, dst_t):
+ op_like = base + "16"
+ unop_numeric_convert(base + "mp", src_t, dst_t, opcodes[op_like].const_expr,
+ description = """
+Special opcode that is the same as :nir:alu-op:`{}` except that it is safe to
+remove it if the result is immediately converted back to 32 bits again. This is
+generated as part of the precision lowering pass. ``mp`` stands for medium
+precision.
+ """.format(op_like))
+
+unop_numeric_convert_mp("f2f", tfloat16, tfloat32)
+unop_numeric_convert_mp("i2i", tint16, tint32)
# u2ump isn't defined, because the behavior is equal to i2imp
-unop_numeric_convert("f2imp", tint16, tfloat32, opcodes["f2i16"].const_expr)
-unop_numeric_convert("f2ump", tuint16, tfloat32, opcodes["f2u16"].const_expr)
-unop_numeric_convert("i2fmp", tfloat16, tint32, opcodes["i2f16"].const_expr)
-unop_numeric_convert("u2fmp", tfloat16, tuint32, opcodes["u2f16"].const_expr)
+unop_numeric_convert_mp("f2i", tint16, tfloat32)
+unop_numeric_convert_mp("f2u", tuint16, tfloat32)
+unop_numeric_convert_mp("i2f", tfloat16, tint32)
+unop_numeric_convert_mp("u2f", tfloat16, tuint32)
# Unary floating-point rounding operations.
unop_convert("frexp_sig", tfloat, tfloat, "int n; dst = frexp(src0, &n);")
# Partial derivatives.
+deriv_template = """
+Calculate the screen-space partial derivative using {} derivatives of the input
+with respect to the {}-axis. The constant folding is trivial as the derivative
+of a constant is 0.
+"""
-
-unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
-unop("fddy", tfloat, "0.0")
-unop("fddx_fine", tfloat, "0.0")
-unop("fddy_fine", tfloat, "0.0")
-unop("fddx_coarse", tfloat, "0.0")
-unop("fddy_coarse", tfloat, "0.0")
-
+for mode, suffix in [("either fine or coarse", ""), ("fine", "_fine"), ("coarse", "_coarse")]:
+ for axis in ["x", "y"]:
+ unop(f"fdd{axis}{suffix}", tfloat, "0.0",
+ description = deriv_template.format(mode, axis.upper()))
# Floating point pack and unpack operations.
unpack_4x8("unorm")
unpack_2x16("half")
-# Convert two unsigned integers into a packed unsigned short (clamp is applied).
unop_horiz("pack_uint_2x16", 1, tuint32, 2, tuint32, """
dst.x = _mesa_unsigned_to_unsigned(src0.x, 16);
dst.x |= _mesa_unsigned_to_unsigned(src0.y, 16) << 16;
+""", description = """
+Convert two unsigned integers into a packed unsigned short (clamp is applied).
""")
-# Convert two signed integers into a packed signed short (clamp is applied).
unop_horiz("pack_sint_2x16", 1, tint32, 2, tint32, """
dst.x = _mesa_signed_to_signed(src0.x, 16) & 0xffff;
dst.x |= _mesa_signed_to_signed(src0.y, 16) << 16;
+""", description = """
+Convert two signed integers into a packed signed short (clamp is applied).
""")
unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
if (src0.z < 0 && absZ >= absX && absZ >= absY) dst.x = 5;
""")
-# Sum of vector components
-unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}")
+unop_reduce("fsum", 1, tfloat, tfloat, "{src}", "{src0} + {src1}", "{src}",
+ description = "Sum of vector components")
def binop_convert(name, out_type, in_type, alg_props, const_expr, description=""):
opcode(name, 0, out_type, [0, 0], [in_type, in_type],
}
""")
-# Unlike fmul, anything (even infinity or NaN) multiplied by zero is always zero.
-# fmulz(0.0, inf) and fmulz(0.0, nan) must be +/-0.0, even if
-# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
-# the result must be a positive zero if either operand is zero.
binop("fmulz", tfloat32, _2src_commutative + associative, """
if (src0 == 0.0 || src1 == 0.0)
dst = 0.0;
dst = _mesa_double_to_float_rtz((double)src0 * (double)src1);
else
dst = src0 * src1;
+""", description = """
+Unlike :nir:alu-op:`fmul`, anything (even infinity or NaN) multiplied by zero is
+always zero. ``fmulz(0.0, inf)`` and ``fmulz(0.0, nan)`` must be +/-0.0, even
+if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If
+``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive
+zero if either operand is zero.
""")
-# low 32-bits of signed/unsigned integer multiply
+
binop("imul", tint, _2src_commutative + associative, """
/* Use 64-bit multiplies to prevent overflow of signed arithmetic */
dst = (uint64_t)src0 * (uint64_t)src1;
-""")
+""", description = "Low 32-bits of signed/unsigned integer multiply")
-# Generate 64 bit result from 2 32 bits quantity
binop_convert("imul_2x32_64", tint64, tint32, _2src_commutative,
- "(int64_t)src0 * (int64_t)src1")
+ "(int64_t)src0 * (int64_t)src1",
+ description = "Multiply signed 32-bit integers, 64-bit result")
binop_convert("umul_2x32_64", tuint64, tuint32, _2src_commutative,
- "(uint64_t)src0 * (uint64_t)src1")
+ "(uint64_t)src0 * (uint64_t)src1",
+ description = "Multiply unsigned 32-bit integers, 64-bit result")
-# high 32-bits of signed integer multiply
binop("imul_high", tint, _2src_commutative, """
if (bit_size == 64) {
/* We need to do a full 128-bit x 128-bit multiply in order for the sign
* potential overflow of signed multiply */
dst = ((uint64_t)(int64_t)src0 * (uint64_t)(int64_t)src1) >> bit_size;
}
-""")
+""", description = "High 32-bits of signed integer multiply")
-# high 32-bits of unsigned integer multiply
binop("umul_high", tuint, _2src_commutative, """
if (bit_size == 64) {
/* The casts are kind-of annoying but needed to prevent compiler warnings. */
} else {
dst = ((uint64_t)src0 * (uint64_t)src1) >> bit_size;
}
-""")
+""", description = "High 32-bits of unsigned integer multiply")
-# low 32-bits of unsigned integer multiply
binop("umul_low", tuint32, _2src_commutative, """
uint64_t mask = (1 << (bit_size / 2)) - 1;
dst = ((uint64_t)src0 & mask) * ((uint64_t)src1 & mask);
-""")
+""", description = "Low 32-bits of unsigned integer multiply")
-# Multiply 32-bits with low 16-bits.
-binop("imul_32x16", tint32, "", "src0 * (int16_t) src1")
-binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1")
+binop("imul_32x16", tint32, "", "src0 * (int16_t) src1",
+ description = "Multiply 32-bits with low 16-bits, with sign extension")
+binop("umul_32x16", tuint32, "", "src0 * (uint16_t) src1",
+ description = "Multiply 32-bits with low 16-bits, with zero extension")
binop("fdiv", tfloat, "", "src0 / src1")
binop("idiv", tint, "", "src1 == 0 ? 0 : (src0 / src1)")
binop("udiv", tuint, "", "src1 == 0 ? 0 : (src0 / src1)")
-# returns an integer (1 or 0) representing the carry resulting from the
-# addition of the two unsigned arguments.
-
-binop_convert("uadd_carry", tuint, tuint, _2src_commutative, "src0 + src1 < src0")
+binop_convert("uadd_carry", tuint, tuint, _2src_commutative,
+ "src0 + src1 < src0",
+ description = """
+Return an integer (1 or 0) representing the carry resulting from the
+addition of the two unsigned arguments.
+ """)
-# returns an integer (1 or 0) representing the borrow resulting from the
-# subtraction of the two unsigned arguments.
-
-binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1")
+binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1", description = """
+Return an integer (1 or 0) representing the borrow resulting from the
+subtraction of the two unsigned arguments.
+ """)
# hadd: (a + b) >> 1 (without overflow)
# x + y = x - (x & ~y) + (x & ~y) + y - (~x & y) + (~x & y)
binop("seq", tfloat, _2src_commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
binop("sne", tfloat, _2src_commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
-# SPIRV shifts are undefined for shift-operands >= bitsize,
-# but SM5 shifts are defined to use only the least significant bits.
-# The NIR definition is according to the SM5 specification.
+shift_note = """
+SPIRV shifts are undefined for shift-operands >= bitsize,
+but SM5 shifts are defined to use only the least significant bits.
+The NIR definition is according to the SM5 specification.
+"""
+
opcode("ishl", 0, tint, [0, 0], [tint, tuint32], False, "",
- "(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))")
+ "(uint64_t)src0 << (src1 & (sizeof(src0) * 8 - 1))",
+ description = "Left shift." + shift_note)
opcode("ishr", 0, tint, [0, 0], [tint, tuint32], False, "",
- "src0 >> (src1 & (sizeof(src0) * 8 - 1))")
+ "src0 >> (src1 & (sizeof(src0) * 8 - 1))",
+ description = "Signed right-shift." + shift_note)
opcode("ushr", 0, tuint, [0, 0], [tuint, tuint32], False, "",
- "src0 >> (src1 & (sizeof(src0) * 8 - 1))")
+ "src0 >> (src1 & (sizeof(src0) * 8 - 1))",
+ description = "Unsigned right-shift." + shift_note)
opcode("urol", 0, tuint, [0, 0], [tuint, tuint32], False, "", """
uint32_t rotate_mask = sizeof(src0) * 8 - 1;
(src0 << (-src1 & rotate_mask));
""")
-# bitwise logic operators
-#
-# These are also used as boolean and, or, xor for hardware supporting
-# integers.
+bitwise_description = """
+Bitwise {0}, also used as a boolean {0} for hardware supporting integers.
+"""
-
-binop("iand", tuint, _2src_commutative + associative, "src0 & src1")
-binop("ior", tuint, _2src_commutative + associative, "src0 | src1")
-binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1")
+binop("iand", tuint, _2src_commutative + associative, "src0 & src1",
+ description = bitwise_description.format("AND"))
+binop("ior", tuint, _2src_commutative + associative, "src0 | src1",
+ description = bitwise_description.format("OR"))
+binop("ixor", tuint, _2src_commutative + associative, "src0 ^ src1",
+ description = bitwise_description.format("XOR"))
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
False, "",
"src0 | ((uint32_t)src1 << 8) | ((uint32_t)src2 << 16) | ((uint32_t)src3 << 24)")
-# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
-# and that of the "bfi1" i965 instruction. That is, the bits and offset values
-# are from the low five bits of src0 and src1, respectively.
binop_convert("bfm", tuint32, tint32, "", """
int bits = src0 & 0x1F;
int offset = src1 & 0x1F;
dst = ((1u << bits) - 1) << offset;
+""", description = """
+Implements the behavior of the first operation of the SM5 "bfi" assembly
+and that of the "bfi1" i965 instruction. That is, the bits and offset values
+are from the low five bits of src0 and src1, respectively.
""")
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint32], False, "", """
dst = copysignf(0.0f, src0);
""")
-# Combines the first component of each input to make a 2-component vector.
-
binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """
dst.x = src0.x;
dst.y = src1.x;
+""", description = """
+Combines the first component of each input to make a 2-component vector.
""")
# Byte extraction
}
""")
-# Unlike ffma, anything (even infinity or NaN) multiplied by zero is always zero.
-# ffmaz(0.0, inf, src2) and ffmaz(0.0, nan, src2) must be +/-0.0 + src2, even if
-# SIGNED_ZERO_INF_NAN_PRESERVE is not used. If SIGNED_ZERO_INF_NAN_PRESERVE is used, then
-# the result must be a positive zero plus src2 if either src0 or src1 is zero.
triop("ffmaz", tfloat32, _2src_commutative, """
if (src0 == 0.0 || src1 == 0.0)
dst = 0.0 + src2;
dst = _mesa_float_fma_rtz(src0, src1, src2);
else
dst = fmaf(src0, src1, src2);
+""", description = """
+Floating-point multiply-add with modified zero handling.
+
+Unlike :nir:alu-op:`ffma`, anything (even infinity or NaN) multiplied by zero is
+always zero. ``ffmaz(0.0, inf, src2)`` and ``ffmaz(0.0, nan, src2)`` must be
+``+/-0.0 + src2``, even if ``SIGNED_ZERO_INF_NAN_PRESERVE`` is not used. If
+``SIGNED_ZERO_INF_NAN_PRESERVE`` is used, then the result must be a positive
+zero plus src2 if either src0 or src1 is zero.
""")
triop("flrp", tfloat, "", "src0 * (1 - src2) + src1 * src2")
-# Ternary addition
-triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2")
+triop("iadd3", tint, _2src_commutative + associative, "src0 + src1 + src2",
+ description = "Ternary addition")
-# Conditional Select
-#
-# A vector conditional select instruction (like ?:, but operating per-
-# component on vectors). There are two versions, one for floating point
-# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
-
-triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2")
+csel_description = """
+A vector conditional select instruction (like ?:, but operating per-
+component on vectors). The condition is {} bool ({}).
+"""
+triop("fcsel", tfloat32, selection, "(src0 != 0.0f) ? src1 : src2",
+ description = csel_description.format("a floating point", "0.0 vs 1.0"))
opcode("bcsel", 0, tuint, [0, 0, 0],
- [tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2")
+ [tbool1, tuint, tuint], False, selection, "src0 ? src1 : src2",
+ description = csel_description.format("a 1-bit", "0 vs 1"))
opcode("b8csel", 0, tuint, [0, 0, 0],
- [tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2")
+ [tbool8, tuint, tuint], False, selection, "src0 ? src1 : src2",
+ description = csel_description.format("an 8-bit", "0 vs ~0"))
opcode("b16csel", 0, tuint, [0, 0, 0],
- [tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2")
+ [tbool16, tuint, tuint], False, selection, "src0 ? src1 : src2",
+ description = csel_description.format("a 16-bit", "0 vs ~0"))
opcode("b32csel", 0, tuint, [0, 0, 0],
- [tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2")
+ [tbool32, tuint, tuint], False, selection, "src0 ? src1 : src2",
+ description = csel_description.format("a 32-bit", "0 vs ~0"))
triop("i32csel_gt", tint32, selection, "(src0 > 0) ? src1 : src2")
triop("i32csel_ge", tint32, selection, "(src0 >= 0) ? src1 : src2")
triop("fcsel_gt", tfloat32, selection, "(src0 > 0.0f) ? src1 : src2")
triop("fcsel_ge", tfloat32, selection, "(src0 >= 0.0f) ? src1 : src2")
-# SM5 bfi assembly
triop("bfi", tuint32, "", """
unsigned mask = src0, insert = src1, base = src2;
if (mask == 0) {
}
dst = (base & ~mask) | (insert & mask);
}
-""")
+""", description = "SM5 bfi assembly")
triop("bitfield_select", tuint, "", "(src0 & src1) | (~src0 & src2)")