* iadd(x, ineg(y)). If true, driver should call nir_opt_algebraic_late(). */
bool has_isub;
+ /** Backend natively supports pack_32_4x8 or pack_32_4x8_split; setting
+ * this enables the nir_opt_algebraic rule that fuses shift/or byte
+ * packing into a single pack op. */
+ bool has_pack_32_4x8;
+
/** Backend supports txs, if not nir_lower_tex(..) uses txs-free variants
* for rect texture lowering. */
bool has_txs;
nir_unpack_32_2x16_split_y(b, zw));
}
+/* Lower pack_32_4x8 to a single pack_32_4x8_split over the four byte
+ * channels of src; per the opcode's const-expr, channel 0 lands in the
+ * least significant byte. */
+static nir_ssa_def *
+lower_pack_32_from_8(nir_builder *b, nir_ssa_def *src)
+{
+ return nir_pack_32_4x8_split(b, nir_channel(b, src, 0),
+ nir_channel(b, src, 1),
+ nir_channel(b, src, 2),
+ nir_channel(b, src, 3));
+}
+
static bool
lower_pack_instr(nir_builder *b, nir_instr *instr, void *data)
{
alu_instr->op != nir_op_pack_64_4x16 &&
alu_instr->op != nir_op_unpack_64_4x16 &&
alu_instr->op != nir_op_pack_32_2x16 &&
- alu_instr->op != nir_op_unpack_32_2x16)
-
+ alu_instr->op != nir_op_unpack_32_2x16 &&
+ alu_instr->op != nir_op_pack_32_4x8)
return false;
b->cursor = nir_before_instr(&alu_instr->instr);
case nir_op_unpack_32_2x16:
dest = lower_unpack_32_to_16(b, src);
break;
+ /* pack_32_4x8 is rewritten as one pack_32_4x8_split of the four
+ * byte channels (see lower_pack_32_from_8). */
+ case nir_op_pack_32_4x8:
+ dest = lower_pack_32_from_8(b, src);
+ break;
default:
unreachable("Impossible opcode");
}
binop_convert("pack_32_2x16_split", tuint32, tuint16, "",
"src0 | ((uint32_t)src1 << 16)")
+# Pack four 8-bit scalar sources into one 32-bit value, little-endian:
+# src0 is the least significant byte, src3 the most significant.
+opcode("pack_32_4x8_split", 0, tuint32, [0, 0, 0, 0], [tuint8, tuint8, tuint8, tuint8],
+ False, "",
+ "src0 | ((uint32_t)src1 << 8) | ((uint32_t)src2 << 16) | ((uint32_t)src3 << 24)")
+
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
# and that of the "bfi1" i965 instruction. That is, the bits and offset values
# are from the low five bits of src0 and src1, respectively.
(('ibfe', a, 0, 16), ('extract_i16', a, 0), '!options->lower_extract_word'),
(('ibfe', a, 16, 16), ('extract_i16', a, 1), '!options->lower_extract_word'),
+ # Packing a u8vec4 to write to an SSBO. Note the reversed operand
+ # order: 'a' is shifted by 24 (most significant byte), so it becomes
+ # channel 3 of the vec4, matching pack_32_4x8_split's byte layout.
+ (('ior', ('ishl', ('u2u32', 'a@8'), 24), ('ior', ('ishl', ('u2u32', 'b@8'), 16), ('ior', ('ishl', ('u2u32', 'c@8'), 8), ('u2u32', 'd@8')))),
+ ('pack_32_4x8', ('vec4', d, c, b, a)), 'options->has_pack_32_4x8'),
+
(('extract_u16', ('extract_i16', a, b), 0), ('extract_u16', a, b)),
(('extract_u16', ('extract_u16', a, b), 0), ('extract_u16', a, b)),
.lower_usub_sat64 = true, \
.lower_hadd64 = true, \
.avoid_ternary_with_two_constants = true, \
+ .has_pack_32_4x8 = true, /* backend emits pack_32_4x8_split via FS_OPCODE_PACK */ \
.max_unroll_iterations = 32, \
.force_indirect_unrolling = nir_var_function_temp
case nir_op_u2u32:
case nir_op_iabs:
case nir_op_ineg:
+ case nir_op_pack_32_4x8_split:
break;
default:
bld.emit(FS_OPCODE_PACK, result, op[0], op[1]);
break;
+ /* Passes all four byte sources as an array — presumably the
+ * emit(opcode, dst, srcs, num_srcs) overload; verify against
+ * fs_builder. */
+ case nir_op_pack_32_4x8_split:
+ bld.emit(FS_OPCODE_PACK, result, op, 4);
+ break;
+
case nir_op_unpack_64_2x32_split_x:
case nir_op_unpack_64_2x32_split_y: {
if (instr->op == nir_op_unpack_64_2x32_split_x)