static void
ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
{
- /* This should always be lowered to ALU operations for V3D. */
- assert(!instr->dest.saturate);
-
/* Vectors are special in that they have non-scalarized writemasks,
* and just take the first swizzle channel for each argument in order
* into each writemask channel.
{
nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
dest->write_mask = src->write_mask;
- dest->saturate = src->saturate;
}
bool
alu_dest_init(nir_alu_dest *dest)
{
dest_init(&dest->dest);
- dest->saturate = false;
dest->write_mask = 0xf;
}
assert(instr->src[0].src.is_ssa);
if (instr->op == nir_op_mov) {
- return !instr->dest.saturate &&
- !instr->src[0].abs &&
+ return !instr->src[0].abs &&
!instr->src[0].negate;
} else if (nir_op_is_vec(instr->op)) {
for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; i++) {
if (instr->src[i].abs || instr->src[i].negate)
return false;
}
- return !instr->dest.saturate;
+ return true;
} else {
return false;
}
nir_dest dest;
/**
- * Saturate output modifier
- *
- * Only valid for opcodes that output floating-point numbers. Clamps the
- * output to between 0.0 and 1.0 inclusive.
- */
- bool saturate;
-
- /**
* Write-mask
*
* Ignored if dest.is_ssa is true
nalu->no_unsigned_wrap = alu->no_unsigned_wrap;
__clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
- nalu->dest.saturate = alu->dest.saturate;
nalu->dest.write_mask = alu->dest.write_mask;
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
if (bit_size == 0)
bit_size = 32;
- /* We shouldn't have any saturate modifiers in the optimization loop. */
- assert(!alu->dest.saturate);
-
nir_const_value *srcs[NIR_MAX_VEC_COMPONENTS];
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; ++i)
}
nir_alu_ssa_dest_init(lower, components, alu->dest.dest.ssa.bit_size);
- lower->dest.saturate = alu->dest.saturate;
lower->exact = alu->exact;
for (i = 0; i < components; i++) {
if (!vec->dest.dest.is_ssa)
continue;
- /* Can't handle saturation */
- if (vec->dest.saturate)
- continue;
-
/* First, mark all of the sources we are going to consider for rewriting
* to the destination
*/
if (alu->dest.dest.ssa.num_components != 1)
continue;
- if (alu->dest.saturate)
- continue;
-
static const uint8_t swizzle[NIR_MAX_VEC_COMPONENTS] = {0};
switch (alu->op) {
if (bit_size == 0)
bit_size = 32;
- /* We shouldn't have any saturate modifiers in the optimization loop. */
- assert(!alu->dest.saturate);
-
nir_const_value dest[NIR_MAX_VEC_COMPONENTS];
nir_const_value *srcs[NIR_MAX_VEC_COMPONENTS];
memset(dest, 0, sizeof(dest));
alu->dest.dest.ssa.num_components,
alu->dest.dest.ssa.bit_size);
- nalu->dest.saturate = alu->dest.saturate;
nalu->dest.write_mask = alu->dest.write_mask;
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
if (mov->op != nir_op_fsat && !movelike)
(*count)++;
} else {
- /* Can't handle saturate */
- if (mov->dest.saturate)
- return false;
-
/* The only uses of this definition must be phis in the successor */
nir_foreach_use_including_if(use, &mov->dest.dest.ssa) {
if (use->is_if ||
fprintf(fp, " = %s", nir_op_infos[instr->op].name);
if (instr->exact)
fprintf(fp, "!");
- if (instr->dest.saturate)
- fprintf(fp, ".sat");
if (instr->no_signed_wrap)
fprintf(fp, ".nsw");
if (instr->no_unsigned_wrap)
if (state->inexact_match && state->has_exact_alu)
return false;
- assert(!instr->dest.saturate);
assert(nir_op_infos[instr->op].num_inputs > 0);
/* If we have an explicitly sized destination, we can only handle the
nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components,
dst_bit_size);
alu->dest.write_mask = (1 << num_components) - 1;
- alu->dest.saturate = false;
/* We have no way of knowing what values in a given search expression
* map to a particular replacement value. Therefore, if the
unsigned exact:1;
unsigned no_signed_wrap:1;
unsigned no_unsigned_wrap:1;
- unsigned saturate:1;
+ unsigned padding:1;
/* Reg: writemask; SSA: swizzles for 2 srcs */
unsigned writemask_or_two_swizzles:4;
unsigned op:9;
header.alu.exact = alu->exact;
header.alu.no_signed_wrap = alu->no_signed_wrap;
header.alu.no_unsigned_wrap = alu->no_unsigned_wrap;
- header.alu.saturate = alu->dest.saturate;
header.alu.op = alu->op;
header.alu.packed_src_ssa_16bit = is_alu_src_ssa_16bit(ctx, alu);
alu->exact = header.alu.exact;
alu->no_signed_wrap = header.alu.no_signed_wrap;
alu->no_unsigned_wrap = header.alu.no_unsigned_wrap;
- alu->dest.saturate = header.alu.saturate;
read_dest(ctx, &alu->dest.dest, &alu->instr, header);
validate_alu_dest(nir_alu_instr *instr, validate_state *state)
{
nir_alu_dest *dest = &instr->dest;
-
- if (instr->op == nir_op_mov)
- assert(!dest->saturate);
-
unsigned dest_size = nir_dest_num_components(dest->dest);
/*
* validate that the instruction doesn't write to components not in the
*/
validate_assert(state, !(dest->write_mask & ~nir_component_mask(dest_size)));
- /* validate that saturate is only ever used on instructions with
- * destinations of type float
- */
- nir_alu_instr *alu = nir_instr_as_alu(state->instr);
- validate_assert(state,
- (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) ==
- nir_type_float) ||
- !dest->saturate);
-
validate_dest(&dest->dest, state, 0, 0);
}
nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr);
- if (parent->dest.saturate)
- continue;
-
if (nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[i]) != nir_type_float)
continue;
static void
ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
{
- /* This should always be lowered to ALU operations for VC4. */
- assert(!instr->dest.saturate);
-
/* Vectors are special in that they have non-scalarized writemasks,
* and just take the first swizzle channel for each argument in order
* into each writemask channel.
/* Store the SPIR-V value produced for an ALU instruction into its NIR
 * destination.  The removed assertion checked the nir_alu_dest::saturate
 * modifier, which this patch deletes from NIR entirely, so there is
 * nothing left to assert here.
 */
static void
store_alu_result(struct ntv_context *ctx, nir_alu_instr *alu, SpvId result, nir_alu_type atype)
{
- assert(!alu->dest.saturate);
store_dest(ctx, &alu->dest.dest, result, atype);
}
(nir_alu_type)(nir_op_infos[instr->op].output_type |
nir_dest_bit_size(instr->dest.dest)));
- assert(!instr->dest.saturate);
-
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
/* We don't lower to source modifiers so they should not exist. */
assert(!instr->src[i].abs);
mul_src[0] = nir_fneg(b, mul_src[0]);
nir_alu_instr *ffma = nir_alu_instr_create(b->shader, nir_op_ffma);
- ffma->dest.saturate = add->dest.saturate;
ffma->dest.write_mask = add->dest.write_mask;
for (unsigned i = 0; i < 2; i++) {
b->cursor = nir_before_instr(&imul->instr);
nir_alu_instr *imul_32x16 = nir_alu_instr_create(b->shader, new_opcode);
- imul_32x16->dest.saturate = imul->dest.saturate;
imul_32x16->dest.write_mask = imul->dest.write_mask;
nir_alu_src_copy(&imul_32x16->src[0], &imul->src[1 - small_val], imul_32x16);
dst_reg dst = get_nir_dest(instr->dest.dest, dst_type);
dst.writemask &= instr->dest.write_mask;
- assert(!instr->dest.saturate);
-
src_reg op[4];
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
/* We don't lower to source modifiers, so they shouldn't exist. */
store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,
const struct dxil_value *value)
{
- assert(!alu->dest.saturate);
/* Saturate modifiers no longer exist on nir_alu_dest, so the guard
 * assertion above is dropped; the per-channel store is unchanged. */
store_dest(ctx, &alu->dest.dest, chan, value);
}
oldPos = oldPos->next;
oldPos->precise = insn->exact;
}
- oldPos->saturate = insn->dest.saturate;
return true;
}