/* Pack/unpack a channel count in the low two bits of a property word. The
 * count is stored biased by one, so a stored value of zero decodes as 0
 * ("no channel count") rather than 1. Arguments are fully parenthesised so
 * compound expressions (a ?: b, a | b, ...) expand as intended. Note that
 * GET_CHANNEL_COUNT evaluates its argument twice; do not pass expressions
 * with side effects. */
#define OP_CHANNEL_COUNT(c) (((c) - 1) << 0)
#define GET_CHANNEL_COUNT(c) (((c) & (0x3 << 0)) ? (((c) & (0x3 << 0)) + 1) : 0)
+/* For instructions that take a single argument, normally the first argument
+ * slot is used for the argument and the second slot is a dummy #0 constant.
+ * However, there are exceptions: instructions like fmov store their argument
+ * in the _second_ slot and store a dummy r24 in the first slot, designated by
+ * QUIRK_FLIPPED_R24 */
+
+#define QUIRK_FLIPPED_R24 (1 << 2)
+
/* Vector-independent shorthands for the above; these numbers are arbitrary and
* not from the ISA. Convert to the above with unit_enum_to_midgard */
#define UNIT_SMUL ALU_ENAB_SCAL_MUL
#define UNIT_VLUT ALU_ENAB_VEC_LUT
-/* Shorthands for usual combinations of units. LUT is intentionally excluded
- * since it's nutty. */
+/* Shorthands for usual combinations of units */
#define UNITS_MUL (UNIT_VMUL | UNIT_SMUL)
#define UNITS_ADD (UNIT_VADD | UNIT_SADD)
-#define UNITS_ALL (UNITS_MUL | UNITS_ADD)
+#define UNITS_MOST (UNITS_MUL | UNITS_ADD)
+#define UNITS_ALL (UNITS_MOST | UNIT_VLUT)
#define UNITS_SCALAR (UNIT_SADD | UNIT_SMUL)
#define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD)
#define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT)
/* Per-opcode property table, indexed by midgard_alu_op_* value. Each entry
 * ORs together UNIT_* / UNITS_* flags, optionally combined with
 * OP_CHANNEL_COUNT(n) and QUIRK_* bits. Opcodes without an explicit entry
 * are zero-initialised. */
-static int alu_opcode_props[256] = {
+static unsigned alu_opcode_props[256] = {
[midgard_alu_op_fadd] = UNITS_ADD,
[midgard_alu_op_fmul] = UNITS_MUL | UNIT_VLUT,
[midgard_alu_op_fmin] = UNITS_MUL | UNITS_ADD,
[midgard_alu_op_fmax] = UNITS_MUL | UNITS_ADD,
- [midgard_alu_op_imin] = UNITS_ALL,
- [midgard_alu_op_imax] = UNITS_ALL,
- [midgard_alu_op_fmov] = UNITS_ALL | UNIT_VLUT,
+ [midgard_alu_op_imin] = UNITS_MOST,
+ [midgard_alu_op_imax] = UNITS_MOST,
+ [midgard_alu_op_fmov] = UNITS_ALL | QUIRK_FLIPPED_R24,
[midgard_alu_op_ffloor] = UNITS_ADD,
[midgard_alu_op_fceil] = UNITS_ADD,
[midgard_alu_op_fdot3] = UNIT_VMUL | OP_CHANNEL_COUNT(3),
[midgard_alu_op_fdot4] = UNIT_VMUL | OP_CHANNEL_COUNT(4),
- [midgard_alu_op_iadd] = UNITS_ADD,
- [midgard_alu_op_isub] = UNITS_ADD,
- [midgard_alu_op_imul] = UNITS_ALL,
- [midgard_alu_op_imov] = UNITS_ALL,
+ /* Incredibly, iadd can run on vmul, etc */
+ [midgard_alu_op_iadd] = UNITS_MOST,
+ [midgard_alu_op_isub] = UNITS_MOST,
+ [midgard_alu_op_imul] = UNITS_MOST,
+ [midgard_alu_op_imov] = UNITS_MOST | QUIRK_FLIPPED_R24,
/* For vector comparisons, use ball etc */
- [midgard_alu_op_feq] = UNITS_ALL,
- [midgard_alu_op_fne] = UNITS_ALL,
+ [midgard_alu_op_feq] = UNITS_MOST,
+ [midgard_alu_op_fne] = UNITS_MOST,
[midgard_alu_op_flt] = UNIT_SADD,
- [midgard_alu_op_ieq] = UNITS_ALL,
- [midgard_alu_op_ine] = UNITS_ALL,
- [midgard_alu_op_ilt] = UNITS_ALL,
- [midgard_alu_op_ile] = UNITS_ALL,
+ [midgard_alu_op_ieq] = UNITS_MOST,
+ [midgard_alu_op_ine] = UNITS_MOST,
+ [midgard_alu_op_ilt] = UNITS_MOST,
+ [midgard_alu_op_ile] = UNITS_MOST,
[midgard_alu_op_icsel] = UNITS_ADD,
[midgard_alu_op_fcsel] = UNITS_ADD | UNIT_SMUL,
[midgard_alu_op_iand] = UNITS_ADD, /* XXX: Test case where it's right on smul but not sadd */
[midgard_alu_op_ior] = UNITS_ADD,
[midgard_alu_op_ixor] = UNITS_ADD,
- [midgard_alu_op_inot] = UNITS_ALL,
+ [midgard_alu_op_inot] = UNITS_MOST,
[midgard_alu_op_ishl] = UNITS_ADD,
[midgard_alu_op_iasr] = UNITS_ADD,
[midgard_alu_op_ilsr] = UNITS_ADD,
[midgard_alu_op_ilsr] = UNITS_ADD, /* NOTE(review): exact duplicate of the ilsr entry on the previous line; redundant */
- [midgard_alu_op_fball_eq] = UNITS_ALL,
- [midgard_alu_op_fbany_neq] = UNITS_ALL,
- [midgard_alu_op_iball_eq] = UNITS_ALL,
- [midgard_alu_op_ibany_neq] = UNITS_ALL
+ [midgard_alu_op_fball_eq] = UNITS_MOST,
+ [midgard_alu_op_fbany_neq] = UNITS_MOST,
+ [midgard_alu_op_iball_eq] = UNITS_MOST,
+ [midgard_alu_op_ibany_neq] = UNITS_MOST
};
emit_mir_instruction(ctx, ins);
}
-/* Components: Number/style of arguments:
- * 3: One-argument op with r24 (i2f, f2i)
- * 2: Standard two argument op (fadd, fmul)
- * 1: Flipped one-argument op (fmov, imov)
- * 0: Standard one-argument op (frcp)
- * NIR: NIR instruction op.
- * Op: Midgard instruction op.
- */
-
-#define ALU_CASE(_components, nir, _op) \
+#define ALU_CASE(nir, _op) \
case nir_op_##nir: \
- components = _components; \
op = midgard_alu_op_##_op; \
break;
unsigned dest = nir_dest_index(&instr->dest.dest);
unsigned nr_components = is_ssa ? instr->dest.dest.ssa.num_components : instr->dest.dest.reg.reg->num_components;
+ unsigned nr_inputs = nir_op_infos[instr->op].num_inputs;
/* Most Midgard ALU ops have a 1:1 correspondence to NIR ops; these are
* supported. A few do not and are commented for now. Also, there are a
* convention of the Midgard instruction; actual packing is done in
* emit_alu below */
- unsigned op, components;
+ unsigned op;
switch (instr->op) {
- ALU_CASE(2, fadd, fadd);
- ALU_CASE(2, fmul, fmul);
- ALU_CASE(2, fmin, fmin);
- ALU_CASE(2, fmax, fmax);
- ALU_CASE(2, imin, imin);
- ALU_CASE(2, imax, imax);
- ALU_CASE(1, fmov, fmov);
- ALU_CASE(0, ffloor, ffloor);
- ALU_CASE(0, fceil, fceil);
- ALU_CASE(2, fdot3, fdot3);
- //ALU_CASE(2, fdot3r);
- ALU_CASE(2, fdot4, fdot4);
- //ALU_CASE(2, freduce);
- ALU_CASE(2, iadd, iadd);
- ALU_CASE(2, isub, isub);
- ALU_CASE(2, imul, imul);
+ ALU_CASE(fadd, fadd);
+ ALU_CASE(fmul, fmul);
+ ALU_CASE(fmin, fmin);
+ ALU_CASE(fmax, fmax);
+ ALU_CASE(imin, imin);
+ ALU_CASE(imax, imax);
+ ALU_CASE(fmov, fmov);
+ ALU_CASE(ffloor, ffloor);
+ ALU_CASE(fceil, fceil);
+ ALU_CASE(fdot3, fdot3);
+ ALU_CASE(fdot4, fdot4);
+ ALU_CASE(iadd, iadd);
+ ALU_CASE(isub, isub);
+ ALU_CASE(imul, imul);
/* XXX: Use fmov, not imov, since imov was causing major
* issues with texture precision? XXX research */
- ALU_CASE(1, imov, fmov);
-
- ALU_CASE(2, feq, feq);
- ALU_CASE(2, fne, fne);
- ALU_CASE(2, flt, flt);
- ALU_CASE(2, ieq, ieq);
- ALU_CASE(2, ine, ine);
- ALU_CASE(2, ilt, ilt);
- //ALU_CASE(2, icsel, icsel);
- ALU_CASE(0, frcp, frcp);
- ALU_CASE(0, frsq, frsqrt);
- ALU_CASE(0, fsqrt, fsqrt);
- ALU_CASE(0, fexp2, fexp2);
- ALU_CASE(0, flog2, flog2);
-
- ALU_CASE(3, f2i32, f2i);
- ALU_CASE(3, f2u32, f2u);
- ALU_CASE(3, i2f32, i2f);
- ALU_CASE(3, u2f32, u2f);
-
- ALU_CASE(0, fsin, fsin);
- ALU_CASE(0, fcos, fcos);
-
- ALU_CASE(2, iand, iand);
- ALU_CASE(2, ior, ior);
- ALU_CASE(2, ixor, ixor);
- ALU_CASE(0, inot, inot);
- ALU_CASE(2, ishl, ishl);
- ALU_CASE(2, ishr, iasr);
- ALU_CASE(2, ushr, ilsr);
- //ALU_CASE(2, ilsr, ilsr);
-
- ALU_CASE(2, ball_fequal4, fball_eq);
- ALU_CASE(2, bany_fnequal4, fbany_neq);
- ALU_CASE(2, ball_iequal4, iball_eq);
- ALU_CASE(2, bany_inequal4, ibany_neq);
+ ALU_CASE(imov, fmov);
+
+ ALU_CASE(feq, feq);
+ ALU_CASE(fne, fne);
+ ALU_CASE(flt, flt);
+ ALU_CASE(ieq, ieq);
+ ALU_CASE(ine, ine);
+ ALU_CASE(ilt, ilt);
+
+ ALU_CASE(frcp, frcp);
+ ALU_CASE(frsq, frsqrt);
+ ALU_CASE(fsqrt, fsqrt);
+ ALU_CASE(fexp2, fexp2);
+ ALU_CASE(flog2, flog2);
+
+ ALU_CASE(f2i32, f2i);
+ ALU_CASE(f2u32, f2u);
+ ALU_CASE(i2f32, i2f);
+ ALU_CASE(u2f32, u2f);
+
+ ALU_CASE(fsin, fsin);
+ ALU_CASE(fcos, fcos);
+
+ ALU_CASE(iand, iand);
+ ALU_CASE(ior, ior);
+ ALU_CASE(ixor, ixor);
+ ALU_CASE(inot, inot);
+ ALU_CASE(ishl, ishl);
+ ALU_CASE(ishr, iasr);
+ ALU_CASE(ushr, ilsr);
+
+ ALU_CASE(ball_fequal4, fball_eq);
+ ALU_CASE(bany_fnequal4, fbany_neq);
+ ALU_CASE(ball_iequal4, iball_eq);
+ ALU_CASE(bany_inequal4, ibany_neq);
/* For greater-or-equal, we use less-or-equal and flip the
* arguments */
case nir_op_ige: {
- components = 2;
op = midgard_alu_op_ile;
/* Swap via temporary */
}
case nir_op_bcsel: {
- components = 2;
op = midgard_alu_op_fcsel;
+ /* csel works as a two-arg in Midgard, since the condition is hardcoded in r31.w */
+ nr_inputs = 2;
+
emit_condition(ctx, &instr->src[0].src, false);
/* The condition is the first argument; move the other
case nir_op_b2f32: {
op = midgard_alu_op_iand;
- components = 0;
break;
}
return;
}
- int _unit = alu_opcode_props[op];
+ /* Fetch unit, quirks, etc information */
+ unsigned opcode_props = alu_opcode_props[op];
+ bool quirk_flipped_r24 = opcode_props & QUIRK_FLIPPED_R24;
/* Initialise fields common between scalar/vector instructions */
midgard_outmod outmod = instr->dest.saturate ? midgard_outmod_sat : midgard_outmod_none;
* needs it, or else we may segfault. */
unsigned src0 = nir_alu_src_index(&instr->src[0]);
- unsigned src1 = components == 2 ? nir_alu_src_index(&instr->src[1]) : SSA_UNUSED_0;
+ unsigned src1 = nr_inputs == 2 ? nir_alu_src_index(&instr->src[1]) : SSA_UNUSED_0;
/* Rather than use the instruction generation helpers, we do it
* ourselves here to avoid the mess */
midgard_instruction ins = {
.type = TAG_ALU_4,
.ssa_args = {
- .src0 = components == 3 || components == 2 || components == 0 ? src0 : SSA_UNUSED_1,
- .src1 = components == 2 ? src1 : components == 1 ? src0 : components == 0 ? SSA_UNUSED_0 : SSA_UNUSED_1,
+ .src0 = quirk_flipped_r24 ? SSA_UNUSED_1 : src0,
+ .src1 = quirk_flipped_r24 ? src0 : src1,
.dest = dest,
- .inline_constant = components == 0
+ .inline_constant = (nr_inputs == 1) && !quirk_flipped_r24
}
};
- nir_alu_src *nirmod0 = NULL;
- nir_alu_src *nirmod1 = NULL;
+ nir_alu_src *nirmods[2] = { NULL };
- if (components == 2) {
- nirmod0 = &instr->src[0];
- nirmod1 = &instr->src[1];
- } else if (components == 1) {
- nirmod1 = &instr->src[0];
- } else if (components == 0) {
- nirmod0 = &instr->src[0];
+ if (nr_inputs == 2) {
+ nirmods[0] = &instr->src[0];
+ nirmods[1] = &instr->src[1];
+ } else if (nr_inputs == 1) {
+ nirmods[quirk_flipped_r24] = &instr->src[0];
+ } else {
+ assert(0);
}
midgard_vector_alu alu = {
/* Writemask only valid for non-SSA NIR */
.mask = expand_writemask((1 << nr_components) - 1),
- .src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmod0)),
- .src2 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmod1)),
+ .src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0])),
+ .src2 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[1])),
};
/* Apply writemask if non-SSA, keeping in mind that we can't write to components that don't exist */
ins.constants[0] = 1.0;
}
- if (_unit == UNIT_VLUT) {
- /* To avoid duplicating the LUTs (we think?), LUT instructions can only
- * operate as if they were scalars. Lower them here by changing the
- * component. */
-
- assert(components == 0);
+ if ((opcode_props & UNITS_ALL) == UNIT_VLUT) {
+ /* To avoid duplicating the lookup tables (probably), true LUT
+ * instructions can only operate as if they were scalars. Lower
+ * them here by changing the component. */
uint8_t original_swizzle[4];
- memcpy(original_swizzle, nirmod0->swizzle, sizeof(nirmod0->swizzle));
+ memcpy(original_swizzle, nirmods[0]->swizzle, sizeof(nirmods[0]->swizzle));
for (int i = 0; i < nr_components; ++i) {
ins.alu.mask = (0x3) << (2 * i); /* Mask the associated component */
for (int j = 0; j < 4; ++j)
- nirmod0->swizzle[j] = original_swizzle[i]; /* Pull from the correct component */
+ nirmods[0]->swizzle[j] = original_swizzle[i]; /* Pull from the correct component */
- ins.alu.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmod0));
+ ins.alu.src1 = vector_alu_srco_unsigned(vector_alu_modifiers(nirmods[0]));
emit_mir_instruction(ctx, ins);
}
} else {
}
}
+#undef ALU_CASE
+
static void
emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
{