.lower_fisnormal = true,
.lower_rotate = true,
.lower_to_scalar = true,
- .lower_int64_options = nir_lower_imul_2x32_64,
+ .lower_int64_options = nir_lower_imul_2x32_64 | nir_lower_imul_high64,
.has_sdot_4x8 = sscreen->info.has_accelerated_dot_product,
.has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11,
.has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
return 1;
}
+/* Callback for nir_lower_bit_size: widen narrow (sub-32-bit) high-multiply
+ * ALU ops to 32 bits so the subsequent algebraic lowering only has to deal
+ * with the 32-bit imul_high/umul_high forms.
+ *
+ * Returns the bit size to lower the instruction to, or 0 to leave it as-is.
+ * The `data` parameter is unused here (NULL is passed at the call site).
+ */
+static unsigned si_lower_bit_size_callback(const nir_instr *instr, void *data)
+{
+ /* Only ALU instructions are candidates for bit-size lowering. */
+ if (instr->type != nir_instr_type_alu)
+ return 0;
+
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+ switch (alu->op) {
+ case nir_op_imul_high:
+ case nir_op_umul_high:
+ /* High multiplies with a result narrower than 32 bits are widened. */
+ if (nir_dest_bit_size(alu->dest.dest) < 32)
+ return 32;
+ break;
+ default:
+ break;
+ }
+
+ /* 0 means: do not change this instruction's bit size. */
+ return 0;
+}
+
void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first)
{
bool progress;
NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
/* Needed for algebraic lowering */
+ NIR_PASS(progress, nir, nir_lower_bit_size, si_lower_bit_size_callback, NULL);
NIR_PASS(progress, nir, nir_opt_algebraic);
NIR_PASS(progress, nir, nir_opt_constant_folding);