radeonsi: lower mul_high
authorKarol Herbst <kherbst@redhat.com>
Tue, 15 Nov 2022 23:34:29 +0000 (00:34 +0100)
committerKarol Herbst <kherbst@redhat.com>
Mon, 24 Apr 2023 11:17:00 +0000 (13:17 +0200)
Fixes `integer_mad_hi` and `integer_mul_hi` `integer_ops` tests

Cc: mesa-stable
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22597>

src/gallium/drivers/radeonsi/si_get.c
src/gallium/drivers/radeonsi/si_shader_nir.c

index 77799f5..26c5f2a 100644 (file)
@@ -1248,7 +1248,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
       .lower_fisnormal = true,
       .lower_rotate = true,
       .lower_to_scalar = true,
-      .lower_int64_options = nir_lower_imul_2x32_64,
+      .lower_int64_options = nir_lower_imul_2x32_64 | nir_lower_imul_high64,
       .has_sdot_4x8 = sscreen->info.has_accelerated_dot_product,
       .has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11,
       .has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
index b655b95..ea6c0c9 100644 (file)
@@ -63,6 +63,26 @@ static uint8_t si_vectorize_callback(const nir_instr *instr, const void *data)
    return 1;
 }
 
+static unsigned si_lower_bit_size_callback(const nir_instr *instr, void *data)
+{
+   if (instr->type != nir_instr_type_alu)
+      return 0;
+
+   nir_alu_instr *alu = nir_instr_as_alu(instr);
+
+   switch (alu->op) {
+   case nir_op_imul_high:
+   case nir_op_umul_high:
+      if (nir_dest_bit_size(alu->dest.dest) < 32)
+         return 32;
+      break;
+   default:
+      break;
+   }
+
+   return 0;
+}
+
 void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first)
 {
    bool progress;
@@ -105,6 +125,7 @@ void si_nir_opts(struct si_screen *sscreen, struct nir_shader *nir, bool first)
       NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
 
       /* Needed for algebraic lowering */
+      NIR_PASS(progress, nir, nir_lower_bit_size, si_lower_bit_size_callback, NULL);
       NIR_PASS(progress, nir, nir_opt_algebraic);
       NIR_PASS(progress, nir, nir_opt_constant_folding);