From 7472bb4badbf3bf196c1c09adbe16e70b048ab2d Mon Sep 17 00:00:00 2001
From: Emma Anholt <emma@anholt.net>
Date: Tue, 10 May 2022 10:31:07 -0700
Subject: [PATCH] glsl,nir: Move i/umulExtended lowering to NIR.

NIR already has the necessary lowering, and the GLSL lowering violates
GLSL IR validation rules.  Once quadop lowering was turned off, the IR
validation at the end of the compile path on DEBUG builds caught the
problem.

In order to move the lowering to NIR, though, we need to make sure that
drivers supporting these functions actually have the lowering flag set.

xfails added for t860, where apparently this tickles a variety of existing
64-bit bugs in the backend.

Fixes: #6461
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Reviewed-by: Mykhailo Skorokhodov <mykhailo.skorokhodov@globallogic.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16437>
---
 src/compiler/glsl/ir_optimization.h                |  1 -
 src/compiler/glsl/lower_instructions.cpp           | 70 ----------------------
 src/gallium/auxiliary/nir/nir_to_tgsi.c            |  1 +
 src/gallium/drivers/llvmpipe/lp_screen.c           |  1 +
 .../drivers/nouveau/codegen/nv50_ir_from_nir.cpp   |  2 +-
 src/gallium/drivers/radeonsi/si_get.c              |  1 +
 src/gallium/drivers/softpipe/sp_screen.c           |  1 +
 src/gallium/drivers/svga/svga_screen.c             |  1 +
 src/gallium/drivers/v3d/v3d_screen.c               |  1 +
 src/mesa/state_tracker/st_glsl_to_ir.cpp           |  1 -
 src/panfrost/ci/panfrost-t860-fails.txt            |  5 ++
 src/panfrost/midgard/midgard_compile.h             |  3 +
 12 files changed, 15 insertions(+), 73 deletions(-)

diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
index 7fc8e03..42b5be5 100644
--- a/src/compiler/glsl/ir_optimization.h
+++ b/src/compiler/glsl/ir_optimization.h
@@ -55,7 +55,6 @@ struct gl_shader_program;
 #define DDIV_TO_MUL_RCP           0x100000
 #define DIV_TO_MUL_RCP            (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
 #define SQRT_TO_ABS_SQRT          0x200000
-#define MUL64_TO_MUL_AND_MUL_HIGH 0x400000
 
 /* Operations for lower_64bit_integer_instructions() */
 #define DIV64                     (1U << 0)
diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp
index 8e6ed3c..f852762 100644
--- a/src/compiler/glsl/lower_instructions.cpp
+++ b/src/compiler/glsl/lower_instructions.cpp
@@ -157,7 +157,6 @@ private:
    void find_msb_to_float_cast(ir_expression *ir);
    void imul_high_to_mul(ir_expression *ir);
    void sqrt_to_abs_sqrt(ir_expression *ir);
-   void mul64_to_mul_and_mul_high(ir_expression *ir);
 
    ir_expression *_carry(operand a, operand b);
 
@@ -1615,66 +1614,6 @@ lower_instructions_visitor::sqrt_to_abs_sqrt(ir_expression *ir)
    this->progress = true;
 }
 
-void
-lower_instructions_visitor::mul64_to_mul_and_mul_high(ir_expression *ir)
-{
-   /* Lower 32x32-> 64 to
-    *    msb = imul_high(x_lo, y_lo)
-    *    lsb = mul(x_lo, y_lo)
-    */
-   const unsigned elements = ir->operands[0]->type->vector_elements;
-
-   const ir_expression_operation operation =
-      ir->type->base_type == GLSL_TYPE_UINT64 ? ir_unop_pack_uint_2x32
-                                              : ir_unop_pack_int_2x32;
-
-   const glsl_type *var_type = ir->type->base_type == GLSL_TYPE_UINT64
-                               ? glsl_type::uvec(elements)
-                               : glsl_type::ivec(elements);
-
-   const glsl_type *ret_type = ir->type->base_type == GLSL_TYPE_UINT64
-                               ? glsl_type::uvec2_type
-                               : glsl_type::ivec2_type;
-
-   ir_instruction &i = *base_ir;
-
-   ir_variable *msb =
-      new(ir) ir_variable(var_type, "msb", ir_var_temporary);
-   ir_variable *lsb =
-      new(ir) ir_variable(var_type, "lsb", ir_var_temporary);
-   ir_variable *x =
-      new(ir) ir_variable(var_type, "x", ir_var_temporary);
-   ir_variable *y =
-      new(ir) ir_variable(var_type, "y", ir_var_temporary);
-
-   i.insert_before(x);
-   i.insert_before(assign(x, ir->operands[0]));
-   i.insert_before(y);
-   i.insert_before(assign(y, ir->operands[1]));
-   i.insert_before(msb);
-   i.insert_before(lsb);
-
-   i.insert_before(assign(msb, imul_high(x, y)));
-   i.insert_before(assign(lsb, mul(x, y)));
-
-   ir_rvalue *result[4] = {NULL};
-   for (unsigned elem = 0; elem < elements; elem++) {
-      ir_rvalue *val = new(ir) ir_expression(ir_quadop_vector, ret_type,
-                                             swizzle(lsb, elem, 1),
-                                             swizzle(msb, elem, 1), NULL, NULL);
-      result[elem] = expr(operation, val);
-   }
-
-   ir->operation = ir_quadop_vector;
-   ir->init_num_operands();
-   ir->operands[0] = result[0];
-   ir->operands[1] = result[1];
-   ir->operands[2] = result[2];
-   ir->operands[3] = result[3];
-
-   this->progress = true;
-}
-
 ir_visitor_status
 lower_instructions_visitor::visit_leave(ir_expression *ir)
 {
@@ -1802,15 +1741,6 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
          imul_high_to_mul(ir);
       break;
 
-   case ir_binop_mul:
-      if (lowering(MUL64_TO_MUL_AND_MUL_HIGH) &&
-          (ir->type->base_type == GLSL_TYPE_INT64 ||
-           ir->type->base_type == GLSL_TYPE_UINT64) &&
-          (ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
-           ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
-         mul64_to_mul_and_mul_high(ir);
-      break;
-
    case ir_unop_rsq:
    case ir_unop_sqrt:
       if (lowering(SQRT_TO_ABS_SQRT))
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index abb9cb8..2993fc2 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -3906,6 +3906,7 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
    .lower_rotate = true,
    .lower_uniforms_to_ubo = true,
    .lower_vector_cmp = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
    .use_interpolated_input_intrinsics = true,
 };
 
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 94eda66..f653f1e 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -619,6 +619,7 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
    .lower_usub_borrow = true,
    .lower_mul_2x32_64 = true,
    .lower_ifind_msb = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
    .max_unroll_iterations = 32,
    .use_interpolated_input_intrinsics = true,
    .lower_to_scalar = true,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index ebc5c6f..931ee85 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3393,7 +3393,7 @@ nvir_nir_shader_compiler_options(int chipset)
       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
       ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
-      ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
+      nir_lower_imul_2x32_64 |
       ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
       nir_lower_ufind_msb64
    );
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 84d6e67..06ea8c5 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -1081,6 +1081,7 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
       .lower_insert_word = true,
       .lower_rotate = true,
       .lower_to_scalar = true,
+      .lower_int64_options = nir_lower_imul_2x32_64,
       .has_sdot_4x8 = sscreen->info.has_accelerated_dot_product,
       .has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
       .has_dot_2x16 = sscreen->info.has_accelerated_dot_product,
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index 7d84088..e59ec44 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -88,6 +88,7 @@ static const nir_shader_compiler_options sp_compiler_options = {
    .lower_rotate = true,
    .lower_uniforms_to_ubo = true,
    .lower_vector_cmp = true,
+   .lower_int64_options = nir_lower_imul_2x32_64,
    .max_unroll_iterations = 32,
    .use_interpolated_input_intrinsics = true,
 };
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
index 58b96ec..5e88002 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -762,6 +762,7 @@ vgpu10_get_shader_param(struct pipe_screen *screen,
    .lower_extract_word = true,                                                \
    .lower_insert_byte = true,                                                 \
    .lower_insert_word = true,                                                 \
+   .lower_int64_options = nir_lower_imul_2x32_64,                             \
    .lower_fdph = true,                                                        \
    .lower_flrp64 = true,                                                      \
    .lower_rotate = true,                                                      \
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 7dc1b33..b0dca2a 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -729,6 +729,7 @@ static const nir_shader_compiler_options v3d_nir_options = {
         .lower_wpos_pntc = true,
         .lower_rotate = true,
         .lower_to_scalar = true,
+        .lower_int64_options = nir_lower_imul_2x32_64,
         .has_fsub = true,
         .has_isub = true,
         .divergence_analysis_options =
diff --git a/src/mesa/state_tracker/st_glsl_to_ir.cpp b/src/mesa/state_tracker/st_glsl_to_ir.cpp
index 7692260..d8333bb 100644
--- a/src/mesa/state_tracker/st_glsl_to_ir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_ir.cpp
@@ -106,7 +106,6 @@ link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
                          FDIV_TO_MUL_RCP |
                          EXP_TO_EXP2 |
                          LOG_TO_LOG2 |
-                         MUL64_TO_MUL_AND_MUL_HIGH |
                          (have_ldexp ? 0 : LDEXP_TO_ARITH) |
                          (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
                          CARRY_TO_ARITH |
diff --git a/src/panfrost/ci/panfrost-t860-fails.txt b/src/panfrost/ci/panfrost-t860-fails.txt
index f679b2d..c05cbd1 100644
--- a/src/panfrost/ci/panfrost-t860-fails.txt
+++ b/src/panfrost/ci/panfrost-t860-fails.txt
@@ -87,3 +87,8 @@ dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d,Fail
 dEQP-GLES31.functional.texture.multisample.samples_3.use_texture_uint_2d_array,Fail
 dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d,Fail
 dEQP-GLES31.functional.texture.multisample.samples_4.use_texture_int_2d_array,Fail
+
+dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec3_highp_compute,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec3_highp_compute,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.imulextended.ivec4_highp_vertex,Fail
+dEQP-GLES31.functional.shaders.builtin_functions.integer.umulextended.uvec4_highp_vertex,Fail
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index 3c5193f..5ab520a 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -83,6 +83,9 @@ static const nir_shader_compiler_options midgard_nir_options = {
         .lower_unpack_unorm_4x8 = true,
         .lower_unpack_snorm_4x8 = true,
         .lower_pack_split = true,
+        .lower_pack_64_2x32_split = true,
+        .lower_unpack_64_2x32_split = true,
+        .lower_int64_options = nir_lower_imul_2x32_64,
 
         .lower_doubles_options = nir_lower_dmod,
 
-- 
2.7.4