nir: split fuse_ffma into fuse_ffma16/32/64

Author:     Marek Olšák <marek.olsak@amd.com>
AuthorDate: Thu, 17 Sep 2020 01:48:18 +0000 (21:48 -0400)
Commit:     Marge Bot <eric+marge@anholt.net>
CommitDate: Thu, 24 Sep 2020 12:29:11 +0000 (12:29 +0000)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h

index d3ba1bf..d048cb3 100644 (file)
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3056,7 +3056,9 @@ typedef enum {
  typedef struct nir_shader_compiler_options {
     bool lower_fdiv;
     bool lower_ffma;
-   bool fuse_ffma;
+   bool fuse_ffma16;
+   bool fuse_ffma32;
+   bool fuse_ffma64;
     bool lower_flrp16;
     bool lower_flrp32;
     /** Lowers flrp when it does not support doubles */
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py

index 39c07ce..a03cc54 100644 (file)
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -195,7 +195,9 @@ optimizations.extend([
     (('fceil', a), ('fneg', ('ffloor', ('fneg', a))), 'options->lower_fceil'),
     (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
     # Always lower inexact ffma, because it will be fused back by late optimizations (nir_opt_algebraic_late).
-   (('~ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma'),
+   (('~ffma@16', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma16'),
+   (('~ffma@32', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma32'),
+   (('~ffma@64', a, b, c), ('fadd', ('fmul', a, b), c), 'options->fuse_ffma64'),
  
     (('~fmul', ('fadd', ('iand', ('ineg', ('b2i', 'a@bool')), ('fmul', b, c)), '#d'), '#e'),
      ('bcsel', a, ('fmul', ('fadd', ('fmul', b, c), d), e), ('fmul', d, e))),
@@ -2028,7 +2030,9 @@ late_optimizations = [
     (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
     (('ineg', a), ('isub', 0, a), 'options->lower_negate'),
     (('iabs', a), ('imax', a, ('ineg', a)), 'options->lower_iabs'),
-   (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma'),
+   (('~fadd@16', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma16'),
+   (('~fadd@32', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma32'),
+   (('~fadd@64', ('fmul', a, b), c), ('ffma', a, b, c), 'options->fuse_ffma64'),
  
     # These are duplicated from the main optimizations table.  The late
     # patterns that rearrange expressions like x - .5 < 0 to x < .5 can create
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c

index 308d425..f33f048 100644 (file)
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -47,7 +47,9 @@ static const nir_shader_compiler_options options = {
                 .lower_usub_borrow = true,
                 .lower_mul_high = true,
                 .lower_mul_2x32_64 = true,
-               .fuse_ffma = true,
+               .fuse_ffma16 = true,
+               .fuse_ffma32 = true,
+               .fuse_ffma64 = true,
                 .vertex_id_zero_based = true,
                 .lower_extract_byte = true,
                 .lower_extract_word = true,
@@ -97,7 +99,9 @@ static const nir_shader_compiler_options options_a6xx = {
                 .lower_usub_borrow = true,
                 .lower_mul_high = true,
                 .lower_mul_2x32_64 = true,
-               .fuse_ffma = true,
+               .fuse_ffma16 = true,
+               .fuse_ffma32 = true,
+               .fuse_ffma64 = true,
                 .vertex_id_zero_based = false,
                 .lower_extract_byte = true,
                 .lower_extract_word = true,
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c

index 1637eaa..3cc7dce 100644 (file)
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -1004,7 +1004,9 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
        .lower_fpow = true,
        .lower_sub = true,
        .lower_ftrunc = true,
-      .fuse_ffma = true,
+      .fuse_ffma16 = true,
+      .fuse_ffma32 = true,
+      .fuse_ffma64 = true,
        .lower_bitops = true,
        .lower_all_io_to_temps = true,
        .vertex_id_zero_based = true,
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c

index 58e99f0..6cf95d5 100644 (file)
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -35,7 +35,9 @@ static const nir_shader_compiler_options options = {
         .lower_fmod = true,
         .lower_fdiv = true,
         .lower_fceil = true,
-       .fuse_ffma = true,
+       .fuse_ffma16 = true,
+       .fuse_ffma32 = true,
+       .fuse_ffma64 = true,
         /* .fdot_replicates = true, it is replicated, but it makes things worse */
         .lower_all_io_to_temps = true,
         .vertex_id_zero_based = true, /* its not implemented anyway */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp

index c466d24..c5e5477 100644 (file)
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3207,7 +3207,9 @@ nvir_nir_shader_compiler_options(int chipset)
     nir_shader_compiler_options op = {};
     op.lower_fdiv = (chipset >= NVISA_GV100_CHIPSET);
     op.lower_ffma = false;
-   op.fuse_ffma = false; /* nir doesn't track mad vs fma */
+   op.fuse_ffma16 = false; /* nir doesn't track mad vs fma */
+   op.fuse_ffma32 = false; /* nir doesn't track mad vs fma */
+   op.fuse_ffma64 = false; /* nir doesn't track mad vs fma */
     op.lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET);
     op.lower_flrp32 = true;
     op.lower_flrp64 = true;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c

index 06f73ab..29201ee 100644 (file)
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -923,7 +923,9 @@ int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
  }
  
  static const nir_shader_compiler_options nir_options = {
-   .fuse_ffma = false, /* nir doesn't track mad vs fma */
+   .fuse_ffma16 = false, /* nir doesn't track mad vs fma */
+   .fuse_ffma32 = false, /* nir doesn't track mad vs fma */
+   .fuse_ffma64 = false, /* nir doesn't track mad vs fma */
     .lower_flrp32 = true,
     .lower_flrp64 = true,
     .lower_fpow = false,
diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c

index b4dffa7..eab7ce9 100644 (file)
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -1179,7 +1179,9 @@ struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
  }
  
  const struct nir_shader_compiler_options r600_nir_fs_options = {
-       .fuse_ffma = true,
+       .fuse_ffma16 = true,
+       .fuse_ffma32 = true,
+       .fuse_ffma64 = true,
         .lower_scmp = true,
         .lower_flrp32 = true,
         .lower_flrp64 = true,
@@ -1203,7 +1205,9 @@ const struct nir_shader_compiler_options r600_nir_fs_options = {
  };
  
  const struct nir_shader_compiler_options r600_nir_options = {
-       .fuse_ffma = true,
+       .fuse_ffma16 = true,
+       .fuse_ffma32 = true,
+       .fuse_ffma64 = true,
         .lower_scmp = true,
         .lower_flrp32 = true,
         .lower_flrp64 = true,
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c

index a511bab..1f19fdd 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -946,7 +946,9 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
         * for gfx10.3 on gfx10.
         */
        .lower_ffma = sscreen->info.chip_class <= GFX9,
-      .fuse_ffma = sscreen->info.chip_class >= GFX10,
+      .fuse_ffma16 = sscreen->info.chip_class >= GFX10,
+      .fuse_ffma32 = sscreen->info.chip_class >= GFX10,
+      .fuse_ffma64 = sscreen->info.chip_class >= GFX10,
        .lower_fmod = true,
        .lower_pack_snorm_4x8 = true,
        .lower_pack_unorm_4x8 = true,
diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h

index cc28b42..15b9078 100644 (file)
--- a/src/panfrost/bifrost/bifrost_compile.h
+++ b/src/panfrost/bifrost/bifrost_compile.h
@@ -69,7 +69,9 @@ static const nir_shader_compiler_options bifrost_nir_options = {
  
          .lower_bitfield_extract_to_shifts = true,
          .vectorize_io = true,
-        .fuse_ffma = true,
+       .fuse_ffma16 = true,
+       .fuse_ffma32 = true,
+       .fuse_ffma64 = true,
          .use_interpolated_input_intrinsics = true
  };
author	Marek Olšák <marek.olsak@amd.com>
	Thu, 17 Sep 2020 01:48:18 +0000 (21:48 -0400)
committer	Marge Bot <eric+marge@anholt.net>
	Thu, 24 Sep 2020 12:29:11 +0000 (12:29 +0000)
Files changed:
- src/compiler/nir/nir.h
- src/compiler/nir/nir_opt_algebraic.py
- src/freedreno/ir3/ir3_nir.c
- src/gallium/drivers/etnaviv/etnaviv_screen.c
- src/gallium/drivers/freedreno/a2xx/ir2_nir.c
- src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
- src/gallium/drivers/nouveau/nv50/nv50_screen.c
- src/gallium/drivers/r600/r600_pipe_common.c
- src/gallium/drivers/radeonsi/si_get.c
- src/panfrost/bifrost/bifrost_compile.h