nir: intel/compiler: Move ifind_msb lowering to NIR

author Ian Romanick <ian.d.romanick@intel.com>

Mon, 10 Oct 2022 20:35:01 +0000 (13:35 -0700)

committer Marge Bot <emma+marge@anholt.net>

Fri, 10 Mar 2023 15:27:17 +0000 (15:27 +0000)
author Ian Romanick <ian.d.romanick@intel.com>
Mon, 10 Oct 2022 20:35:01 +0000 (13:35 -0700)
committer Marge Bot <emma+marge@anholt.net>
Fri, 10 Mar 2023 15:27:17 +0000 (15:27 +0000)
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h

index 37f8d68..864d4ce 100644 (file)
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3411,6 +3411,8 @@ typedef struct nir_shader_compiler_options {
     bool lower_ifind_msb;
     /** Lowers ifind_msb and ufind_msb to reverse variants */
     bool lower_find_msb_to_reverse;
+   /** Lowers ifind_msb to uclz and logic ops */
+   bool lower_ifind_msb_to_uclz;
     /** Lowers find_lsb to ufind_msb and logic ops */
     bool lower_find_lsb;
     bool lower_uadd_carry;
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py

index 60957c8..e8ec491 100644 (file)
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2013,6 +2013,28 @@ optimizations.extend([
       ('ifind_msb_rev', 'value')),
      'options->lower_find_msb_to_reverse'),
  
+   # uclz of an absolute value source almost always does the right thing.
+   # There are three problem cases:
+   #
+   # * 0x80000000.  Since abs(0x80000000) == 0x80000000, uclz returns 0.
+   #   However, findMSB(int(0x80000000)) == 30.
+   #
+   # * 0xffffffff.  Since abs(0xffffffff) == 1, uclz returns 31.  Section 8.8
+   #   (Integer Functions) of the GLSL 4.50 spec says:
+   #
+   #    For a value of zero or negative one, -1 will be returned.
+   #
+   # * Negative powers of two.  uclz(abs(-(1<<x))) returns x, but
+   #   findMSB(-(1<<x)) should return x-1.
+   #
+   # For all negative number cases, including 0x80000000 and 0xffffffff, the
+   # correct value is obtained from uclz if instead of negating the (already
+   # negative) value the logical-not is used.  A conditional logical-not can
+   # be achieved by (x ^ (x >> 31)), where >> is an arithmetic shift.
+   (('ifind_msb', 'value'),
+    ('isub', 31, ('uclz', ('ixor', 'value', ('ishr', 'value', 31)))),
+    'options->lower_ifind_msb_to_uclz'),
+
      (('ufind_msb', 'value'),
       ('bcsel', ('ige', ('ufind_msb_rev', 'value'), 0),
        ('isub', 31, ('ufind_msb_rev', 'value')),
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c

index 7ecf630..879f09c 100644 (file)
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -189,6 +189,7 @@ brw_compiler_create(void *mem_ctx, const struct intel_device_info *devinfo)
        nir_options->lower_rotate = devinfo->ver < 11;
        nir_options->lower_bitfield_reverse = devinfo->ver < 7;
        nir_options->lower_find_lsb = devinfo->ver < 7;
+      nir_options->lower_ifind_msb_to_uclz = devinfo->ver < 7;
        nir_options->has_iadd3 = devinfo->verx10 >= 125;
  
        nir_options->has_sdot_4x8 = devinfo->ver >= 12;
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp

index 36bbef7..0215dcd 100644 (file)
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -613,38 +613,11 @@ fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
  static void
  emit_find_msb_using_lzd(const fs_builder &bld,
                          const fs_reg &result,
-                        const fs_reg &src,
-                        bool is_signed)
+                        const fs_reg &src)
  {
     fs_inst *inst;
     fs_reg temp = src;
  
-   if (is_signed) {
-      /* LZD of an absolute value source almost always does the right
-       * thing.  There are two problem values:
-       *
-       * * 0x80000000.  Since abs(0x80000000) == 0x80000000, LZD returns
-       *   0.  However, findMSB(int(0x80000000)) == 30.
-       *
-       * * 0xffffffff.  Since abs(0xffffffff) == 1, LZD returns
-       *   31.  Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
-       *
-       *    For a value of zero or negative one, -1 will be returned.
-       *
-       * * Negative powers of two.  LZD(abs(-(1<<x))) returns x, but
-       *   findMSB(-(1<<x)) should return x-1.
-       *
-       * For all negative number cases, including 0x80000000 and
-       * 0xffffffff, the correct value is obtained from LZD if instead of
-       * negating the (already negative) value the logical-not is used.  A
-       * conditional logical-not can be achieved in two instructions.
-       */
-      temp = bld.vgrf(BRW_REGISTER_TYPE_D);
-
-      bld.ASR(temp, src, brw_imm_d(31));
-      bld.XOR(temp, temp, src);
-   }
-
     bld.LZD(retype(result, BRW_REGISTER_TYPE_UD),
             retype(temp, BRW_REGISTER_TYPE_UD));
  
@@ -1704,7 +1677,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
  
     case nir_op_ufind_msb: {
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      emit_find_msb_using_lzd(bld, result, op[0], false);
+      emit_find_msb_using_lzd(bld, result, op[0]);
        break;
     }
  
@@ -1715,23 +1688,20 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr,
  
     case nir_op_ifind_msb: {
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      assert(devinfo->ver >= 7);
  
-      if (devinfo->ver < 7) {
-         emit_find_msb_using_lzd(bld, result, op[0], true);
-      } else {
-         bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
+      bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
  
-         /* FBH counts from the MSB side, while GLSL's findMSB() wants the
-          * count from the LSB side. If FBH didn't return an error
-          * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
-          * count into an LSB count.
-          */
-         bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+       * subtract the result from 31 to convert the MSB count into an LSB
+       * count.
+       */
+      bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
  
-         inst = bld.ADD(result, result, brw_imm_d(31));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-         inst->src[0].negate = true;
-      }
+      inst = bld.ADD(result, result, brw_imm_d(31));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      inst->src[0].negate = true;
        break;
     }
  
diff --git a/src/intel/compiler/brw_vec4_nir.cpp b/src/intel/compiler/brw_vec4_nir.cpp

index 5f40288..a490f01 100644 (file)
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -832,38 +832,11 @@ vec4_visitor::optimize_predicate(nir_alu_instr *instr,
  static void
  emit_find_msb_using_lzd(const vec4_builder &bld,
                          const dst_reg &dst,
-                        const src_reg &src,
-                        bool is_signed)
+                        const src_reg &src)
  {
     vec4_instruction *inst;
     src_reg temp = src;
  
-   if (is_signed) {
-      /* LZD of an absolute value source almost always does the right
-       * thing.  There are two problem values:
-       *
-       * * 0x80000000.  Since abs(0x80000000) == 0x80000000, LZD returns
-       *   0.  However, findMSB(int(0x80000000)) == 30.
-       *
-       * * 0xffffffff.  Since abs(0xffffffff) == 1, LZD returns
-       *   31.  Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
-       *
-       *    For a value of zero or negative one, -1 will be returned.
-       *
-       * * Negative powers of two.  LZD(abs(-(1<<x))) returns x, but
-       *   findMSB(-(1<<x)) should return x-1.
-       *
-       * For all negative number cases, including 0x80000000 and
-       * 0xffffffff, the correct value is obtained from LZD if instead of
-       * negating the (already negative) value the logical-not is used.  A
-       * conditional logical-not can be achieved in two instructions.
-       */
-      temp = src_reg(bld.vgrf(BRW_REGISTER_TYPE_D));
-
-      bld.ASR(dst_reg(temp), src, brw_imm_d(31));
-      bld.XOR(dst_reg(temp), temp, src);
-   }
-
     bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD),
             retype(temp, BRW_REGISTER_TYPE_UD));
  
@@ -1661,30 +1634,28 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
  
     case nir_op_ufind_msb:
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
-      emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0], false);
+      emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0]);
        break;
  
     case nir_op_ifind_msb: {
        assert(nir_dest_bit_size(instr->dest.dest) < 64);
+      assert(devinfo->ver >= 7);
+
        vec4_builder bld = vec4_builder(this).at_end();
        src_reg src(dst);
  
-      if (devinfo->ver < 7) {
-         emit_find_msb_using_lzd(bld, dst, op[0], true);
-      } else {
-         emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
+      emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
  
-         /* FBH counts from the MSB side, while GLSL's findMSB() wants the
-          * count from the LSB side. If FBH didn't return an error
-          * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
-          * count into an LSB count.
-          */
-         bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+       * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+       * subtract the result from 31 to convert the MSB count into an LSB
+       * count.
+       */
+      bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
  
-         inst = bld.ADD(dst, src, brw_imm_d(31));
-         inst->predicate = BRW_PREDICATE_NORMAL;
-         inst->src[0].negate = true;
-      }
+      inst = bld.ADD(dst, src, brw_imm_d(31));
+      inst->predicate = BRW_PREDICATE_NORMAL;
+      inst->src[0].negate = true;
        break;
     }
author	Ian Romanick <ian.d.romanick@intel.com>
	Mon, 10 Oct 2022 20:35:01 +0000 (13:35 -0700)
committer	Marge Bot <emma+marge@anholt.net>
	Fri, 10 Mar 2023 15:27:17 +0000 (15:27 +0000)
src/compiler/nir/nir.h		patch \| blob \| history
src/compiler/nir/nir_opt_algebraic.py		patch \| blob \| history
src/intel/compiler/brw_compiler.c		patch \| blob \| history
src/intel/compiler/brw_fs_nir.cpp		patch \| blob \| history
src/intel/compiler/brw_vec4_nir.cpp		patch \| blob \| history