bool lower_ifind_msb;
/** Lowers ifind_msb and ufind_msb to reverse variants */
bool lower_find_msb_to_reverse;
+ /** Lowers ifind_msb to uclz and logic ops */
+ bool lower_ifind_msb_to_uclz;
/** Lowers find_lsb to ufind_msb and logic ops */
bool lower_find_lsb;
bool lower_uadd_carry;
('ifind_msb_rev', 'value')),
'options->lower_find_msb_to_reverse'),
+ # uclz of an absolute value source almost always does the right thing.
+ # There are three classes of problem values:
+ #
+ # * 0x80000000. Since abs(0x80000000) == 0x80000000, uclz returns 0.
+ # However, findMSB(int(0x80000000)) == 30.
+ #
+ # * 0xffffffff. Since abs(0xffffffff) == 1, uclz returns 31. Section 8.8
+ # (Integer Functions) of the GLSL 4.50 spec says:
+ #
+ # For a value of zero or negative one, -1 will be returned.
+ #
+ # * Negative powers of two. uclz(abs(-(1<<x))) returns 31-x, which the
+ # final subtraction from 31 turns into x, but findMSB(-(1<<x)) should
+ # return x-1.
+ #
+ # For all negative number cases, including 0x80000000 and 0xffffffff, the
+ # correct value is obtained from uclz if instead of negating the (already
+ # negative) value the logical-not is used. A conditional logical-not can
+ # be achieved by (x ^ (x >> 31)).
+ (('ifind_msb', 'value'),
+ ('isub', 31, ('uclz', ('ixor', 'value', ('ishr', 'value', 31)))),
+ 'options->lower_ifind_msb_to_uclz'),
+
(('ufind_msb', 'value'),
('bcsel', ('ige', ('ufind_msb_rev', 'value'), 0),
('isub', 31, ('ufind_msb_rev', 'value')),
nir_options->lower_rotate = devinfo->ver < 11;
nir_options->lower_bitfield_reverse = devinfo->ver < 7;
nir_options->lower_find_lsb = devinfo->ver < 7;
+ nir_options->lower_ifind_msb_to_uclz = devinfo->ver < 7;
nir_options->has_iadd3 = devinfo->verx10 >= 125;
nir_options->has_sdot_4x8 = devinfo->ver >= 12;
static void
emit_find_msb_using_lzd(const fs_builder &bld,
const fs_reg &result,
- const fs_reg &src,
- bool is_signed)
+ const fs_reg &src)
{
fs_inst *inst;
fs_reg temp = src;
- if (is_signed) {
- /* LZD of an absolute value source almost always does the right
- * thing. There are two problem values:
- *
- * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns
- * 0. However, findMSB(int(0x80000000)) == 30.
- *
- * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns
- * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
- *
- * For a value of zero or negative one, -1 will be returned.
- *
- * * Negative powers of two. LZD(abs(-(1<<x))) returns x, but
- * findMSB(-(1<<x)) should return x-1.
- *
- * For all negative number cases, including 0x80000000 and
- * 0xffffffff, the correct value is obtained from LZD if instead of
- * negating the (already negative) value the logical-not is used. A
- * conditional logical-not can be achieved in two instructions.
- */
- temp = bld.vgrf(BRW_REGISTER_TYPE_D);
-
- bld.ASR(temp, src, brw_imm_d(31));
- bld.XOR(temp, temp, src);
- }
-
bld.LZD(retype(result, BRW_REGISTER_TYPE_UD),
retype(temp, BRW_REGISTER_TYPE_UD));
case nir_op_ufind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
- emit_find_msb_using_lzd(bld, result, op[0], false);
+ emit_find_msb_using_lzd(bld, result, op[0]);
break;
}
case nir_op_ifind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
+ assert(devinfo->ver >= 7);
- if (devinfo->ver < 7) {
- emit_find_msb_using_lzd(bld, result, op[0], true);
- } else {
- bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
+ bld.FBH(retype(result, BRW_REGISTER_TYPE_UD), op[0]);
- /* FBH counts from the MSB side, while GLSL's findMSB() wants the
- * count from the LSB side. If FBH didn't return an error
- * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
- * count into an LSB count.
- */
- bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+ /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+ * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+ * subtract the result from 31 to convert the MSB count into an LSB
+ * count.
+ */
+ bld.CMP(bld.null_reg_d(), result, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
- inst = bld.ADD(result, result, brw_imm_d(31));
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->src[0].negate = true;
- }
+ inst = bld.ADD(result, result, brw_imm_d(31));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->src[0].negate = true;
break;
}
static void
emit_find_msb_using_lzd(const vec4_builder &bld,
const dst_reg &dst,
- const src_reg &src,
- bool is_signed)
+ const src_reg &src)
{
vec4_instruction *inst;
src_reg temp = src;
- if (is_signed) {
- /* LZD of an absolute value source almost always does the right
- * thing. There are two problem values:
- *
- * * 0x80000000. Since abs(0x80000000) == 0x80000000, LZD returns
- * 0. However, findMSB(int(0x80000000)) == 30.
- *
- * * 0xffffffff. Since abs(0xffffffff) == 1, LZD returns
- * 31. Section 8.8 (Integer Functions) of the GLSL 4.50 spec says:
- *
- * For a value of zero or negative one, -1 will be returned.
- *
- * * Negative powers of two. LZD(abs(-(1<<x))) returns x, but
- * findMSB(-(1<<x)) should return x-1.
- *
- * For all negative number cases, including 0x80000000 and
- * 0xffffffff, the correct value is obtained from LZD if instead of
- * negating the (already negative) value the logical-not is used. A
- * conditional logical-not can be achieved in two instructions.
- */
- temp = src_reg(bld.vgrf(BRW_REGISTER_TYPE_D));
-
- bld.ASR(dst_reg(temp), src, brw_imm_d(31));
- bld.XOR(dst_reg(temp), temp, src);
- }
-
bld.LZD(retype(dst, BRW_REGISTER_TYPE_UD),
retype(temp, BRW_REGISTER_TYPE_UD));
case nir_op_ufind_msb:
assert(nir_dest_bit_size(instr->dest.dest) < 64);
- emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0], false);
+ emit_find_msb_using_lzd(vec4_builder(this).at_end(), dst, op[0]);
break;
case nir_op_ifind_msb: {
assert(nir_dest_bit_size(instr->dest.dest) < 64);
+ assert(devinfo->ver >= 7);
+
vec4_builder bld = vec4_builder(this).at_end();
src_reg src(dst);
- if (devinfo->ver < 7) {
- emit_find_msb_using_lzd(bld, dst, op[0], true);
- } else {
- emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
+ emit(FBH(retype(dst, BRW_REGISTER_TYPE_UD), op[0]));
- /* FBH counts from the MSB side, while GLSL's findMSB() wants the
- * count from the LSB side. If FBH didn't return an error
- * (0xFFFFFFFF), then subtract the result from 31 to convert the MSB
- * count into an LSB count.
- */
- bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
+ /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
+ * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
+ * subtract the result from 31 to convert the MSB count into an LSB
+ * count.
+ */
+ bld.CMP(dst_null_d(), src, brw_imm_d(-1), BRW_CONDITIONAL_NZ);
- inst = bld.ADD(dst, src, brw_imm_d(31));
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->src[0].negate = true;
- }
+ inst = bld.ADD(dst, src, brw_imm_d(31));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->src[0].negate = true;
break;
}