.lower_hadd = true,
.lower_mul_32x16 = true,
.lower_uclz = true,
+ .has_bfe = true,
.has_bfm = true,
.has_bitfield_select = true,
.has_fsub = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitfield_insert = true,
- .lower_bitfield_extract_to_shifts = true,
+ .lower_bitfield_extract = true,
.lower_bitfield_reverse = true,
.lower_bit_count = true,
.lower_cs_local_id_to_index = true,
bool lower_fsqrt;
bool lower_sincos;
bool lower_fmod;
- /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
+ /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe if has_bfe is set, otherwise to shifts and masks. */
bool lower_bitfield_extract;
- /** Lowers ibitfield_extract/ubitfield_extract to compares, shifts. */
- bool lower_bitfield_extract_to_shifts;
/** Lowers bitfield_insert. */
bool lower_bitfield_insert;
/** Lowers bitfield_reverse to shifts. */
/** Backend supports bitz/bitnz. */
bool has_bit_test;
+ /** Backend supports ubfe/ibfe. */
+ bool has_bfe;
+
/** Backend supports bfm. */
bool has_bfm;
(void) options;
(void) info;
- /* This is not a great place for this, but it seems to be the best place
- * for it. Check that at most one kind of lowering is requested for
- * bitfield extract and bitfield insert. Otherwise the lowering can fight
- * with each other and optimizations.
- */
- assert((int)options->lower_bitfield_extract +
- (int)options->lower_bitfield_extract_to_shifts <= 1);
-
-
STATIC_ASSERT(${str(cache["next_index"])} == ARRAY_SIZE(${pass_name}_values));
% for index, condition in enumerate(condition_list):
condition_flags[${index}] = ${condition};
(('ibitfield_extract', 'value', 'offset', 'bits'),
('bcsel', ('ult', 31, 'bits'), 'value',
('ibfe', 'value', 'offset', 'bits')),
- 'options->lower_bitfield_extract'),
+ 'options->lower_bitfield_extract && options->has_bfe'),
(('ubitfield_extract', 'value', 'offset', 'bits'),
('bcsel', ('ult', 31, 'bits'), 'value',
('ubfe', 'value', 'offset', 'bits')),
- 'options->lower_bitfield_extract'),
+ 'options->lower_bitfield_extract && options->has_bfe'),
# (src0 & src1) | (~src0 & src2). Constant fold if src2 is 0.
(('bitfield_select', a, b, 0), ('iand', a, b)),
('ishr',
('ishl', 'value', ('isub', ('isub', 32, 'bits'), 'offset')),
('isub', 32, 'bits'))),
- 'options->lower_bitfield_extract_to_shifts'),
+ 'options->lower_bitfield_extract && !options->has_bfe'),
(('ubitfield_extract', 'value', 'offset', 'bits'),
('iand',
('bcsel', ('ieq', 'bits', 32),
0xffffffff,
('isub', ('ishl', 1, 'bits'), 1))),
- 'options->lower_bitfield_extract_to_shifts'),
+ 'options->lower_bitfield_extract && !options->has_bfe'),
(('ifind_msb', 'value'),
('ufind_msb', ('bcsel', ('ilt', 'value', 0), ('inot', 'value'), 'value')),
.lower_insert_word = true,
.lower_helper_invocation = true,
.lower_bitfield_insert = true,
- .lower_bitfield_extract_to_shifts = true,
+ .lower_bitfield_extract = true,
.lower_pack_half_2x16 = true,
.lower_pack_snorm_4x8 = true,
.lower_pack_snorm_2x16 = true,
.lower_flrp64 = true,
.lower_fsat = true,
.lower_bitfield_insert = true,
- .lower_bitfield_extract_to_shifts = true,
+ .lower_bitfield_extract = true,
.lower_fdph = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_flrp64 = true,
.lower_fsat = true,
.lower_bitfield_insert = true,
- .lower_bitfield_extract_to_shifts = true,
+ .lower_bitfield_extract = true,
.lower_fdot = true,
.lower_fdph = true,
.lower_ffma16 = true,
.lower_fmod = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
+ .lower_bitfield_extract = true,
.lower_bitfield_insert = true,
.lower_extract_byte = true,
.lower_extract_word = true,
rscreen->nir_options.force_indirect_unrolling_sampler = true;
if (rscreen->info.gfx_level >= EVERGREEN) {
- rscreen->nir_options.lower_bitfield_extract = true;
+ rscreen->nir_options.has_bfe = true;
rscreen->nir_options.has_bfm = true;
rscreen->nir_options.has_bitfield_select = true;
}
/* Pre-EG doesn't have these ALU ops */
rscreen->nir_options.lower_bit_count = true;
rscreen->nir_options.lower_bitfield_reverse = true;
- rscreen->nir_options.lower_bitfield_extract_to_shifts = true;
}
if (rscreen->info.gfx_level < CAYMAN) {
.has_sudot_4x8 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level >= GFX11,
.has_udot_4x8 = sscreen->info.has_accelerated_dot_product,
.has_dot_2x16 = sscreen->info.has_accelerated_dot_product && sscreen->info.gfx_level < GFX11,
+ .has_bfe = true,
.has_bfm = true,
.has_bitfield_select = true,
.optimize_sample_mask_in = true,
.lower_insert_byte = true,
.lower_insert_word = true,
.lower_bitfield_insert = true,
- .lower_bitfield_extract_to_shifts = true,
+ .lower_bitfield_extract = true,
.lower_bitfield_reverse = true,
.lower_bit_count = true,
.lower_cs_local_id_to_index = true,
.lower_fisnormal = true, \
.lower_isign = true, \
.lower_ldexp = true, \
+ .lower_bitfield_extract = true, \
.lower_bitfield_insert = true, \
.lower_device_index_to_zero = true, \
.vectorize_io = true, \
nir_options->lower_flrp32 = devinfo->ver < 6 || devinfo->ver >= 11;
nir_options->lower_fpow = devinfo->ver >= 12;
- nir_options->lower_bitfield_extract = devinfo->ver >= 7;
- nir_options->lower_bitfield_extract_to_shifts = devinfo->ver < 7;
+ nir_options->has_bfe = devinfo->ver >= 7;
nir_options->has_bfm = devinfo->ver >= 7;
nir_options->has_bfi = devinfo->ver >= 7;
.lower_interpolate_at = true,
.has_fsub = true,
.has_isub = true,
+ .has_bfe = true,
.vertex_id_zero_based = true,
.lower_base_vertex = true,
.lower_helper_invocation = true,
op.lower_fsqrt = false; // TODO: only before gm200
op.lower_sincos = false;
op.lower_fmod = true;
- op.lower_bitfield_extract = false;
- op.lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
+ op.lower_bitfield_extract = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
op.lower_bitfield_insert = (chipset >= NVISA_GV100_CHIPSET || chipset < NVISA_GF100_CHIPSET);
op.lower_bitfield_reverse = (chipset < NVISA_GF100_CHIPSET);
op.lower_bit_count = (chipset < NVISA_GF100_CHIPSET);
.lower_fsign = true, \
\
.lower_bitfield_insert = true, \
- .lower_bitfield_extract_to_shifts = true, \
+ .lower_bitfield_extract = true, \
.lower_insert_byte = true, \
.lower_rotate = true, \
\
.lower_bit_count = true,
.lower_bitfield_reverse = true,
.lower_bitfield_insert = true,
- .lower_bitfield_extract_to_shifts = true,
+ .lower_bitfield_extract = true,
.lower_extract_byte = true,
.lower_extract_word = true,
.lower_insert_byte = true,