bool lower_elect:1;
bool lower_read_invocation_to_cond:1;
bool lower_rotate_to_shuffle:1;
+ bool lower_ballot_bit_count_to_mbcnt_amd:1;
} nir_lower_subgroups_options;
bool nir_lower_subgroups(nir_shader *shader,
case nir_intrinsic_ballot_bit_count_exclusive:
case nir_intrinsic_ballot_bit_count_inclusive: {
+ /* Lower an exclusive/inclusive ballot bit count. The ballot source is
+  * first converted with ballot_type_to_uint(); it is then lowered either
+  * to mbcnt_amd (when the option is set) or to a masked vec_bit_count().
+  */
+ assert(intrin->src[0].is_ssa);
+ nir_ssa_def *int_val = ballot_type_to_uint(b, intrin->src[0].ssa,
+ options);
+ if (options->lower_ballot_bit_count_to_mbcnt_amd) {
+ nir_ssa_def *acc;
+ if (intrin->intrinsic == nir_intrinsic_ballot_bit_count_exclusive) {
+ acc = nir_imm_int(b, 0);
+ } else {
+ /* Inclusive: seed the accumulator with bit 0 of the ballot and shift
+  * the ballot right by one before handing it to mbcnt_amd.
+  * NOTE(review): presumably mbcnt_amd counts the set bits below the
+  * current lane and adds acc -- confirm against the intrinsic's docs.
+  */
+ acc = nir_iand_imm(b, nir_u2u32(b, int_val), 0x1);
+ int_val = nir_ushr_imm(b, int_val, 1);
+ }
+ return nir_mbcnt_amd(b, int_val, acc);
+ }
+
nir_ssa_def *mask;
if (intrin->intrinsic == nir_intrinsic_ballot_bit_count_inclusive) {
mask = nir_inot(b, build_subgroup_gt_mask(b, options));
} else {
mask = nir_inot(b, build_subgroup_ge_mask(b, options));
}
- assert(intrin->src[0].is_ssa);
- nir_ssa_def *int_val = ballot_type_to_uint(b, intrin->src[0].ssa,
- options);
-
return vec_bit_count(b, nir_iand(b, int_val, mask));
}