There are a lot of optimizations in opt_algebraic that match ('ine', a,
0), but there are almost none that match i2b. Instead of adding a huge
pile of additional patterns (including variations that include both ine
and i2b), always lower i2b to a != 0.
At this point in the series, it should be impossible for anything to
generate i2b, so there /should not/ be any changes.
The failing test on d3d12 is a pre-existing bug that is triggered by
this change. I talked to Jesse about it, and, after some analysis, he
suggested just adding it to the list of known failures.
v2: Don't rematerialize i2b instructions in dxil_nir_lower_x2b.
v3: Don't rematerialize i2b instructions in zink_nir_algebraic.py.
v4: Fix zink-on-TGL CI failures by calling nir_opt_algebraic after
nir_lower_doubles makes progress. The latter can generate i2b
instructions, but nir_lower_int64 can't handle them (anymore).
v5: Add back most of the hunk at line 2125 of nir_opt_algebraic.py. I
had accidentally removed the f2b(bf2(x)) optimization.
v6: Just eliminate the i2b instruction.
v7: Remove missed i2b32 in midgard_compile.c. Remove (now unused)
emit_alu_i2orf2_b1 function from sfn_instr_alu.cpp. Previously this
function was still used. :shrug:
No shader-db changes on any Intel platform.
All Intel platforms had similar results. (Ice Lake shown)
Instructions in all programs:
141165875 ->
141165873 (-0.0%)
Instructions helped: 2
Cycles in all programs:
9098956382 ->
9098956350 (-0.0%)
Cycles helped: 2
The two Vulkan shaders are helped because of the "new" (('b2i32',
('ine', ('ubfe', a, b, 1), 0)), ('ubfe', a, b, 1)) algebraic pattern.
Acked-by: Jesse Natalie <jenatali@microsoft.com> [earlier version]
Acked-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Tested-by: Daniel Schürmann <daniel@schuermann.dev> [earlier version]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15121>
bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero());
break;
}
- case nir_op_b2b1:
- case nir_op_i2b1: {
+ case nir_op_b2b1: {
Temp src = get_alu_src(ctx, instr->src[0]);
assert(dst.regClass() == bld.lm);
case nir_op_b2i64:
result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
break;
- case nir_op_i2b1:
case nir_op_b2b1: /* after loads */
result = emit_i2b(&ctx->ac, src[0]);
break;
case nir_op_bit_count:
case nir_op_find_lsb:
case nir_op_ufind_msb:
- case nir_op_i2b1:
return 32;
case nir_op_ilt:
case nir_op_ige:
case nir_op_inot: return agx_xor_to(b, dst, s0, t);
case nir_op_f2b1: return agx_fcmpsel_to(b, dst, s0, f, f, t, AGX_FCOND_EQ);
- case nir_op_i2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ);
case nir_op_b2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ);
case nir_op_bcsel:
vir_set_pf(c, vir_SUB_dest(c, nop, src0, src1), V3D_QPU_PF_PUSHC);
break;
- case nir_op_i2b32:
- vir_set_pf(c, vir_MOV_dest(c, nop, src0), V3D_QPU_PF_PUSHZ);
- cond_invert = true;
- break;
-
case nir_op_f2b32:
vir_set_pf(c, vir_FMOV_dest(c, nop, src0), V3D_QPU_PF_PUSHZ);
cond_invert = true;
break;
}
- case nir_op_i2b32:
case nir_op_f2b32:
case nir_op_feq32:
case nir_op_fneu32:
CASE_ALL_SIZES(nir_op_uge)
CASE_ALL_SIZES(nir_op_ieq)
CASE_ALL_SIZES(nir_op_ine)
- case nir_op_i2b1:
- case nir_op_i2b8:
- case nir_op_i2b16:
- case nir_op_i2b32:
case nir_op_f2b1:
case nir_op_f2b8:
case nir_op_f2b16:
bit_size == 16 ? nir_op_f2b16 : nir_op_f2b32;
break;
- case nir_op_i2b1:
- opcode = bit_size == 8 ? nir_op_i2b8 :
- bit_size == 16 ? nir_op_i2b16 : nir_op_i2b32;
- break;
-
case nir_op_b2b1:
/* Since the canonical bit size is the size of the src, it's a no-op */
opcode = nir_op_mov;
case nir_op_b2f32: alu->op = nir_op_mov; break;
case nir_op_b2i32: alu->op = nir_op_mov; break;
case nir_op_f2b1:
- case nir_op_i2b1:
rep = nir_sne(b, nir_ssa_for_alu_src(b, alu, 0),
nir_imm_float(b, 0));
break;
break;
case nir_op_f2b1: alu->op = nir_op_f2b32; break;
- case nir_op_i2b1: alu->op = nir_op_i2b32; break;
case nir_op_b2b32:
case nir_op_b2b1:
}
static nir_ssa_def *
-lower_i2b(nir_builder *b, nir_ssa_def *x)
-{
- return nir_ine(b, nir_ior(b, nir_unpack_64_2x32_split_x(b, x),
- nir_unpack_64_2x32_split_y(b, x)),
- nir_imm_int(b, 0));
-}
-
-static nir_ssa_def *
lower_i2i8(nir_builder *b, nir_ssa_def *x)
{
return nir_i2i8(b, nir_unpack_64_2x32_split_x(b, x));
case nir_op_irem:
return nir_lower_divmod64;
case nir_op_b2i64:
- case nir_op_i2b1:
case nir_op_i2i8:
case nir_op_i2i16:
case nir_op_i2i32:
return lower_irem64(b, src[0], src[1]);
case nir_op_b2i64:
return lower_b2i64(b, src[0]);
- case nir_op_i2b1:
- return lower_i2b(b, src[0]);
case nir_op_i2i8:
return lower_i2i8(b, src[0]);
case nir_op_i2i16:
const nir_shader_compiler_options *options)
{
switch (alu->op) {
- case nir_op_i2b1:
case nir_op_i2i8:
case nir_op_i2i16:
case nir_op_i2i32:
}
case nir_op_f2u32: alu->op = nir_op_ffloor; break;
- case nir_op_i2b1: alu->op = nir_op_f2b1; break;
case nir_op_ilt: alu->op = nir_op_flt; break;
case nir_op_ige: alu->op = nir_op_fge; break;
if src_t == tbool:
dst_types = [tfloat, tint, tbool]
elif src_t == tint:
- dst_types = [tfloat, tint, tbool]
+ dst_types = [tfloat, tint]
elif src_t == tuint:
dst_types = [tfloat, tuint]
elif src_t == tfloat:
return nir_op_mov;
}
+ /* i2b and u2b do not exist. Use ine (via nir_type_conversion) instead */
+ assert((src_base != nir_type_int && src_base != nir_type_uint) ||
+ dst_base != nir_type_bool);
+
switch (src_base) {
% for src_t in ['int', 'uint', 'float', 'bool']:
case nir_type_${src_t}:
% else:
<% dst_t = 'int' %>
% endif
-% elif src_t == 'uint' and dst_t == 'bool':
-<% src_t = 'int' %>
+% elif src_t in ['int', 'uint'] and dst_t == 'bool':
+<% continue %>
% endif
switch (dst_bit_size) {
% for dst_bits in type_sizes(dst_t):
(('bcsel', ('ilt', a, b), b, a), ('imax', a, b)),
(('bcsel', ('ige', a, b), b, a), ('imin', a, b)),
(('bcsel', ('ige', b, a), b, a), ('imax', a, b)),
- (('bcsel', ('i2b', a), b, c), ('bcsel', ('ine', a, 0), b, c)),
(('bcsel', ('inot', a), b, c), ('bcsel', a, c, b)),
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
(('bcsel', a, b, ('bcsel', a, c, d)), ('bcsel', a, b, d)),
(('fsin', a), lowered_sincos(0.5), 'options->lower_sincos'),
(('fcos', a), lowered_sincos(0.75), 'options->lower_sincos'),
# Boolean simplifications
- (('i2b16(is_used_by_if)', a), ('ine16', a, 0)),
- (('i2b32(is_used_by_if)', a), ('ine32', a, 0)),
- (('i2b1(is_used_by_if)', a), ('ine', a, 0)),
(('ieq', a, True), a),
(('ine(is_not_used_by_if)', a, True), ('inot', a)),
(('ine', a, False), a),
# Conversions
(('f2i', ('ftrunc', a)), ('f2i', a)),
(('f2u', ('ftrunc', a)), ('f2u', a)),
- (('i2b', ('ineg', a)), ('i2b', a)),
- (('i2b', ('iabs', a)), ('i2b', a)),
(('inot', ('f2b1', a)), ('feq', a, 0.0)),
# Conversions from 16 bits to 32 bits and back can always be removed
(('ubfe', a, 0, '#b'), ('iand', a, ('ushr', 0xffffffff, ('ineg', b)))),
- (('b2i32', ('i2b', ('ubfe', a, b, 1))), ('ubfe', a, b, 1)),
- (('b2i32', ('i2b', ('ibfe', a, b, 1))), ('ubfe', a, b, 1)), # ubfe in the replacement is correct
+ (('b2i32', ('ine', ('ubfe', a, b, 1), 0)), ('ubfe', a, b, 1)),
+ (('b2i32', ('ine', ('ibfe', a, b, 1), 0)), ('ubfe', a, b, 1)), # ubfe in the replacement is correct
(('ine', ('ibfe(is_used_once)', a, '#b', '#c'), 0), ('ine', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
(('ieq', ('ibfe(is_used_once)', a, '#b', '#c'), 0), ('ieq', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
(('ine', ('ubfe(is_used_once)', a, '#b', '#c'), 0), ('ine', ('iand', a, ('ishl', ('ushr', 0xffffffff, ('ineg', c)), b)), 0)),
optimizations.append((('inot', ('iand(is_used_once)', (left, a, b), (right, c, d))),
('ior', (invert[left], a, b), (invert[right], c, d))))
-# Optimize x2bN(b2x(x)) -> x
+# Optimize f2bN(b2f(x)) -> x
for size in type_sizes('bool'):
aN = 'a@' + str(size)
f2bN = 'f2b' + str(size)
- i2bN = 'i2b' + str(size)
optimizations.append(((f2bN, ('b2f', aN)), a))
- optimizations.append(((i2bN, ('b2i', aN)), a))
# Optimize x2yN(b2x(x)) -> b2y
for x, y in itertools.product(['f', 'u', 'i'], ['f', 'u', 'i']):
MATCH_ICONV_CASE(i2i)
MATCH_FCONV_CASE(b2f)
MATCH_ICONV_CASE(b2i)
- MATCH_BCONV_CASE(i2b)
MATCH_BCONV_CASE(f2b)
default:
unreachable("Invalid nir_search_op");
MATCH_ICONV_CASE(i2i)
MATCH_FCONV_CASE(b2f)
MATCH_ICONV_CASE(b2i)
- MATCH_BCONV_CASE(i2b)
MATCH_BCONV_CASE(f2b)
default:
return nop;
RET_ICONV_CASE(i2i)
RET_FCONV_CASE(b2f)
RET_ICONV_CASE(b2i)
- RET_BCONV_CASE(i2b)
RET_BCONV_CASE(f2b)
default:
unreachable("Invalid nir_search_op");
CASE(f2b)
CASE(b2f)
CASE(b2i)
- CASE(i2b)
CASE(i2i)
CASE(f2i)
CASE(i2f)
nir_search_op_i2i,
nir_search_op_b2f,
nir_search_op_b2i,
- nir_search_op_i2b,
nir_search_op_f2b,
nir_num_search_ops,
};
dst[0]->cat2.condition = IR3_COND_NE;
break;
- case nir_op_i2b1:
- /* i2b1 will appear when translating from nir_load_ubo or
- * nir_intrinsic_load_ssbo, where any non-zero value is true.
- */
- dst[0] = ir3_CMPS_S(
- b, src[0], 0,
- create_immed_typed(b, 0, type_uint_size(bs[0])), 0);
- dst[0]->cat2.condition = IR3_COND_NE;
- break;
-
case nir_op_b2b1:
/* b2b1 will appear when translating from
*
assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
}
-
-static LLVMValueRef
-int_to_bool32(struct lp_build_nir_context *bld_base,
- uint32_t src_bit_size,
- bool is_unsigned,
- LLVMValueRef val)
-{
- LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- struct lp_build_context *int_bld =
- get_int_bld(bld_base, is_unsigned, src_bit_size);
- LLVMValueRef result = lp_build_compare(bld_base->base.gallivm,
- int_bld->type, PIPE_FUNC_NOTEQUAL,
- val, int_bld->zero);
- if (src_bit_size == 16)
- result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
- else if (src_bit_size == 64)
- result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
- return result;
-}
-
-
static LLVMValueRef
flt_to_bool32(struct lp_build_nir_context *bld_base,
uint32_t src_bit_size,
case nir_op_ftrunc:
result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
break;
- case nir_op_i2b32:
- result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
- break;
case nir_op_i2f16:
result = LLVMBuildSIToFP(builder, src[0],
bld_base->half_bld.vec_type, "");
ntt_FSNE(c, dst, src[0], ureg_imm1f(c->ureg, 0));
break;
- case nir_op_i2b32:
- if (src_64) {
- ntt_U64SNE(c, dst, src[0], ureg_imm1u(c->ureg, 0));
- } else
- ntt_USNE(c, dst, src[0], ureg_imm1u(c->ureg, 0));
- break;
-
case nir_op_b2i32:
ntt_AND(c, dst, src[0], ureg_imm1u(c->ureg, 1));
break;
spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-output-array-vec3-index-wr: crash
spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-output-array-vec4-index-wr: crash
spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-vec4-index-wr: crash
+spec/arb_tessellation_shader/execution/variable-indexing/tcs-patch-vec4-swiz-index-wr: crash
spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-float-index-rd: crash
spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-vec2-index-rd: crash
spec/arb_tessellation_shader/execution/variable-indexing/tes-both-input-array-vec3-index-rd: crash
summary:
name: results
---- --------
- pass: 17231
+ pass: 17230
fail: 20
- crash: 40
+ crash: 41
skip: 2925
timeout: 0
warn: 25
UOP(b2f32, AND, 0_X_X), /* AND with fui(1.0f) */
UOP(b2i32, AND, 0_X_X), /* AND with 1 */
OPC(f2b32, CMP, 0_X_X, NE), /* != 0.0 */
- UOPC(i2b32, CMP, 0_X_X, NE), /* != 0 */
/* arithmetic */
IOP(iadd, ADD, 0_X_1),
case nir_op_f2b32:
inst.src[1] = etna_immediate_float(0.0f);
break;
- case nir_op_i2b32:
- inst.src[1] = etna_immediate_int(0);
- break;
case nir_op_ineg:
inst.src[0] = etna_immediate_int(0);
src[0].neg = 1;
const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
static bool
-emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
-
-static bool
emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader);
return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
case nir_op_ftrunc:
return emit_alu_op1(*alu, op1_trunc, shader);
- case nir_op_i2b1:
- case nir_op_i2b32:
- return emit_alu_i2orf2_b1(*alu, op2_setne_int, shader);
case nir_op_iadd:
return emit_alu_op2_int(*alu, op2_add_int, shader);
case nir_op_iand:
}
static bool
-emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
-{
- auto& value_factory = shader.value_factory();
- AluInstr *ir = nullptr;
- Pin pin = nir_dest_num_components(alu.dest.dest) == 1 ? pin_free : pin_none;
-
- for (int i = 0; i < 4; ++i) {
- if (alu.dest.write_mask & (1 << i)) {
- ir = new AluInstr(opcode,
- value_factory.dest(alu.dest, i, pin),
- value_factory.src(alu.src[0], i),
- value_factory.zero(),
- AluInstr::write);
- shader.emit_instruction(ir);
- }
- }
- if (ir)
- ir->set_alu_flag(alu_last_instr);
- return true;
-}
-
-static bool
emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
{
auto& value_factory = shader.value_factory();
case nir_op_b2i32:
result = qir_AND(c, src[0], qir_uniform_ui(c, 1));
break;
- case nir_op_i2b32:
case nir_op_f2b32:
qir_SF(c, src[0]);
result = qir_MOV(c, qir_SEL(c, QPU_COND_ZC,
nir_src_bit_size(alu->src[0].src),
num_components, 0));
break;
- case nir_op_i2b1:
- assert(nir_op_infos[alu->op].num_inputs == 1);
- result = emit_binop(ctx, SpvOpINotEqual, dest_type, src[0],
- get_ivec_constant(ctx,
- nir_src_bit_size(alu->src[0].src),
- num_components, 0));
- break;
#define BINOP(nir_op, spirv_op) \
lower_b2b = [
(('b2b32', 'a'), ('b2i32', 'a')),
- (('b2b1', 'a'), ('i2b1', 'a')),
+ (('b2b1', 'a'), ('ine', 'a', 0)),
]
def main():
inst = bld.emit(SHADER_OPCODE_RSQ, result, op[0]);
break;
- case nir_op_i2b32:
case nir_op_f2b32: {
uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
if (bit_size == 64) {
/* two-argument instructions can't take 64-bit immediates */
- fs_reg zero;
- fs_reg tmp;
+ fs_reg zero = vgrf(glsl_type::double_type);
+ fs_reg tmp = vgrf(glsl_type::double_type);
- if (instr->op == nir_op_f2b32) {
- zero = vgrf(glsl_type::double_type);
- tmp = vgrf(glsl_type::double_type);
- bld.MOV(zero, setup_imm_df(bld, 0.0));
- } else {
- zero = vgrf(glsl_type::int64_t_type);
- tmp = vgrf(glsl_type::int64_t_type);
- bld.MOV(zero, brw_imm_q(0));
- }
+ bld.MOV(zero, setup_imm_df(bld, 0.0));
/* A SIMD16 execution needs to be split in two instructions, so use
* a vgrf instead of the flag register as dst so instruction splitting
} else {
fs_reg zero;
if (bit_size == 32) {
- zero = instr->op == nir_op_f2b32 ? brw_imm_f(0.0f) : brw_imm_d(0);
+ zero = brw_imm_f(0.0f);
} else {
assert(bit_size == 16);
- zero = instr->op == nir_op_f2b32 ?
- retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF) : brw_imm_w(0);
+ zero = retype(brw_imm_w(0), BRW_REGISTER_TYPE_HF);
}
bld.CMP(result, op[0], zero, BRW_CONDITIONAL_NZ);
}
}
break;
- case nir_op_i2b32:
- emit(CMP(dst, op[0], brw_imm_d(0), BRW_CONDITIONAL_NZ));
- break;
-
case nir_op_unpack_half_2x16_split_x:
case nir_op_unpack_half_2x16_split_y:
case nir_op_pack_half_2x16_split:
lower_x2b = [
(('b2b32', 'a'), ('b2i32', 'a')),
- (('b2b1', 'a'), ('i2b1', 'a')),
- (('i2b1', 'a'), ('ine', a, 0)),
+ (('b2b1', 'a'), ('ine', ('b2i32', a), 0)),
(('f2b1', 'a'), ('fneu', a, 0)),
]
mkOp2(OP_MERGE, TYPE_U64, newDefs[0], loadImm(NULL, 0), tmp);
break;
}
- case nir_op_f2b32:
- case nir_op_i2b32: {
+ case nir_op_f2b32: {
DEFAULT_CHECKS;
LValues &newDefs = convert(&insn->dest);
- Value *src1;
- if (typeSizeof(sTypes[0]) == 8) {
- src1 = loadImm(getSSA(8), 0.0);
- } else {
- src1 = zero;
- }
- CondCode cc = op == nir_op_f2b32 ? CC_NEU : CC_NE;
- mkCmp(OP_SET, cc, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), src1);
+ mkCmp(OP_SET, CC_NEU, TYPE_U32, newDefs[0], sTypes[0], getSrc(&insn->src[0]), zero);
break;
}
case nir_op_b2i8:
bi_mux_i32_to(b, dst, bi_imm_u32(0), bi_imm_u32(~0), s0, BI_MUX_FP_ZERO);
break;
- case nir_op_i2b8:
- bi_mux_v4i8_to(b, dst, bi_imm_u8(0), bi_imm_u8(~0), s0, BI_MUX_INT_ZERO);
- break;
- case nir_op_i2b16:
- bi_mux_v2i16_to(b, dst, bi_imm_u16(0), bi_imm_u16(~0), s0, BI_MUX_INT_ZERO);
- break;
- case nir_op_i2b32:
- bi_mux_i32_to(b, dst, bi_imm_u32(0), bi_imm_u32(~0), s0, BI_MUX_INT_ZERO);
- break;
-
case nir_op_ieq8:
case nir_op_ine8:
case nir_op_ilt8:
* we can do a "not equal to 0.0" test. */
ALU_CASE_CMP(f2b32, fne);
- ALU_CASE_CMP(i2b32, ine);
ALU_CASE(frcp, frcp);
ALU_CASE(frsq, frsqrt);