From 66e51dc4747954b1dc3a3259b2a9ba86f6502a27 Mon Sep 17 00:00:00 2001
From: Tony Wasserka
Date: Tue, 13 Jul 2021 11:22:46 +0200
Subject: [PATCH] aco: Remove use of deprecated Operand constructors
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

This migration was done with libclang-based automatic tooling, which
performed these replacements:
* Operand(uint8_t) -> Operand::c8
* Operand(uint16_t) -> Operand::c16
* Operand(uint32_t, false) -> Operand::c32
* Operand(uint32_t, bool) -> Operand::c32_or_c64
* Operand(uint64_t) -> Operand::c64
* Operand(0) -> Operand::zero(num_bytes)

Casts that were previously used for constructor selection have
automatically been removed (e.g. Operand((uint16_t)1) -> Operand::c16(1)).

Reviewed-by: Timur Kristóf
Part-of:
---
 src/amd/compiler/aco_assembler.cpp               |   14 +-
 src/amd/compiler/aco_insert_NOPs.cpp             |    2 +-
 src/amd/compiler/aco_insert_exec_mask.cpp        |   12 +-
 src/amd/compiler/aco_instruction_selection.cpp   | 1290 +++++++++++-----------
 src/amd/compiler/aco_ir.h                        |   10 +-
 src/amd/compiler/aco_lower_phis.cpp              |   10 +-
 src/amd/compiler/aco_lower_to_hw_instr.cpp       |  232 ++--
 src/amd/compiler/aco_optimizer.cpp               |   36 +-
 src/amd/compiler/aco_optimizer_postRA.cpp        |    2 +-
 src/amd/compiler/aco_spill.cpp                   |   20 +-
 src/amd/compiler/tests/helpers.cpp               |   15 +-
 src/amd/compiler/tests/test_assembler.cpp        |   16 +-
 src/amd/compiler/tests/test_hard_clause.cpp      |   89 +-
 src/amd/compiler/tests/test_insert_nops.cpp      |   37 +-
 src/amd/compiler/tests/test_optimizer.cpp        |  216 ++--
 src/amd/compiler/tests/test_optimizer_postRA.cpp |   73 +-
 src/amd/compiler/tests/test_regalloc.cpp         |    2 +-
 src/amd/compiler/tests/test_sdwa.cpp             |  148 ++-
 src/amd/compiler/tests/test_to_hw_instr.cpp      |  147 ++-
 19 files changed, 1252 insertions(+), 1119 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 8631189..fd4916c 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -98,7 +98,7 @@ emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction* inst
       ctx.constaddrs[instr->operands[1].constantValue()].add_literal = out.size() + 1;
 
       instr->opcode = aco_opcode::s_add_u32;
-      instr->operands[1] = Operand(0u);
+      instr->operands[1] = Operand::zero();
       instr->operands[1].setFixed(PhysReg(255));
    }
 
@@ -904,20 +904,20 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
    instr.reset(bld.sop1(aco_opcode::s_getpc_b64, branch->definitions[0]).instr);
    emit_instruction(ctx, out, instr.get());
 
-   instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand(0u)).instr);
+   instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_lo, op_tmp_lo, Operand::zero()).instr);
    instr->operands[1].setFixed(PhysReg{255}); /* this operand has to be a literal */
    emit_instruction(ctx, out, instr.get());
 
    branch->pass_flags = out.size();
-   instr.reset(
-      bld.sop2(aco_opcode::s_addc_u32, def_tmp_hi, op_tmp_hi, Operand(backwards ? UINT32_MAX : 0u))
-         .instr);
+   instr.reset(bld.sop2(aco_opcode::s_addc_u32, def_tmp_hi, op_tmp_hi,
+                        Operand::c32(backwards ?
+                                        UINT32_MAX : 0u))
+                .instr);
    emit_instruction(ctx, out, instr.get());
 
    /* restore SCC and clear the LSB of the new PC */
-   instr.reset(bld.sopc(aco_opcode::s_bitcmp1_b32, def_tmp_lo, op_tmp_lo, Operand(0u)).instr);
+   instr.reset(bld.sopc(aco_opcode::s_bitcmp1_b32, def_tmp_lo, op_tmp_lo, Operand::zero()).instr);
    emit_instruction(ctx, out, instr.get());
-   instr.reset(bld.sop1(aco_opcode::s_bitset0_b32, def_tmp_lo, Operand(0u)).instr);
+   instr.reset(bld.sop1(aco_opcode::s_bitset0_b32, def_tmp_lo, Operand::zero()).instr);
    emit_instruction(ctx, out, instr.get());
 
    /* create the s_setpc_b64 to jump */
diff --git a/src/amd/compiler/aco_insert_NOPs.cpp b/src/amd/compiler/aco_insert_NOPs.cpp
index 3ef7085..edc52c3 100644
--- a/src/amd/compiler/aco_insert_NOPs.cpp
+++ b/src/amd/compiler/aco_insert_NOPs.cpp
@@ -710,7 +710,7 @@ handle_instruction_gfx10(Program* program, Block* cur_block, NOP_ctx_gfx10& ctx,
             aco_ptr<SOP1_instruction> s_mov{
                create_instruction<SOP1_instruction>(aco_opcode::s_mov_b32, Format::SOP1, 1, 1)};
             s_mov->definitions[0] = Definition(sgpr_null, s1);
-            s_mov->operands[0] = Operand(0u);
+            s_mov->operands[0] = Operand::zero();
             new_instructions.emplace_back(std::move(s_mov));
          }
       } else if (instr->isSALU()) {
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp
index 288ade8..fb4ad4f 100644
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -418,7 +418,7 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector>
       /* exec seems to need to be manually initialized with combined shaders */
       if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
-         start_exec = Operand(-1u, bld.lm == s2);
+         start_exec = Operand::c32_or_c64(-1u, bld.lm == s2);
          bld.copy(Definition(exec, bld.lm), start_exec);
       }
 
@@ -754,7 +754,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector(bld.w64or32(Builder::s_mov), Format::SOP1, 1, 1));
-         instr->operands[0] = Operand(0u);
+         instr->operands[0] = Operand::zero();
          instr->definitions[0] = dst;
       } else {
          std::pair& exact_mask = ctx.info[block->index].exec[0];
@@ -780,7 +780,7 @@ process_instructions(exec_ctx& ctx, Block* block, std::vectorindex].exec.size() - 2;
       if (!(ctx.info[block->index].exec.back().second & mask_type_exact)) {
@@ -924,7 +924,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
       }
       Temp cond = bld.sop1(Builder::s_and_saveexec, bld.def(bld.lm), bld.def(s1, scc),
-                           Definition(exec, bld.lm), Operand(0u), Operand(exec, bld.lm));
+                           Definition(exec, bld.lm), Operand::zero(), Operand(exec, bld.lm));
 
       for (int i = num - 1; i >= 0; i--) {
          Instruction* andn2 = bld.sop2(Builder::s_andn2, bld.def(bld.lm), bld.def(s1, scc),
@@ -1047,7 +1047,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
       unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
       Block& succ = ctx.program->blocks[succ_idx];
       if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
-         bld.copy(Definition(exec, bld.lm), Operand(0u, bld.lm == s2));
+         bld.copy(Definition(exec, bld.lm), Operand::zero(bld.lm.bytes()));
      }
 
      bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond),
@@ -1076,7 +1076,7 @@ add_branch_code(exec_ctx& ctx, Block* block)
      unsigned succ_idx = ctx.program->blocks[block->linear_succs[1]].linear_succs[0];
      Block& succ = ctx.program->blocks[succ_idx];
      if (!(succ.kind & block_kind_invert || succ.kind & block_kind_merge)) {
-        bld.copy(Definition(exec, bld.lm), Operand(0u, bld.lm == s2));
+        bld.copy(Definition(exec, bld.lm),
Operand::zero(bld.lm.bytes())); } bld.branch(aco_opcode::p_cbranch_nz, bld.hint_vcc(bld.def(s2)), bld.scc(cond), diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp index bcd8490..09100b9 100644 --- a/src/amd/compiler/aco_instruction_selection.cpp +++ b/src/amd/compiler/aco_instruction_selection.cpp @@ -132,19 +132,19 @@ get_ssa_temp(struct isel_context* ctx, nir_ssa_def* def) } Temp -emit_mbcnt(isel_context* ctx, Temp dst, Operand mask = Operand(), Operand base = Operand(0u)) +emit_mbcnt(isel_context* ctx, Temp dst, Operand mask = Operand(), Operand base = Operand::zero()) { Builder bld(ctx->program, ctx->block); assert(mask.isUndefined() || mask.isTemp() || (mask.isFixed() && mask.physReg() == exec)); assert(mask.isUndefined() || mask.bytes() == bld.lm.bytes()); if (ctx->program->wave_size == 32) { - Operand mask_lo = mask.isUndefined() ? Operand(-1u) : mask; + Operand mask_lo = mask.isUndefined() ? Operand::c32(-1u) : mask; return bld.vop3(aco_opcode::v_mbcnt_lo_u32_b32, Definition(dst), mask_lo, base); } - Operand mask_lo(-1u); - Operand mask_hi(-1u); + Operand mask_lo = Operand::c32(-1u); + Operand mask_hi = Operand::c32(-1u); if (mask.isTemp()) { RegClass rc = RegClass(mask.regClass().type(), 1); @@ -204,14 +204,15 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data) } else if (ctx->options->chip_class >= GFX10 && ctx->program->wave_size == 64) { /* GFX10 wave64 mode: emulate full-wave bpermute */ - Temp index_is_lo = bld.vopc(aco_opcode::v_cmp_ge_u32, bld.def(bld.lm), Operand(31u), index); + Temp index_is_lo = + bld.vopc(aco_opcode::v_cmp_ge_u32, bld.def(bld.lm), Operand::c32(31u), index); Builder::Result index_is_lo_split = bld.pseudo(aco_opcode::p_split_vector, bld.def(s1), bld.def(s1), index_is_lo); Temp index_is_lo_n1 = bld.sop1(aco_opcode::s_not_b32, bld.def(s1), bld.def(s1, scc), index_is_lo_split.def(1).getTemp()); Operand same_half = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), index_is_lo_split.def(0).getTemp(), index_is_lo_n1); - Operand index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), index); + Operand index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index); Operand input_data(data); index_x4.setLateKill(true); @@ -226,7 +227,7 @@ emit_bpermute(isel_context* ctx, Builder& bld, Temp index, Temp data) index_x4, input_data, same_half); } else { /* GFX8-9 or GFX10 wave32: bpermute works normally */ - Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), index); + Temp index_x4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), index); return bld.ds(aco_opcode::ds_bpermute_b32, bld.def(v1), index_x4, data); } } @@ -281,7 +282,7 @@ emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b) Builder bld(ctx->program, ctx->block); if (util_is_power_of_two_or_zero(b)) { - bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand((uint32_t)util_logbase2(b)), a); + bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(util_logbase2(b)), a); return; } @@ -302,25 +303,25 @@ emit_v_div_u32(isel_context* ctx, Temp dst, Temp a, uint32_t b) Temp pre_shift_dst = a; if (pre_shift) { pre_shift_dst = (increment || multiply || post_shift) ? 
bld.tmp(v1) : dst; - bld.vop2(aco_opcode::v_lshrrev_b32, Definition(pre_shift_dst), - Operand((uint32_t)info.pre_shift), a); + bld.vop2(aco_opcode::v_lshrrev_b32, Definition(pre_shift_dst), Operand::c32(info.pre_shift), + a); } Temp increment_dst = pre_shift_dst; if (increment) { increment_dst = (post_shift || multiply) ? bld.tmp(v1) : dst; - bld.vadd32(Definition(increment_dst), Operand((uint32_t)info.increment), pre_shift_dst); + bld.vadd32(Definition(increment_dst), Operand::c32(info.increment), pre_shift_dst); } Temp multiply_dst = increment_dst; if (multiply) { multiply_dst = post_shift ? bld.tmp(v1) : dst; bld.vop3(aco_opcode::v_mul_hi_u32, Definition(multiply_dst), increment_dst, - bld.copy(bld.def(v1), Operand((uint32_t)info.multiplier))); + bld.copy(bld.def(v1), Operand::c32(info.multiplier))); } if (post_shift) { - bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand((uint32_t)info.post_shift), + bld.vop2(aco_opcode::v_lshrrev_b32, Definition(dst), Operand::c32(info.post_shift), multiply_dst); } } @@ -329,7 +330,7 @@ void emit_extract_vector(isel_context* ctx, Temp src, uint32_t idx, Temp dst) { Builder bld(ctx->program, ctx->block); - bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(idx)); + bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::c32(idx)); } Temp @@ -432,7 +433,7 @@ expand_vector(isel_context* ctx, Temp vec_src, Temp dst, unsigned num_components src = bld.as_uniform(src); vec->operands[i] = Operand(src); } else { - vec->operands[i] = Operand(0u, component_size == 2); + vec->operands[i] = Operand::zero(component_size == 2 ? 8 : 4); } elems[i] = vec->operands[i].getTemp(); } @@ -449,14 +450,14 @@ byte_align_scalar(isel_context* ctx, Temp vec, Operand offset, Temp dst) Temp select = Temp(); if (offset.isConstant()) { assert(offset.constantValue() && offset.constantValue() < 4); - shift = Operand(offset.constantValue() * 8); + shift = Operand::c32(offset.constantValue() * 8); } else { /* bit_offset = 8 * (offset & 0x3) */ Temp tmp = - bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand(3u)); + bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), offset, Operand::c32(3u)); select = bld.tmp(s1); shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.scc(Definition(select)), tmp, - Operand(3u)); + Operand::c32(3u)); } if (vec.size() == 1) { @@ -477,10 +478,11 @@ byte_align_scalar(isel_context* ctx, Temp vec, Operand offset, Temp dst) } else { hi = bld.tmp(s2); bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), vec); - hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(s1), hi, Operand(0u)); + hi = bld.pseudo(aco_opcode::p_extract_vector, bld.def(s1), hi, Operand::zero()); } if (select != Temp()) - hi = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), hi, Operand(0u), bld.scc(select)); + hi = + bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), hi, Operand::zero(), bld.scc(select)); lo = bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), lo, shift); Temp mid = bld.tmp(s1); lo = bld.pseudo(aco_opcode::p_split_vector, bld.def(s1), Definition(mid), lo); @@ -517,7 +519,7 @@ byte_align_vector(isel_context* ctx, Temp vec, Operand offset, Temp dst, unsigne if (dst.size() == 2) vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), tmp[0], tmp[1]); - offset = Operand(0u); + offset = Operand::zero(); } unsigned num_components = vec.bytes() / component_size; @@ -569,7 +571,7 @@ bool_to_vector_condition(isel_context* ctx, Temp val, Temp dst = Temp(0, s2)) 
assert(val.regClass() == s1); assert(dst.regClass() == bld.lm); - return bld.sop2(Builder::s_cselect, Definition(dst), Operand((uint32_t)-1), Operand(0u), + return bld.sop2(Builder::s_cselect, Definition(dst), Operand::c32(-1), Operand::zero(), bld.scc(val)); } @@ -620,7 +622,7 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign * the caller to handle appropriately */ return bld.copy(Definition(dst), src); } else if (dst.bytes() < src.bytes()) { - return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand(0u)); + return bld.pseudo(aco_opcode::p_extract_vector, Definition(dst), src, Operand::zero()); } Temp tmp = dst; @@ -630,8 +632,8 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign if (tmp == src) { } else if (src.regClass() == s1) { assert(src_bits < 32); - bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), src, Operand(0u), - Operand(src_bits), Operand((unsigned)sign_extend)); + bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), src, Operand::zero(), + Operand::c32(src_bits), Operand::c32((unsigned)sign_extend)); } else if (ctx->options->chip_class >= GFX8) { assert(src_bits < 32); assert(src_bits != 8 || src.regClass() == v1b); @@ -651,19 +653,20 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign assert(src_bits < 32); assert(ctx->options->chip_class == GFX6 || ctx->options->chip_class == GFX7); aco_opcode opcode = sign_extend ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32; - bld.vop3(opcode, Definition(tmp), src, Operand(0u), Operand(src_bits == 8 ? 8u : 16u)); + bld.vop3(opcode, Definition(tmp), src, Operand::zero(), + Operand::c32(src_bits == 8 ? 8u : 16u)); } if (dst_bits == 64) { if (sign_extend && dst.regClass() == s2) { Temp high = - bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), tmp, Operand(31u)); + bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), tmp, Operand::c32(31u)); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high); } else if (sign_extend && dst.regClass() == v2) { - Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(31u), tmp); + Temp high = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), tmp); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, high); } else { - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand(0u)); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero()); } } @@ -696,8 +699,8 @@ extract_8_16_bit_sgpr_element(isel_context* ctx, Temp dst, nir_alu_src* src, sgp bld.copy(Definition(tmp), vec); else bld.pseudo(aco_opcode::p_extract, Definition(tmp), bld.def(s1, scc), Operand(vec), - Operand(swizzle), Operand(src_size), - Operand((uint32_t)(mode == sgpr_extract_sext))); + Operand::c32(swizzle), Operand::c32(src_size), + Operand::c32((mode == sgpr_extract_sext))); if (dst.regClass() == s2) convert_int(ctx, bld, tmp, 32, 64, mode == sgpr_extract_sext, dst); @@ -798,7 +801,7 @@ convert_pointer_to_64_bit(isel_context* ctx, Temp ptr, bool non_uniform = false) if (ptr.type() == RegType::vgpr && !non_uniform) ptr = bld.as_uniform(ptr); return bld.pseudo(aco_opcode::p_create_vector, bld.def(RegClass(ptr.type(), 2)), ptr, - Operand((unsigned)ctx->options->address32_hi)); + Operand::c32((unsigned)ctx->options->address32_hi)); } void @@ -864,7 +867,7 @@ emit_vop2_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, Te if (flush_denorms && 
ctx->program->chip_class < GFX9) { assert(dst.size() == 1); Temp tmp = bld.vop2(op, bld.def(v1), op0, op1); - bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand(0x3f800000u), tmp); + bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp); } else { if (nuw) { bld.nuw().vop2(op, Definition(dst), op0, op1); @@ -923,10 +926,10 @@ emit_vop3a_instruction(isel_context* ctx, nir_alu_instr* instr, aco_opcode op, T else tmp = bld.vop3(op, bld.def(dst.regClass()), src[0], src[1]); if (dst.size() == 1) - bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand(0x3f800000u), tmp); + bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0x3f800000u), tmp); else - bld.vop3(aco_opcode::v_mul_f64, Definition(dst), Operand(UINT64_C(0x3FF0000000000000)), - tmp); + bld.vop3(aco_opcode::v_mul_f64, Definition(dst), + Operand::c64(UINT64_C(0x3FF0000000000000)), tmp); } else if (num_sources == 3) { bld.vop3(op, Definition(dst), src[0], src[1], src[2]); } else { @@ -1145,10 +1148,10 @@ emit_scaled_op(isel_context* ctx, Builder& bld, Definition dst, Temp val, aco_op /* multiply by 16777216 to handle denormals */ Temp is_denormal = bld.vopc(aco_opcode::v_cmp_class_f32, bld.hint_vcc(bld.def(bld.lm)), as_vgpr(ctx, val), - bld.copy(bld.def(v1), Operand((1u << 7) | (1u << 4)))); - Temp scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x4b800000u), val); + bld.copy(bld.def(v1), Operand::c32((1u << 7) | (1u << 4)))); + Temp scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x4b800000u), val); scaled = bld.vop1(op, bld.def(v1), scaled); - scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(undo), scaled); + scaled = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(undo), scaled); Temp not_scaled = bld.vop1(op, bld.def(v1), val); @@ -1215,12 +1218,13 @@ emit_trunc_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val) bld.pseudo(aco_opcode::p_split_vector, Definition(val_lo), Definition(val_hi), val); /* Extract the exponent and compute the unbiased value. */ - Temp exponent = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), val_hi, Operand(20u), Operand(11u)); - exponent = bld.vsub32(bld.def(v1), exponent, Operand(1023u)); + Temp exponent = + bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), val_hi, Operand::c32(20u), Operand::c32(11u)); + exponent = bld.vsub32(bld.def(v1), exponent, Operand::c32(1023u)); /* Extract the fractional part. */ - Temp fract_mask = - bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x000fffffu)); + Temp fract_mask = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u), + Operand::c32(0x000fffffu)); fract_mask = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), fract_mask, exponent); Temp fract_mask_lo = bld.tmp(v1), fract_mask_hi = bld.tmp(v1); @@ -1234,15 +1238,15 @@ emit_trunc_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val) fract_hi = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), val_hi, tmp); /* Get the sign bit. */ - Temp sign = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x80000000u), val_hi); + Temp sign = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x80000000u), val_hi); /* Decide the operation to apply depending on the unbiased exponent. 
*/ - Temp exp_lt0 = - bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.hint_vcc(bld.def(bld.lm)), exponent, Operand(0u)); + Temp exp_lt0 = bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.hint_vcc(bld.def(bld.lm)), exponent, + Operand::zero()); Temp dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_lo, - bld.copy(bld.def(v1), Operand(0u)), exp_lt0); + bld.copy(bld.def(v1), Operand::zero()), exp_lt0); Temp dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), fract_hi, sign, exp_lt0); - Temp exp_gt51 = bld.vopc_e64(aco_opcode::v_cmp_gt_i32, bld.def(s2), exponent, Operand(51u)); + Temp exp_gt51 = bld.vopc_e64(aco_opcode::v_cmp_gt_i32, bld.def(s2), exponent, Operand::c32(51u)); dst_lo = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), dst_lo, val_lo, exp_gt51); dst_hi = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), dst_hi, val_hi, exp_gt51); @@ -1259,9 +1263,9 @@ emit_floor_f64(isel_context* ctx, Builder& bld, Definition dst, Temp val) * lowered at NIR level for precision reasons). */ Temp src0 = as_vgpr(ctx, val); - Temp mask = bld.copy(bld.def(s1), Operand(3u)); /* isnan */ - Temp min_val = - bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand(-1u), Operand(0x3fefffffu)); + Temp mask = bld.copy(bld.def(s1), Operand::c32(3u)); /* isnan */ + Temp min_val = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::c32(-1u), + Operand::c32(0x3fefffffu)); Temp isnan = bld.vopc_e64(aco_opcode::v_cmp_class_f64, bld.hint_vcc(bld.def(bld.lm)), src0, mask); @@ -1289,8 +1293,8 @@ uadd32_sat(Builder& bld, Definition dst, Temp src0, Temp src1) { if (bld.program->chip_class < GFX8) { Builder::Result add = bld.vadd32(bld.def(v1), src0, src1, true); - return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), - Operand((uint32_t)-1), add.def(1).getTemp()); + return bld.vop2_e64(aco_opcode::v_cndmask_b32, dst, add.def(0).getTemp(), Operand::c32(-1), + add.def(1).getTemp()); } Builder::Result add(NULL); @@ -1337,7 +1341,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) ctx->allocated_vec.emplace(dst.id(), elems); } else { bool use_s_pack = ctx->program->chip_class >= GFX9; - Temp mask = bld.copy(bld.def(s1), Operand((1u << instr->dest.dest.ssa.bit_size) - 1)); + Temp mask = bld.copy(bld.def(s1), Operand::c32((1u << instr->dest.dest.ssa.bit_size) - 1)); std::array packed; uint32_t const_vals[NIR_MAX_VEC_COMPONENTS] = {}; @@ -1356,7 +1360,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (offset) elems[i] = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), elems[i], - Operand(offset)); + Operand::c32(offset)); if (packed[idx].id()) packed[idx] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), elems[i], @@ -1374,10 +1378,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) packed[i * 2 + 1]); else if (packed[i * 2 + 1].id()) packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), - Operand(const_vals[i * 2]), packed[i * 2 + 1]); + Operand::c32(const_vals[i * 2]), packed[i * 2 + 1]); else if (packed[i * 2].id()) packed[i] = bld.sop2(aco_opcode::s_pack_ll_b32_b16, bld.def(s1), packed[i * 2], - Operand(const_vals[i * 2 + 1])); + Operand::c32(const_vals[i * 2 + 1])); if (same) const_vals[i] = const_vals[i * 2] | (const_vals[i * 2 + 1] << 16); @@ -1389,9 +1393,9 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) for (unsigned i = 0; i < dst.size(); i++) { if (const_vals[i] && packed[i].id()) packed[i] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), - Operand(const_vals[i]), 
packed[i]); + Operand::c32(const_vals[i]), packed[i]); else if (!packed[i].id()) - packed[i] = bld.copy(bld.def(s1), Operand(const_vals[i])); + packed[i] = bld.copy(bld.def(s1), Operand::c32(const_vals[i])); } if (dst.size() == 1) @@ -1446,7 +1450,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.sop1(aco_opcode::s_abs_i32, Definition(dst), bld.def(s1, scc), src); } else if (dst.regClass() == v1) { bld.vop2(aco_opcode::v_max_i32, Definition(dst), src, - bld.vsub32(bld.def(v1), Operand(0u), src)); + bld.vsub32(bld.def(v1), Operand::zero(), src)); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -1455,30 +1459,31 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) case nir_op_isign: { Temp src = get_alu_src(ctx, instr->src[0]); if (dst.regClass() == s1) { - Temp tmp = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), src, - Operand((uint32_t)-1)); - bld.sop2(aco_opcode::s_min_i32, Definition(dst), bld.def(s1, scc), tmp, Operand(1u)); + Temp tmp = + bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(-1)); + bld.sop2(aco_opcode::s_min_i32, Definition(dst), bld.def(s1, scc), tmp, Operand::c32(1u)); } else if (dst.regClass() == s2) { Temp neg = - bld.sop2(aco_opcode::s_ashr_i64, bld.def(s2), bld.def(s1, scc), src, Operand(63u)); + bld.sop2(aco_opcode::s_ashr_i64, bld.def(s2), bld.def(s1, scc), src, Operand::c32(63u)); Temp neqz; if (ctx->program->chip_class >= GFX8) - neqz = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), src, Operand(0u)); + neqz = bld.sopc(aco_opcode::s_cmp_lg_u64, bld.def(s1, scc), src, Operand::zero()); else - neqz = bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), src, Operand(0u)) - .def(1) - .getTemp(); + neqz = + bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), src, Operand::zero()) + .def(1) + .getTemp(); /* SCC gets zero-extended to 64 bit */ bld.sop2(aco_opcode::s_or_b64, Definition(dst), bld.def(s1, scc), neg, bld.scc(neqz)); } else if (dst.regClass() == v1) { - bld.vop3(aco_opcode::v_med3_i32, Definition(dst), Operand((uint32_t)-1), src, Operand(1u)); + bld.vop3(aco_opcode::v_med3_i32, Definition(dst), Operand::c32(-1), src, Operand::c32(1u)); } else if (dst.regClass() == v2) { Temp upper = emit_extract_vector(ctx, src, 1, v1); - Temp neg = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(31u), upper); + Temp neg = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), upper); Temp gtz = - bld.vopc(aco_opcode::v_cmp_ge_i64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - Temp lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(1u), neg, gtz); - upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), neg, gtz); + bld.vopc(aco_opcode::v_cmp_ge_i64, bld.hint_vcc(bld.def(bld.lm)), Operand::zero(), src); + Temp lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(1u), neg, gtz); + upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), neg, gtz); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -1692,11 +1697,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp msb_rev = bld.sop1(op, bld.def(s1), src); Builder::Result sub = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), - Operand(src.size() * 32u - 1u), msb_rev); + Operand::c32(src.size() * 32u - 1u), msb_rev); Temp msb = sub.def(0).getTemp(); Temp carry = sub.def(1).getTemp(); - 
bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand((uint32_t)-1), msb, + bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), msb, bld.scc(carry)); } else if (src.regClass() == v1) { aco_opcode op = @@ -1705,9 +1710,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) emit_vop1_instruction(ctx, instr, op, msb_rev); Temp msb = bld.tmp(v1); Temp carry = - bld.vsub32(Definition(msb), Operand(31u), Operand(msb_rev), true).def(1).getTemp(); - bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), msb, Operand((uint32_t)-1), - carry); + bld.vsub32(Definition(msb), Operand::c32(31u), Operand(msb_rev), true).def(1).getTemp(); + bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), msb, Operand::c32(-1), carry); } else if (src.regClass() == v2) { aco_opcode op = instr->op == nir_op_ufind_msb ? aco_opcode::v_ffbh_u32 : aco_opcode::v_ffbh_i32; @@ -1715,19 +1719,17 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp lo = bld.tmp(v1), hi = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), src); - lo = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand(32u)), + lo = uadd32_sat(bld, bld.def(v1), bld.copy(bld.def(s1), Operand::c32(32u)), bld.vop1(op, bld.def(v1), lo)); hi = bld.vop1(op, bld.def(v1), hi); - Temp found_hi = - bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand((uint32_t)-1), hi); + Temp found_hi = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::c32(-1), hi); Temp msb_rev = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lo, hi, found_hi); Temp msb = bld.tmp(v1); Temp carry = - bld.vsub32(Definition(msb), Operand(63u), Operand(msb_rev), true).def(1).getTemp(); - bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), msb, Operand((uint32_t)-1), - carry); + bld.vsub32(Definition(msb), Operand::c32(63u), Operand(msb_rev), true).def(1).getTemp(); + bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), msb, Operand::c32(-1), carry); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -1796,7 +1798,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (dst.regClass() == s1) { Temp tmp = bld.tmp(s1), carry = bld.tmp(s1); bld.sop2(aco_opcode::s_add_u32, Definition(tmp), bld.scc(Definition(carry)), src0, src1); - bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand((uint32_t)-1), tmp, + bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), Operand::c32(-1), tmp, bld.scc(carry)); } else if (dst.regClass() == v2b) { Instruction* add_instr; @@ -1825,7 +1827,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } if (dst.regClass() == v1) { Temp carry = bld.vadd32(bld.def(v1), src0, src1, true).def(1).getTemp(); - bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0u), Operand(1u), carry); + bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u), + carry); break; } @@ -1842,13 +1845,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.scc(carry)) .def(1) .getTemp(); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand(0u)); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand::zero()); } else if (dst.regClass() == v2) { Temp carry = bld.vadd32(bld.def(v1), src00, src10, true).def(1).getTemp(); carry = bld.vadd32(bld.def(v1), src01, src11, true, carry).def(1).getTemp(); - carry = - bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), carry); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, 
Operand(0u)); + carry = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), + Operand::c32(1u), carry); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), carry, Operand::zero()); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -1911,7 +1914,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) break; } else if (dst.regClass() == v1) { Temp borrow = bld.vsub32(bld.def(v1), src0, src1, true).def(1).getTemp(); - bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0u), Operand(1u), borrow); + bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), Operand::c32(1u), + borrow); break; } @@ -1928,13 +1932,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.scc(borrow)) .def(1) .getTemp(); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand(0u)); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand::zero()); } else if (dst.regClass() == v2) { Temp borrow = bld.vsub32(bld.def(v1), src00, src10, true).def(1).getTemp(); borrow = bld.vsub32(bld.def(v1), src01, src11, true, Operand(borrow)).def(1).getTemp(); - borrow = - bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), borrow); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand(0u)); + borrow = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), + Operand::c32(1u), borrow); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), borrow, Operand::zero()); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -1968,7 +1972,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Operand op0(src0); Operand op1(src1); bld.vop3(aco_opcode::v_mad_u32_u16, Definition(dst), bld.set16bit(op0), - bld.set16bit(op1), Operand(0u)); + bld.set16bit(op1), Operand::zero()); } else if (src0_ub <= 0xffffff && src1_ub <= 0xffffff) { emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_u32_u24, dst, true); } else if (nir_src_is_const(instr->src[0].src)) { @@ -2122,9 +2126,9 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) ma = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), ma); Temp sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), src[0], src[1], src[2]); Temp tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), src[0], src[1], src[2]); - sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0x3f000000u /*0.5*/), + sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/), bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), sc, ma)); - tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0x3f000000u /*0.5*/), + tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3f000000u /*0.5*/), bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), tc, ma)); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), sc, tc); break; @@ -2157,24 +2161,24 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) case nir_op_fneg: { if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) { Temp src = get_alu_src_vop3p(ctx, instr->src[0]); - bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand(uint16_t(0xBC00)), + bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand::c16(uint16_t(0xBC00)), instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1); emit_split_vector(ctx, dst, 2); break; } Temp src = get_alu_src(ctx, instr->src[0]); if (dst.regClass() == v2b) { - bld.vop2(aco_opcode::v_mul_f16, Definition(dst), Operand((uint16_t)0xbc00u), - as_vgpr(ctx, src)); + 
bld.vop2(aco_opcode::v_mul_f16, Definition(dst), Operand::c16(0xbc00u), as_vgpr(ctx, src)); } else if (dst.regClass() == v1) { - bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand(0xbf800000u), as_vgpr(ctx, src)); + bld.vop2(aco_opcode::v_mul_f32, Definition(dst), Operand::c32(0xbf800000u), + as_vgpr(ctx, src)); } else if (dst.regClass() == v2) { if (ctx->block->fp_mode.must_flush_denorms16_64) src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), - Operand(UINT64_C(0x3FF0000000000000)), as_vgpr(ctx, src)); + Operand::c64(UINT64_C(0x3FF0000000000000)), as_vgpr(ctx, src)); Temp upper = bld.tmp(v1), lower = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); - upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand(0x80000000u), upper); + upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), Operand::c32(0x80000000u), upper); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -2185,21 +2189,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp src = get_alu_src(ctx, instr->src[0]); if (dst.regClass() == v2b) { Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f16, Definition(dst), - Operand((uint16_t)0x3c00), as_vgpr(ctx, src)) + Operand::c16(0x3c00), as_vgpr(ctx, src)) .instr; mul->vop3().abs[1] = true; } else if (dst.regClass() == v1) { Instruction* mul = bld.vop2_e64(aco_opcode::v_mul_f32, Definition(dst), - Operand(0x3f800000u), as_vgpr(ctx, src)) + Operand::c32(0x3f800000u), as_vgpr(ctx, src)) .instr; mul->vop3().abs[1] = true; } else if (dst.regClass() == v2) { if (ctx->block->fp_mode.must_flush_denorms16_64) src = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), - Operand(UINT64_C(0x3FF0000000000000)), as_vgpr(ctx, src)); + Operand::c64(UINT64_C(0x3FF0000000000000)), as_vgpr(ctx, src)); Temp upper = bld.tmp(v1), lower = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); - upper = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x7FFFFFFFu), upper); + upper = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7FFFFFFFu), upper); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -2209,24 +2213,25 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) case nir_op_fsat: { if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) { Temp src = get_alu_src_vop3p(ctx, instr->src[0]); - Instruction* vop3p = - bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, Operand(uint16_t(0x3C00)), - instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1); + Instruction* vop3p = bld.vop3p(aco_opcode::v_pk_mul_f16, Definition(dst), src, + Operand::c16(uint16_t(0x3C00)), + instr->src[0].swizzle[0] & 1, instr->src[0].swizzle[1] & 1); vop3p->vop3p().clamp = true; emit_split_vector(ctx, dst, 2); break; } Temp src = get_alu_src(ctx, instr->src[0]); if (dst.regClass() == v2b) { - bld.vop3(aco_opcode::v_med3_f16, Definition(dst), Operand((uint16_t)0u), - Operand((uint16_t)0x3c00), src); + bld.vop3(aco_opcode::v_med3_f16, Definition(dst), Operand::c16(0u), Operand::c16(0x3c00), + src); } else if (dst.regClass() == v1) { - bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand(0u), Operand(0x3f800000u), src); + bld.vop3(aco_opcode::v_med3_f32, Definition(dst), Operand::zero(), + Operand::c32(0x3f800000u), src); /* apparently, it is not necessary to flush denorms if this instruction is 
used with these * operands */ // TODO: confirm that this holds under any circumstances } else if (dst.regClass() == v2) { - Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u)); + Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand::zero()); add->vop3().clamp = true; } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -2323,16 +2328,17 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) */ Temp src0 = get_alu_src(ctx, instr->src[0]); Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src0); - Temp tmp0 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, Operand(0u)); + Temp tmp0 = + bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.def(bld.lm), src0, Operand::zero()); Temp tmp1 = bld.vopc(aco_opcode::v_cmp_lg_f64, bld.hint_vcc(bld.def(bld.lm)), src0, trunc); Temp cond = bld.sop2(aco_opcode::s_and_b64, bld.hint_vcc(bld.def(s2)), bld.def(s1, scc), tmp0, tmp1); - Temp add = - bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand(0u)), - bld.copy(bld.def(v1), Operand(0x3ff00000u)), cond); + Temp add = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), + bld.copy(bld.def(v1), Operand::zero()), + bld.copy(bld.def(v1), Operand::c32(0x3ff00000u)), cond); add = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), - bld.copy(bld.def(v1), Operand(0u)), add); + bld.copy(bld.def(v1), Operand::zero()), add); bld.vop3(aco_opcode::v_add_f64, Definition(dst), trunc, add); } } else { @@ -2367,21 +2373,22 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp src0 = get_alu_src(ctx, instr->src[0]); bld.pseudo(aco_opcode::p_split_vector, Definition(src0_lo), Definition(src0_hi), src0); - Temp bitmask = - bld.sop1(aco_opcode::s_brev_b32, bld.def(s1), bld.copy(bld.def(s1), Operand(-2u))); - Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, - bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi)); + Temp bitmask = bld.sop1(aco_opcode::s_brev_b32, bld.def(s1), + bld.copy(bld.def(s1), Operand::c32(-2u))); + Temp bfi = + bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, + bld.copy(bld.def(v1), Operand::c32(0x43300000u)), as_vgpr(ctx, src0_hi)); Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, - bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi)); + bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi)); Instruction* sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, - bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi)); + bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), bfi)); sub->vop3().neg[1] = true; tmp = sub->definitions[0].getTemp(); - Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), - Operand(0x432fffffu)); + Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::c32(-1u), + Operand::c32(0x432fffffu)); Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v); vop3->vop3().abs[0] = true; @@ -2406,13 +2413,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp src = as_vgpr(ctx, get_alu_src(ctx, instr->src[0])); aco_ptr norm; if (dst.regClass() == v2b) { - Temp half_pi = bld.copy(bld.def(s1), Operand(0x3118u)); + Temp half_pi = bld.copy(bld.def(s1), Operand::c32(0x3118u)); Temp tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v1), half_pi, src); aco_opcode opcode = instr->op == nir_op_fsin ? 
aco_opcode::v_sin_f16 : aco_opcode::v_cos_f16; bld.vop1(opcode, Definition(dst), tmp); } else if (dst.regClass() == v1) { - Temp half_pi = bld.copy(bld.def(s1), Operand(0x3e22f983u)); + Temp half_pi = bld.copy(bld.def(s1), Operand::c32(0x3e22f983u)); Temp tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), half_pi, src); /* before GFX9, v_sin_f32 and v_cos_f32 had a valid input domain of [-256, +256] */ @@ -2455,7 +2462,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (instr->src[0].src.ssa->bit_size == 16) { Temp src = get_alu_src(ctx, instr->src[0]); Temp tmp = bld.vop1(aco_opcode::v_frexp_exp_i16_f16, bld.def(v1), src); - tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), tmp, Operand(0u)); + tmp = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v1b), tmp, Operand::zero()); convert_int(ctx, bld, tmp, 8, 32, true, dst); } else if (instr->src[0].src.ssa->bit_size == 32) { emit_vop1_instruction(ctx, instr, aco_opcode::v_frexp_exp_i32_f32, dst); @@ -2471,27 +2478,28 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (dst.regClass() == v2b) { assert(ctx->program->chip_class >= GFX9); /* replace negative zero with positive zero */ - src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand(0u), src); - src = bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand((uint16_t)-1), src, - Operand((uint16_t)1u)); + src = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), Operand::zero(), src); + src = + bld.vop3(aco_opcode::v_med3_i16, bld.def(v2b), Operand::c16(-1), src, Operand::c16(1u)); bld.vop1(aco_opcode::v_cvt_f16_i16, Definition(dst), src); } else if (dst.regClass() == v1) { - src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0u), src); + src = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::zero(), src); src = - bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand((uint32_t)-1), src, Operand(1u)); + bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::c32(-1), src, Operand::c32(1u)); bld.vop1(aco_opcode::v_cvt_f32_i32, Definition(dst), src); } else if (dst.regClass() == v2) { - Temp cond = - bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - Temp tmp = bld.copy(bld.def(v1), Operand(0x3FF00000u)); + Temp cond = bld.vopc(aco_opcode::v_cmp_nlt_f64, bld.hint_vcc(bld.def(bld.lm)), + Operand::zero(), src); + Temp tmp = bld.copy(bld.def(v1), Operand::c32(0x3FF00000u)); Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, emit_extract_vector(ctx, src, 1, v1), cond); - cond = bld.vopc(aco_opcode::v_cmp_le_f64, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), src); - tmp = bld.copy(bld.def(v1), Operand(0xBFF00000u)); + cond = + bld.vopc(aco_opcode::v_cmp_le_f64, bld.hint_vcc(bld.def(bld.lm)), Operand::zero(), src); + tmp = bld.copy(bld.def(v1), Operand::c32(0xBFF00000u)); upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), tmp, upper, cond); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(0u), upper); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -2518,7 +2526,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (ctx->block->fp_mode.round16_64 == fp_round_tz) bld.vop1(aco_opcode::v_cvt_f16_f32, Definition(dst), src); else if (ctx->program->chip_class == GFX8 || ctx->program->chip_class == GFX9) - bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, Operand(0u)); + bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, Definition(dst), src, 
Operand::zero()); else bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, Definition(dst), src, as_vgpr(ctx, src)); break; @@ -2592,7 +2600,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower); upper = bld.vop1(aco_opcode::v_cvt_f64_i32, bld.def(v2), upper); - upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u)); + upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u)); upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper); bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper); } @@ -2612,7 +2620,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower); upper = bld.vop1(aco_opcode::v_cvt_f64_i32, bld.def(v2), upper); - upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u)); + upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u)); bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper); } else { @@ -2667,7 +2675,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower); upper = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), upper); - upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u)); + upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u)); upper = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), lower, upper); bld.vop1(aco_opcode::v_cvt_f32_f64, Definition(dst), upper); } @@ -2686,7 +2694,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), src); lower = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), lower); upper = bld.vop1(aco_opcode::v_cvt_f64_u32, bld.def(v2), upper); - upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand(32u)); + upper = bld.vop3(aco_opcode::v_ldexp_f64, bld.def(v2), upper, Operand::c32(32u)); bld.vop3(aco_opcode::v_add_f64, Definition(dst), lower, upper); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -2784,25 +2792,25 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::vgpr) { Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src); - exponent = - bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand(0x0u), exponent, Operand(64u)); - Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x7fffffu), src); - Temp sign = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand(31u), src); - mantissa = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand(0x800000u), mantissa); - mantissa = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(7u), mantissa); - mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), mantissa); + exponent = bld.vop3(aco_opcode::v_med3_i32, bld.def(v1), Operand::zero(), exponent, + Operand::c32(64u)); + Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src); + Temp sign = bld.vop2(aco_opcode::v_ashrrev_i32, bld.def(v1), Operand::c32(31u), src); + mantissa = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(0x800000u), 
mantissa); + mantissa = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(7u), mantissa); + mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), mantissa); Temp new_exponent = bld.tmp(v1); Temp borrow = - bld.vsub32(Definition(new_exponent), Operand(63u), exponent, true).def(1).getTemp(); + bld.vsub32(Definition(new_exponent), Operand::c32(63u), exponent, true).def(1).getTemp(); if (ctx->program->chip_class >= GFX8) mantissa = bld.vop3(aco_opcode::v_lshrrev_b64, bld.def(v2), new_exponent, mantissa); else mantissa = bld.vop3(aco_opcode::v_lshr_b64, bld.def(v2), mantissa, new_exponent); - Temp saturate = bld.vop1(aco_opcode::v_bfrev_b32, bld.def(v1), Operand(0xfffffffeu)); + Temp saturate = bld.vop1(aco_opcode::v_bfrev_b32, bld.def(v1), Operand::c32(0xfffffffeu)); Temp lower = bld.tmp(v1), upper = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa); - lower = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), lower, Operand(0xffffffffu), - borrow); + lower = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), lower, + Operand::c32(0xffffffffu), borrow); upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), upper, saturate, borrow); lower = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), sign, lower); upper = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), sign, upper); @@ -2814,30 +2822,30 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } else if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::sgpr) { if (src.type() == RegType::vgpr) src = bld.as_uniform(src); - Temp exponent = - bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src, Operand(0x80017u)); - exponent = - bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent, Operand(126u)); - exponent = - bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand(0u), exponent); - exponent = - bld.sop2(aco_opcode::s_min_i32, bld.def(s1), bld.def(s1, scc), Operand(64u), exponent); - Temp mantissa = - bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), Operand(0x7fffffu), src); + Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src, + Operand::c32(0x80017u)); + exponent = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent, + Operand::c32(126u)); + exponent = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand::zero(), + exponent); + exponent = bld.sop2(aco_opcode::s_min_i32, bld.def(s1), bld.def(s1, scc), + Operand::c32(64u), exponent); + Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), + Operand::c32(0x7fffffu), src); Temp sign = - bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), src, Operand(31u)); + bld.sop2(aco_opcode::s_ashr_i32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(31u)); mantissa = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), - Operand(0x800000u), mantissa); - mantissa = - bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), mantissa, Operand(7u)); - mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand(0u), mantissa); - exponent = - bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), Operand(63u), exponent); + Operand::c32(0x800000u), mantissa); + mantissa = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), mantissa, + Operand::c32(7u)); + mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), mantissa); + exponent = bld.sop2(aco_opcode::s_sub_u32, 
bld.def(s1), bld.def(s1, scc), + Operand::c32(63u), exponent); mantissa = bld.sop2(aco_opcode::s_lshr_b64, bld.def(s2), bld.def(s1, scc), mantissa, exponent); Temp cond = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), exponent, - Operand(0xffffffffu)); // exp >= 64 - Temp saturate = bld.sop1(aco_opcode::s_brev_b64, bld.def(s2), Operand(0xfffffffeu)); + Operand::c32(0xffffffffu)); // exp >= 64 + Temp saturate = bld.sop1(aco_opcode::s_brev_b64, bld.def(s2), Operand::c32(0xfffffffeu)); mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), saturate, mantissa, cond); Temp lower = bld.tmp(s1), upper = bld.tmp(s1); bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa); @@ -2851,12 +2859,12 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); } else if (instr->src[0].src.ssa->bit_size == 64) { - Temp vec = - bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand(0u), Operand(0x3df00000u)); + Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), + Operand::c32(0x3df00000u)); Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src); Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec); - vec = - bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand(0u), Operand(0xc1f00000u)); + vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), + Operand::c32(0xc1f00000u)); Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul); Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc); Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma); @@ -2880,16 +2888,16 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::vgpr) { Temp exponent = bld.vop1(aco_opcode::v_frexp_exp_i32_f32, bld.def(v1), src); Temp exponent_in_range = bld.vopc(aco_opcode::v_cmp_ge_i32, bld.hint_vcc(bld.def(bld.lm)), - Operand(64u), exponent); - exponent = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand(0x0u), exponent); - Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x7fffffu), src); - mantissa = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand(0x800000u), mantissa); - Temp exponent_small = bld.vsub32(bld.def(v1), Operand(24u), exponent); + Operand::c32(64u), exponent); + exponent = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::zero(), exponent); + Temp mantissa = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffu), src); + mantissa = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(0x800000u), mantissa); + Temp exponent_small = bld.vsub32(bld.def(v1), Operand::c32(24u), exponent); Temp small = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), exponent_small, mantissa); - mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), mantissa); + mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand::zero(), mantissa); Temp new_exponent = bld.tmp(v1); Temp cond_small = - bld.vsub32(Definition(new_exponent), exponent, Operand(24u), true).def(1).getTemp(); + bld.vsub32(Definition(new_exponent), exponent, Operand::c32(24u), true).def(1).getTemp(); if (ctx->program->chip_class >= GFX8) mantissa = bld.vop3(aco_opcode::v_lshlrev_b64, bld.def(v2), new_exponent, mantissa); else @@ -2897,54 +2905,56 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp lower = bld.tmp(v1), upper = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, 
Definition(lower), Definition(upper), mantissa); lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), lower, small, cond_small); - upper = - bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), upper, Operand(0u), cond_small); - lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0xffffffffu), lower, + upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), upper, Operand::zero(), + cond_small); + lower = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0xffffffffu), lower, exponent_in_range); - upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0xffffffffu), upper, + upper = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::c32(0xffffffffu), upper, exponent_in_range); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); } else if (instr->src[0].src.ssa->bit_size <= 32 && dst.type() == RegType::sgpr) { if (src.type() == RegType::vgpr) src = bld.as_uniform(src); - Temp exponent = - bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src, Operand(0x80017u)); - exponent = - bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent, Operand(126u)); - exponent = - bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand(0u), exponent); - Temp mantissa = - bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), Operand(0x7fffffu), src); + Temp exponent = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), src, + Operand::c32(0x80017u)); + exponent = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), exponent, + Operand::c32(126u)); + exponent = bld.sop2(aco_opcode::s_max_i32, bld.def(s1), bld.def(s1, scc), Operand::zero(), + exponent); + Temp mantissa = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), + Operand::c32(0x7fffffu), src); mantissa = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), - Operand(0x800000u), mantissa); - Temp exponent_small = - bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), Operand(24u), exponent); + Operand::c32(0x800000u), mantissa); + Temp exponent_small = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), + Operand::c32(24u), exponent); Temp small = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), mantissa, exponent_small); - mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand(0u), mantissa); - Temp exponent_large = - bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), exponent, Operand(24u)); + mantissa = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), mantissa); + Temp exponent_large = bld.sop2(aco_opcode::s_sub_u32, bld.def(s1), bld.def(s1, scc), + exponent, Operand::c32(24u)); mantissa = bld.sop2(aco_opcode::s_lshl_b64, bld.def(s2), bld.def(s1, scc), mantissa, exponent_large); - Temp cond = bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand(64u), exponent); - mantissa = - bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa, Operand(0xffffffffu), cond); + Temp cond = + bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand::c32(64u), exponent); + mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa, + Operand::c32(0xffffffffu), cond); Temp lower = bld.tmp(s1), upper = bld.tmp(s1); bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa); Temp cond_small = - bld.sopc(aco_opcode::s_cmp_le_i32, bld.def(s1, scc), exponent, Operand(24u)); + bld.sopc(aco_opcode::s_cmp_le_i32, bld.def(s1, scc), exponent, Operand::c32(24u)); lower = 
bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), small, lower, cond_small); - upper = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand(0u), upper, cond_small); + upper = + bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::zero(), upper, cond_small); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), lower, upper); } else if (instr->src[0].src.ssa->bit_size == 64) { - Temp vec = - bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand(0u), Operand(0x3df00000u)); + Temp vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), + Operand::c32(0x3df00000u)); Temp trunc = emit_trunc_f64(ctx, bld, bld.def(v2), src); Temp mul = bld.vop3(aco_opcode::v_mul_f64, bld.def(v2), trunc, vec); - vec = - bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand(0u), Operand(0xc1f00000u)); + vec = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand::zero(), + Operand::c32(0xc1f00000u)); Temp floor = emit_floor_f64(ctx, bld, bld.def(v2), mul); Temp fma = bld.vop3(aco_opcode::v_fma_f64, bld.def(v2), floor, vec, trunc); Temp lower = bld.vop1(aco_opcode::v_cvt_u32_f64, bld.def(v1), fma); @@ -2966,10 +2976,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (dst.regClass() == s1) { src = bool_to_scalar_condition(ctx, src); - bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand(0x3c00u), src); + bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3c00u), src); } else if (dst.regClass() == v2b) { - Temp one = bld.copy(bld.def(v1), Operand(0x3c00u)); - bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0u), one, src); + Temp one = bld.copy(bld.def(v1), Operand::c32(0x3c00u)); + bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), one, src); } else { unreachable("Wrong destination register class for nir_op_b2f16."); } @@ -2981,10 +2991,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (dst.regClass() == s1) { src = bool_to_scalar_condition(ctx, src); - bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand(0x3f800000u), src); + bld.sop2(aco_opcode::s_mul_i32, Definition(dst), Operand::c32(0x3f800000u), src); } else if (dst.regClass() == v1) { - bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0u), Operand(0x3f800000u), - src); + bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), + Operand::c32(0x3f800000u), src); } else { unreachable("Wrong destination register class for nir_op_b2f32."); } @@ -2996,12 +3006,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (dst.regClass() == s2) { src = bool_to_scalar_condition(ctx, src); - bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand(0x3f800000u), Operand(0u), - bld.scc(src)); + bld.sop2(aco_opcode::s_cselect_b64, Definition(dst), Operand::c32(0x3f800000u), + Operand::zero(), bld.scc(src)); } else if (dst.regClass() == v2) { - Temp one = bld.copy(bld.def(v2), Operand(0x3FF00000u)); - Temp upper = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), one, src); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(0u), upper); + Temp one = bld.copy(bld.def(v2), Operand::c32(0x3FF00000u)); + Temp upper = + bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), one, src); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), upper); } else { unreachable("Wrong destination register class for nir_op_b2f64."); } @@ -3053,13 +3064,14 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (tmp.regClass() == s1) { 
bool_to_scalar_condition(ctx, src, tmp); } else if (tmp.type() == RegType::vgpr) { - bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(tmp), Operand(0u), Operand(1u), src); + bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(tmp), Operand::zero(), Operand::c32(1u), + src); } else { unreachable("Invalid register class for b2i32"); } if (tmp != dst) - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand(0u)); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), tmp, Operand::zero()); break; } case nir_op_b2b1: @@ -3071,19 +3083,20 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) assert(src.regClass() == v1 || src.regClass() == v2); assert(dst.regClass() == bld.lm); bld.vopc(src.size() == 2 ? aco_opcode::v_cmp_lg_u64 : aco_opcode::v_cmp_lg_u32, - Definition(dst), Operand(0u), src) + Definition(dst), Operand::zero(), src) .def(0) .setHint(vcc); } else { assert(src.regClass() == s1 || src.regClass() == s2); Temp tmp; if (src.regClass() == s2 && ctx->program->chip_class <= GFX7) { - tmp = bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), Operand(0u), src) - .def(1) - .getTemp(); + tmp = + bld.sop2(aco_opcode::s_or_b64, bld.def(s2), bld.def(s1, scc), Operand::zero(), src) + .def(1) + .getTemp(); } else { tmp = bld.sopc(src.size() == 2 ? aco_opcode::s_cmp_lg_u64 : aco_opcode::s_cmp_lg_u32, - bld.scc(bld.def(s1)), Operand(0u), src); + bld.scc(bld.def(s1)), Operand::zero(), src); } bool_to_vector_condition(ctx, tmp, dst); } @@ -3124,7 +3137,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) get_alu_src(ctx, instr->src[0])); } else { bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), - get_alu_src(ctx, instr->src[0]), Operand(1u), Operand(16u), Operand(0u)); + get_alu_src(ctx, instr->src[0]), Operand::c32(1u), Operand::c32(16u), + Operand::zero()); } break; case nir_op_pack_32_2x16_split: { @@ -3135,9 +3149,10 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) src1 = emit_extract_vector(ctx, src1, 0, v2b); bld.pseudo(aco_opcode::p_create_vector, Definition(dst), src0, src1); } else { - src0 = - bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), src0, Operand(0xFFFFu)); - src1 = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), src1, Operand(16u)); + src0 = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), src0, + Operand::c32(0xFFFFu)); + src1 = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), src1, + Operand::c32(16u)); bld.sop2(aco_opcode::s_or_b32, Definition(dst), bld.def(s1, scc), src0, src1); } break; @@ -3184,7 +3199,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) case nir_op_unpack_half_2x16_split_y: { Temp src = get_alu_src(ctx, instr->src[0]); if (src.regClass() == s1) - src = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), src, Operand(16u)); + src = + bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), src, Operand::c32(16u)); else src = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), src).def(1).getTemp(); @@ -3208,8 +3224,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Temp f32, cmp_res; if (ctx->program->chip_class >= GFX8) { - Temp mask = bld.copy(bld.def(s1), - Operand(0x36Fu)); /* value is NOT negative/positive denormal value */ + Temp mask = bld.copy( + bld.def(s1), Operand::c32(0x36Fu)); /* value is NOT negative/positive denormal value */ cmp_res = bld.vopc_e64(aco_opcode::v_cmp_class_f16, bld.hint_vcc(bld.def(bld.lm)), f16, mask); f32 = 
bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16); @@ -3218,21 +3234,21 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) * so compare the result and flush to 0 if it's smaller. */ f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16); - Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u)); + Temp smallest = bld.copy(bld.def(s1), Operand::c32(0x38800000u)); Instruction* tmp0 = bld.vopc_e64(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), f32, smallest); tmp0->vop3().abs[0] = true; Temp tmp1 = - bld.vopc(aco_opcode::v_cmp_lg_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), f32); + bld.vopc(aco_opcode::v_cmp_lg_f32, bld.hint_vcc(bld.def(bld.lm)), Operand::zero(), f32); cmp_res = bld.sop2(aco_opcode::s_nand_b64, bld.def(s2), bld.def(s1, scc), tmp0->definitions[0].getTemp(), tmp1); } if (ctx->block->fp_mode.preserve_signed_zero_inf_nan32) { Temp copysign_0 = - bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0u), as_vgpr(ctx, src)); + bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::zero(), as_vgpr(ctx, src)); bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), copysign_0, f32, cmp_res); } else { - bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand(0u), f32, cmp_res); + bld.vop2(aco_opcode::v_cndmask_b32, Definition(dst), Operand::zero(), f32, cmp_res); } break; } @@ -3261,7 +3277,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) nir_const_value* const_insert = nir_src_as_const_value(instr->src[1].src); Operand lhs; if (const_insert && const_bitmask) { - lhs = Operand(const_insert->u32 & const_bitmask->u32); + lhs = Operand::c32(const_insert->u32 & const_bitmask->u32); } else { insert = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), insert, bitmask); @@ -3271,7 +3287,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) Operand rhs; nir_const_value* const_base = nir_src_as_const_value(instr->src[2].src); if (const_base && const_bitmask) { - rhs = Operand(const_base->u32 & ~const_bitmask->u32); + rhs = Operand::c32(const_base->u32 & ~const_bitmask->u32); } else { base = bld.sop2(aco_opcode::s_andn2_b32, bld.def(s1), bld.def(s1, scc), base, bitmask); rhs = Operand(base); @@ -3300,7 +3316,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) uint32_t extract = (const_bits->u32 << 16) | (const_offset->u32 & 0x1f); aco_opcode opcode = instr->op == nir_op_ubfe ? aco_opcode::s_bfe_u32 : aco_opcode::s_bfe_i32; - bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand(extract)); + bld.sop2(opcode, Definition(dst), bld.def(s1, scc), base, Operand::c32(extract)); break; } @@ -3312,12 +3328,13 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), base, mask); bld.sop2(aco_opcode::s_lshr_b32, Definition(dst), bld.def(s1, scc), masked, offset); } else { - Operand bits_op = const_bits ? Operand(const_bits->u32 << 16) + Operand bits_op = const_bits ? Operand::c32(const_bits->u32 << 16) : bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), - bld.def(s1, scc), bits, Operand(16u)); - Operand offset_op = const_offset ? Operand(const_offset->u32 & 0x1fu) - : bld.sop2(aco_opcode::s_and_b32, bld.def(s1), - bld.def(s1, scc), offset, Operand(0x1fu)); + bld.def(s1, scc), bits, Operand::c32(16u)); + Operand offset_op = const_offset + ? 
Operand::c32(const_offset->u32 & 0x1fu) + : bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), + offset, Operand::c32(0x1fu)); Temp extract = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), bits_op, offset_op); @@ -3351,7 +3368,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } index += swizzle * instr->dest.dest.ssa.bit_size / bits; bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc), Operand(vec), - Operand(index), Operand(bits), Operand((uint32_t)is_signed)); + Operand::c32(index), Operand::c32(bits), Operand::c32(is_signed)); } else { Temp src = get_alu_src(ctx, instr->src[0]); Definition def(dst); @@ -3362,15 +3379,16 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) } assert(def.bytes() <= 4); if (def.regClass() == s1) { - bld.pseudo(aco_opcode::p_extract, def, bld.def(s1, scc), Operand(src), Operand(index), - Operand(bits), Operand((uint32_t)is_signed)); + bld.pseudo(aco_opcode::p_extract, def, bld.def(s1, scc), Operand(src), + Operand::c32(index), Operand::c32(bits), Operand::c32(is_signed)); } else { src = emit_extract_vector(ctx, src, 0, def.regClass()); - bld.pseudo(aco_opcode::p_extract, def, Operand(src), Operand(index), Operand(bits), - Operand((uint32_t)is_signed)); + bld.pseudo(aco_opcode::p_extract, def, Operand(src), Operand::c32(index), + Operand::c32(bits), Operand::c32(is_signed)); } if (dst.size() == 2) - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(), Operand(0u)); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(), + Operand::zero()); } break; } @@ -3393,16 +3411,19 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) def = bld.def(src.type(), 1); } if (def.regClass() == s1) { - bld.pseudo(aco_opcode::p_insert, def, bld.def(s1, scc), Operand(src), Operand(index), - Operand(bits)); + bld.pseudo(aco_opcode::p_insert, def, bld.def(s1, scc), Operand(src), + Operand::c32(index), Operand::c32(bits)); } else { src = emit_extract_vector(ctx, src, 0, def.regClass()); - bld.pseudo(aco_opcode::p_insert, def, Operand(src), Operand(index), Operand(bits)); + bld.pseudo(aco_opcode::p_insert, def, Operand(src), Operand::c32(index), + Operand::c32(bits)); } if (dst.size() == 2 && swap) - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand(0u), def.getTemp()); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), Operand::zero(), + def.getTemp()); else if (dst.size() == 2) - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(), Operand(0u)); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), def.getTemp(), + Operand::zero()); } break; } @@ -3411,11 +3432,11 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr) if (src.regClass() == s1) { bld.sop1(aco_opcode::s_bcnt1_i32_b32, Definition(dst), bld.def(s1, scc), src); } else if (src.regClass() == v1) { - bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), src, Operand(0u)); + bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), src, Operand::zero()); } else if (src.regClass() == v2) { bld.vop3(aco_opcode::v_bcnt_u32_b32, Definition(dst), emit_extract_vector(ctx, src, 1, v1), bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), - emit_extract_vector(ctx, src, 0, v1), Operand(0u))); + emit_extract_vector(ctx, src, 0, v1), Operand::zero())); } else if (src.regClass() == s2) { bld.sop1(aco_opcode::s_bcnt1_i32_b64, Definition(dst), bld.def(s1, scc), src); } else { @@ -3537,25 +3558,25 @@ visit_load_const(isel_context* ctx, nir_load_const_instr* instr) if (instr->def.bit_size 
== 1) { assert(dst.regClass() == bld.lm); int val = instr->value[0].b ? -1 : 0; - Operand op = bld.lm.size() == 1 ? Operand((uint32_t)val) : Operand((uint64_t)val); + Operand op = bld.lm.size() == 1 ? Operand::c32(val) : Operand::c64(val); bld.copy(Definition(dst), op); } else if (instr->def.bit_size == 8) { - bld.copy(Definition(dst), Operand((uint32_t)instr->value[0].u8)); + bld.copy(Definition(dst), Operand::c32(instr->value[0].u8)); } else if (instr->def.bit_size == 16) { /* sign-extend to use s_movk_i32 instead of a literal */ - bld.copy(Definition(dst), Operand((uint32_t)instr->value[0].i16)); + bld.copy(Definition(dst), Operand::c32(instr->value[0].i16)); } else if (dst.size() == 1) { - bld.copy(Definition(dst), Operand(instr->value[0].u32)); + bld.copy(Definition(dst), Operand::c32(instr->value[0].u32)); } else { assert(dst.size() != 1); aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)}; if (instr->def.bit_size == 64) for (unsigned i = 0; i < dst.size(); i++) - vec->operands[i] = Operand{(uint32_t)(instr->value[0].u64 >> i * 32)}; + vec->operands[i] = Operand::c32(instr->value[0].u64 >> i * 32); else { for (unsigned i = 0; i < dst.size(); i++) - vec->operands[i] = Operand{instr->value[i].u32}; + vec->operands[i] = Operand::c32(instr->value[i].u32); } vec->definitions[0] = Definition(dst); ctx->block->instructions.emplace_back(std::move(vec)); @@ -3665,12 +3686,12 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, } Temp offset_tmp = offset.isTemp() ? offset.getTemp() : Temp(); if (offset.isConstant()) { - offset = Operand(offset.constantValue() + to_add); + offset = Operand::c32(offset.constantValue() + to_add); } else if (offset_tmp.regClass() == s1) { offset = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), offset_tmp, - Operand(to_add)); + Operand::c32(to_add)); } else if (offset_tmp.regClass() == v1) { - offset = bld.vadd32(bld.def(v1), offset_tmp, Operand(to_add)); + offset = bld.vadd32(bld.def(v1), offset_tmp, Operand::c32(to_add)); } else { Temp lo = bld.tmp(offset_tmp.type(), 1); Temp hi = bld.tmp(offset_tmp.type(), 1); @@ -3679,14 +3700,14 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, if (offset_tmp.regClass() == s2) { Temp carry = bld.tmp(s1); lo = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), lo, - Operand(to_add)); + Operand::c32(to_add)); hi = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), hi, carry); offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), lo, hi); } else { Temp new_lo = bld.tmp(v1); Temp carry = - bld.vadd32(Definition(new_lo), lo, Operand(to_add), true).def(1).getTemp(); - hi = bld.vadd32(bld.def(v1), hi, Operand(0u), false, carry); + bld.vadd32(Definition(new_lo), lo, Operand::c32(to_add), true).def(1).getTemp(); + hi = bld.vadd32(bld.def(v1), hi, Operand::zero(), false, carry); offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), new_lo, hi); } } @@ -3699,20 +3720,20 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, align = 4; Temp offset_tmp = offset.isTemp() ? 
offset.getTemp() : Temp(); if (offset.isConstant()) { - aligned_offset = Operand(offset.constantValue() & 0xfffffffcu); + aligned_offset = Operand::c32(offset.constantValue() & 0xfffffffcu); } else if (offset_tmp.regClass() == s1) { aligned_offset = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), - Operand(0xfffffffcu), offset_tmp); + Operand::c32(0xfffffffcu), offset_tmp); } else if (offset_tmp.regClass() == s2) { aligned_offset = bld.sop2(aco_opcode::s_and_b64, bld.def(s2), bld.def(s1, scc), - Operand((uint64_t)0xfffffffffffffffcllu), offset_tmp); + Operand::c64(0xfffffffffffffffcllu), offset_tmp); } else if (offset_tmp.regClass() == v1) { aligned_offset = - bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xfffffffcu), offset_tmp); + bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0xfffffffcu), offset_tmp); } else if (offset_tmp.regClass() == v2) { Temp hi = bld.tmp(v1), lo = bld.tmp(v1); bld.pseudo(aco_opcode::p_split_vector, Definition(lo), Definition(hi), offset_tmp); - lo = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0xfffffffcu), lo); + lo = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0xfffffffcu), lo); aligned_offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), lo, hi); } } @@ -3731,10 +3752,10 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, /* shift result right if needed */ if (params.byte_align_loads && info.component_size < 4) { - Operand byte_align_off((uint32_t)byte_align); + Operand byte_align_off = Operand::c32(byte_align); if (byte_align == -1) { if (offset.isConstant()) - byte_align_off = Operand(offset.constantValue() % 4u); + byte_align_off = Operand::c32(offset.constantValue() % 4u); else if (offset.size() == 2) byte_align_off = Operand(emit_extract_vector(ctx, offset.getTemp(), 0, RegClass(offset.getTemp().type(), 1))); @@ -3793,7 +3814,8 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info, assert(i == num_vals); RegClass new_rc = RegClass::get(reg_type, tmp[0].bytes() / component_size * component_size); - tmp[0] = bld.pseudo(aco_opcode::p_extract_vector, bld.def(new_rc), tmp[0], Operand(0u)); + tmp[0] = + bld.pseudo(aco_opcode::p_extract_vector, bld.def(new_rc), tmp[0], Operand::zero()); } RegClass elem_rc = RegClass::get(reg_type, component_size); @@ -3852,7 +3874,7 @@ Operand load_lds_size_m0(Builder& bld) { /* TODO: m0 does not need to be initialized on GFX9+ */ - return bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand(0xffffffffu))); + return bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0xffffffffu))); } Temp @@ -3902,7 +3924,7 @@ lds_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned if (const_offset > (const_offset_range - const_offset_unit)) { unsigned excess = const_offset - (const_offset % const_offset_range); - offset = bld.vadd32(bld.def(v1), offset, Operand(excess)); + offset = bld.vadd32(bld.def(v1), offset, Operand::c32(excess)); const_offset -= excess; } @@ -3950,7 +3972,7 @@ smem_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigned load->operands[1] = Operand(offset); } else { load->operands[0] = Operand(offset); - load->operands[1] = Operand(0u); + load->operands[1] = Operand::zero(); } RegClass rc(RegType::sgpr, size); Temp val = dst_hint.id() && dst_hint.regClass() == rc ? 
dst_hint : bld.tmp(rc); @@ -3969,7 +3991,7 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne unsigned align_, unsigned const_offset, Temp dst_hint) { Operand vaddr = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1); - Operand soffset = offset.type() == RegType::sgpr ? Operand(offset) : Operand((uint32_t)0); + Operand soffset = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0); if (info.soffset.id()) { if (soffset.isTemp()) @@ -4027,10 +4049,10 @@ get_gfx6_global_rsrc(Builder& bld, Temp addr) S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); if (addr.type() == RegType::vgpr) - return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), Operand(0u), Operand(0u), - Operand(-1u), Operand(rsrc_conf)); - return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), addr, Operand(-1u), - Operand(rsrc_conf)); + return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), Operand::zero(), Operand::zero(), + Operand::c32(-1u), Operand::c32(rsrc_conf)); + return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), addr, Operand::c32(-1u), + Operand::c32(rsrc_conf)); } Temp @@ -4077,7 +4099,7 @@ global_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsign create_instruction(op, Format::MUBUF, 3, 1)}; mubuf->operands[0] = Operand(get_gfx6_global_rsrc(bld, offset)); mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1); - mubuf->operands[2] = Operand(0u); + mubuf->operands[2] = Operand::zero(); mubuf->glc = info.glc; mubuf->dlc = false; mubuf->offset = 0; @@ -4333,7 +4355,7 @@ store_lds(isel_context* ctx, unsigned elem_size_bytes, Temp data, uint32_t wrmas unsigned max_offset = write2 ? (255 - write2_off) * split_data.bytes() : 65535; Temp address_offset = address; if (inline_offset > max_offset) { - address_offset = bld.vadd32(bld.def(v1), Operand(base_offset), address_offset); + address_offset = bld.vadd32(bld.def(v1), Operand::c32(base_offset), address_offset); inline_offset = offsets[i]; } @@ -4446,8 +4468,8 @@ create_vec_from_array(isel_context* ctx, Temp arr[], unsigned cnt, RegType reg_t allocated_vec[i] = arr[i]; instr->operands[i] = Operand(arr[i]); } else { - Temp zero = - bld.copy(bld.def(RegClass(reg_type, dword_size)), Operand(0u, dword_size == 2)); + Temp zero = bld.copy(bld.def(RegClass(reg_type, dword_size)), + Operand::zero(dword_size == 2 ? 8 : 4)); allocated_vec[i] = zero; instr->operands[i] = Operand(zero); } @@ -4471,12 +4493,12 @@ resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_off const_offset %= 4096u; if (!voffset.id()) - voffset = bld.copy(bld.def(v1), Operand(excess_const_offset)); + voffset = bld.copy(bld.def(v1), Operand::c32(excess_const_offset)); else if (unlikely(voffset.regClass() == s1)) voffset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), - Operand(excess_const_offset), Operand(voffset)); + Operand::c32(excess_const_offset), Operand(voffset)); else if (likely(voffset.regClass() == v1)) - voffset = bld.vadd32(bld.def(v1), Operand(voffset), Operand(excess_const_offset)); + voffset = bld.vadd32(bld.def(v1), Operand(voffset), Operand::c32(excess_const_offset)); else unreachable("Unsupported register class of voffset"); } @@ -4498,7 +4520,7 @@ emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp s const_offset = resolve_excess_vmem_const_offset(bld, voffset, const_offset); Operand voffset_op = voffset.id() ? 
Operand(as_vgpr(ctx, voffset)) : Operand(v1); - Operand soffset_op = soffset.id() ? Operand(soffset) : Operand(0u); + Operand soffset_op = soffset.id() ? Operand(soffset) : Operand::zero(); Builder::Result r = bld.mubuf(op, Operand(descriptor), voffset_op, soffset_op, Operand(vdata), const_offset, /* offen */ !voffset_op.isUndefined(), /* swizzled */ swizzled, @@ -4561,7 +4583,7 @@ wave_id_in_threadgroup(isel_context* ctx) { Builder bld(ctx->program, ctx->block); return bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(24u | (4u << 16))); + get_arg(ctx, ctx->args->ac.merged_wave_info), Operand::c32(24u | (4u << 16))); } Temp @@ -4576,8 +4598,9 @@ thread_id_in_threadgroup(isel_context* ctx) return tid_in_wave; Temp wave_id_in_tg = wave_id_in_threadgroup(ctx); - Temp num_pre_threads = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), - wave_id_in_tg, Operand(ctx->program->wave_size == 64 ? 6u : 5u)); + Temp num_pre_threads = + bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), wave_id_in_tg, + Operand::c32(ctx->program->wave_size == 64 ? 6u : 5u)); return bld.vadd32(bld.def(v1), Operand(num_pre_threads), Operand(tid_in_wave)); } @@ -4589,7 +4612,7 @@ get_tess_rel_patch_id(isel_context* ctx) switch (ctx->shader->info.stage) { case MESA_SHADER_TESS_CTRL: return bld.pseudo(aco_opcode::p_extract, bld.def(v1), get_arg(ctx, ctx->args->ac.tcs_rel_ids), - Operand(0u), Operand(8u), Operand(0u)); + Operand::zero(), Operand::c32(8u), Operand::zero()); case MESA_SHADER_TESS_EVAL: return get_arg(ctx, ctx->args->ac.tes_rel_patch_id); default: unreachable("Unsupported stage in get_tess_rel_patch_id"); } @@ -4691,7 +4714,7 @@ emit_interp_instr(isel_context* ctx, unsigned idx, unsigned component, Temp src, if (ctx->program->dev.has_16bank_lds) { assert(ctx->options->chip_class <= GFX8); Builder::Result interp_p1 = - bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1), Operand(2u) /* P0 */, + bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1), Operand::c32(2u) /* P0 */, bld.m0(prim_mask), idx, component); interp_p1 = bld.vintrp(aco_opcode::v_interp_p1lv_f16, bld.def(v2b), coord1, bld.m0(prim_mask), interp_p1, idx, component); @@ -4749,20 +4772,22 @@ emit_load_frag_coord(isel_context* ctx, Temp dst, unsigned num_components) /* adjusted_frag_z * 0.0625 + frag_z */ adjusted_frag_z = bld.vop3(aco_opcode::v_fma_f32, bld.def(v1), adjusted_frag_z, - Operand(0x3d800000u /* 0.0625 */), frag_z); + Operand::c32(0x3d800000u /* 0.0625 */), frag_z); /* VRS Rate X = Ancillary[2:3] */ - Temp x_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), - get_arg(ctx, ctx->args->ac.ancillary), Operand(2u), Operand(2u)); + Temp x_rate = + bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary), + Operand::c32(2u), Operand::c32(2u)); /* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */ - Temp cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand(1u), Operand(x_rate)); + Temp cond = + bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(x_rate)); vec->operands[2] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), frag_z, adjusted_frag_z, cond); } for (Operand& op : vec->operands) - op = op.isUndefined() ? Operand(0u) : op; + op = op.isUndefined() ? 
Operand::zero() : op; vec->definitions[0] = Definition(dst); ctx->block->instructions.emplace_back(std::move(vec)); @@ -4780,19 +4805,19 @@ emit_load_frag_shading_rate(isel_context* ctx, Temp dst) * VRS Rate Y = Ancillary[4:5] */ Temp x_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary), - Operand(2u), Operand(2u)); + Operand::c32(2u), Operand::c32(2u)); Temp y_rate = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary), - Operand(4u), Operand(2u)); + Operand::c32(4u), Operand::c32(2u)); /* xRate = xRate == 0x1 ? Horizontal2Pixels : None. */ - cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand(1u), Operand(x_rate)); - x_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand(0u)), - bld.copy(bld.def(v1), Operand(4u)), cond); + cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(x_rate)); + x_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand::zero()), + bld.copy(bld.def(v1), Operand::c32(4u)), cond); /* yRate = yRate == 0x1 ? Vertical2Pixels : None. */ - cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand(1u), Operand(y_rate)); - y_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand(0u)), - bld.copy(bld.def(v1), Operand(1u)), cond); + cond = bld.vopc(aco_opcode::v_cmp_eq_i32, bld.def(bld.lm), Operand::c32(1u), Operand(y_rate)); + y_rate = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand::zero()), + bld.copy(bld.def(v1), Operand::c32(1u)), cond); bld.vop2(aco_opcode::v_or_b32, Definition(dst), Operand(x_rate), Operand(y_rate)); } @@ -4907,12 +4932,13 @@ adjust_vertex_fetch_alpha(isel_context* ctx, unsigned adjustment, Temp alpha) * exponent. */ unsigned offset = adjustment == AC_FETCH_FORMAT_SNORM ? 23u : 0u; - alpha = bld.vop3(aco_opcode::v_bfe_i32, bld.def(v1), alpha, Operand(offset), Operand(2u)); + alpha = + bld.vop3(aco_opcode::v_bfe_i32, bld.def(v1), alpha, Operand::c32(offset), Operand::c32(2u)); /* Convert back to the right type. */ if (adjustment == AC_FETCH_FORMAT_SNORM) { alpha = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), alpha); - alpha = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0xbf800000u), alpha); + alpha = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::c32(0xbf800000u), alpha); } else if (adjustment == AC_FETCH_FORMAT_SSCALED) { alpha = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), alpha); } @@ -4960,7 +4986,7 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) ctx->program->info->vs.use_per_attribute_vb_descs ? 
location : attrib_binding; desc_index = util_bitcount(ctx->program->info->vs.vb_desc_usage_mask & u_bit_consecutive(0, desc_index)); - Operand off = bld.copy(bld.def(s1), Operand(desc_index * 16u)); + Operand off = bld.copy(bld.def(s1), Operand::c32(desc_index * 16u)); Temp list = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), vertex_buffers, off); Temp index; @@ -5027,13 +5053,13 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) Temp fetch_index = index; if (attrib_stride != 0 && fetch_offset > attrib_stride) { fetch_index = - bld.vadd32(bld.def(v1), Operand(fetch_offset / attrib_stride), fetch_index); + bld.vadd32(bld.def(v1), Operand::c32(fetch_offset / attrib_stride), fetch_index); fetch_offset = fetch_offset % attrib_stride; } - Operand soffset(0u); + Operand soffset = Operand::zero(); if (fetch_offset >= 4096) { - soffset = bld.copy(bld.def(s1), Operand(fetch_offset / 4096 * 4096)); + soffset = bld.copy(bld.def(s1), Operand::c32(fetch_offset / 4096 * 4096)); fetch_offset %= 4096; } @@ -5136,11 +5162,11 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) num_temp++; elems[i] = channel; } else if (is_float && idx == 3) { - vec->operands[i] = Operand(0x3f800000u); + vec->operands[i] = Operand::c32(0x3f800000u); } else if (!is_float && idx == 3) { - vec->operands[i] = Operand(1u); + vec->operands[i] = Operand::c32(1u); } else { - vec->operands[i] = Operand(0u); + vec->operands[i] = Operand::zero(); } } vec->definitions[0] = Definition(dst); @@ -5178,14 +5204,14 @@ visit_load_input(isel_context* ctx, nir_intrinsic_instr* instr) } if (dst.size() == 1) { - bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(dst), Operand(vertex_id), + bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(dst), Operand::c32(vertex_id), bld.m0(prim_mask), idx, component); } else { aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)}; for (unsigned i = 0; i < dst.size(); i++) vec->operands[i] = - bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1), Operand(vertex_id), + bld.vintrp(aco_opcode::v_interp_mov_f32, bld.def(v1), Operand::c32(vertex_id), bld.m0(prim_mask), idx, component + i); vec->definitions[0] = Definition(dst); bld.insert(std::move(vec)); @@ -5228,11 +5254,11 @@ visit_load_tess_coord(isel_context* ctx, nir_intrinsic_instr* instr) Operand tes_u(get_arg(ctx, ctx->args->ac.tes_u)); Operand tes_v(get_arg(ctx, ctx->args->ac.tes_v)); - Operand tes_w(0u); + Operand tes_w = Operand::zero(); if (ctx->shader->info.tess.primitive_mode == GL_TRIANGLES) { Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), tes_u, tes_v); - tmp = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand(0x3f800000u /* 1.0f */), tmp); + tmp = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::c32(0x3f800000u /* 1.0f */), tmp); tes_w = Operand(tmp); } @@ -5246,7 +5272,7 @@ load_desc_ptr(isel_context* ctx, unsigned desc_set) if (ctx->program->info->need_indirect_descriptor_sets) { Builder bld(ctx->program, ctx->block); Temp ptr64 = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->descriptor_sets[0])); - Operand off = bld.copy(bld.def(s1), Operand(desc_set << 2)); + Operand off = bld.copy(bld.def(s1), Operand::c32(desc_set << 2)); return bld.smem(aco_opcode::s_load_dword, bld.def(s1), ptr64, off); //, false, false, false); } @@ -5281,21 +5307,21 @@ visit_load_resource(isel_context* ctx, nir_intrinsic_instr* instr) } if (nir_src_is_const(instr->src[0])) { - index = bld.copy(bld.def(s1), - Operand((uint32_t)(offset + nir_src_as_uint(instr->src[0]) * 
stride))); + index = + bld.copy(bld.def(s1), Operand::c32((offset + nir_src_as_uint(instr->src[0]) * stride))); } else if (index.type() == RegType::vgpr) { if (stride != 1) { bool index24bit = layout->binding[binding].array_size <= 0x1000000; index = bld.v_mul_imm(bld.def(v1), index, stride, index24bit); } if (offset) - index = bld.vadd32(bld.def(v1), Operand(offset), index); + index = bld.vadd32(bld.def(v1), Operand::c32(offset), index); } else { if (stride != 1) - index = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand(stride), index); + index = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(stride), index); if (offset) - index = - bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), Operand(offset), index); + index = bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), + Operand::c32(offset), index); } Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); @@ -5303,7 +5329,7 @@ visit_load_resource(isel_context* ctx, nir_intrinsic_instr* instr) elems[0] = desc_ptr; elems[1] = index; ctx->allocated_vec.emplace(dst.id(), elems); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), desc_ptr, index, Operand(0u)); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), desc_ptr, index, Operand::zero()); } void @@ -5382,8 +5408,8 @@ visit_load_ubo(isel_context* ctx, nir_intrinsic_instr* instr) S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); } rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), rsrc, - Operand(S_008F04_BASE_ADDRESS_HI(ctx->options->address32_hi)), - Operand(0xFFFFFFFFu), Operand(desc_type)); + Operand::c32(S_008F04_BASE_ADDRESS_HI(ctx->options->address32_hi)), + Operand::c32(0xFFFFFFFFu), Operand::c32(desc_type)); } else { rsrc = load_buffer_rsrc(ctx, rsrc); } @@ -5404,15 +5430,15 @@ visit_load_sbt_amd(isel_context* ctx, nir_intrinsic_instr* instr) Builder bld(ctx->program, ctx->block); Temp desc_base = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.sbt_descriptors)); - Operand desc_off = bld.copy(bld.def(s1), Operand(binding * 16u)); + Operand desc_off = bld.copy(bld.def(s1), Operand::c32(binding * 16u)); Temp rsrc = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), desc_base, desc_off); /* If we want more we need to implement */ assert(instr->dest.ssa.bit_size == 32); assert(instr->num_components == 1); - bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst), rsrc, index, Operand(0u), base, false, - false, true); + bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst), rsrc, index, Operand::zero(), base, + false, false, true); } void @@ -5444,8 +5470,8 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr) Temp index = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa)); if (offset != 0) // TODO check if index != 0 as well - index = bld.nuw().sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), Operand(offset), - index); + index = bld.nuw().sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), + Operand::c32(offset), index); Temp ptr = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->args->ac.push_constants)); Temp vec = dst; bool trim = false; @@ -5483,7 +5509,7 @@ visit_load_push_constant(isel_context* ctx, nir_intrinsic_instr* instr) bld.smem(op, Definition(vec), ptr, index).instr->smem().prevent_overflow = true; if (!aligned) { - Operand byte_offset = index_cv ? Operand((offset + index_cv->u32) % 4) : Operand(index); + Operand byte_offset = index_cv ? 
Operand::c32((offset + index_cv->u32) % 4) : Operand(index); byte_align_scalar(ctx, vec, byte_offset, dst); return; } @@ -5521,15 +5547,15 @@ visit_load_constant(isel_context* ctx, nir_intrinsic_instr* instr) Temp offset = get_ssa_temp(ctx, instr->src[0].ssa); if (base && offset.type() == RegType::sgpr) offset = bld.nuw().sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset, - Operand(base)); + Operand::c32(base)); else if (base && offset.type() == RegType::vgpr) - offset = bld.vadd32(bld.def(v1), Operand(base), offset); + offset = bld.vadd32(bld.def(v1), Operand::c32(base), offset); - Temp rsrc = - bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), - bld.pseudo(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc), - Operand(ctx->constant_data_offset)), - Operand(MIN2(base + range, ctx->shader->constant_data_size)), Operand(desc_type)); + Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), + bld.pseudo(aco_opcode::p_constaddr, bld.def(s2), bld.def(s1, scc), + Operand::c32(ctx->constant_data_offset)), + Operand::c32(MIN2(base + range, ctx->shader->constant_data_size)), + Operand::c32(desc_type)); unsigned size = instr->dest.ssa.bit_size / 8; // TODO: get alignment information for subdword constants load_buffer(ctx, instr->num_components, size, dst, rsrc, offset, size, 0); @@ -5586,8 +5612,8 @@ visit_discard(isel_context* ctx, nir_intrinsic_instr* instr) ctx->program->needs_exact = true; /* save exec somewhere temporarily so that it doesn't get * overwritten before the discard from outer exec masks */ - Temp cond = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), Operand(0xFFFFFFFF), - Operand(exec, bld.lm)); + Temp cond = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), + Operand::c32(0xFFFFFFFF), Operand(exec, bld.lm)); bld.pseudo(aco_opcode::p_discard_if, cond); ctx->block->kind |= block_kind_uses_discard_if; return; @@ -5608,7 +5634,7 @@ visit_discard(isel_context* ctx, nir_intrinsic_instr* instr) if (!ctx->cf_info.parent_if.is_divergent) { /* program just ends here */ ctx->block->kind |= block_kind_uses_discard_if; - bld.pseudo(aco_opcode::p_discard_if, Operand(0xFFFFFFFFu)); + bld.pseudo(aco_opcode::p_discard_if, Operand::c32(0xFFFFFFFFu)); // TODO: it will potentially be followed by a branch which is dead code to sanitize NIR phis } else { ctx->block->kind |= block_kind_discard; @@ -5672,7 +5698,7 @@ get_sampler_desc(isel_context* ctx, nir_deref_instr* deref_instr, if (array_size != 1) indirect = - bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand(array_size), indirect); + bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(array_size), indirect); if (!index_set) { index = indirect; @@ -5745,19 +5771,19 @@ get_sampler_desc(isel_context* ctx, nir_deref_instr* deref_instr, const uint32_t* samplers = radv_immutable_samplers(layout, binding); uint32_t dword0_mask = tex_instr->op == nir_texop_tg4 ? 
C_008F30_TRUNC_COORD : 0xffffffffu; return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), - Operand(samplers[constant_index * 4 + 0] & dword0_mask), - Operand(samplers[constant_index * 4 + 1]), - Operand(samplers[constant_index * 4 + 2]), - Operand(samplers[constant_index * 4 + 3])); + Operand::c32(samplers[constant_index * 4 + 0] & dword0_mask), + Operand::c32(samplers[constant_index * 4 + 1]), + Operand::c32(samplers[constant_index * 4 + 2]), + Operand::c32(samplers[constant_index * 4 + 3])); } Operand off; if (!index_set) { - off = bld.copy(bld.def(s1), Operand(offset)); + off = bld.copy(bld.def(s1), Operand::c32(offset)); } else { off = Operand( - (Temp)bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), Operand(offset), - bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand(stride), index))); + (Temp)bld.sop2(aco_opcode::s_add_i32, bld.def(s1), bld.def(s1, scc), Operand::c32(offset), + bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(stride), index))); } Temp res = bld.smem(opcode, bld.def(type), list, off); @@ -5791,9 +5817,8 @@ get_sampler_desc(isel_context* ctx, nir_deref_instr* deref_instr, /* WRITE_COMPRESS_ENABLE must be 0 for all image loads to workaround a * hardware bug. */ - components[6] = - bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), components[6], - bld.copy(bld.def(s1), Operand((uint32_t)C_00A018_WRITE_COMPRESS_ENABLE))); + components[6] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), components[6], + bld.copy(bld.def(s1), Operand::c32(C_00A018_WRITE_COMPRESS_ENABLE))); res = bld.pseudo(aco_opcode::p_create_vector, bld.def(s8), components[0], components[1], components[2], components[3], components[4], components[5], components[6], @@ -5810,7 +5835,7 @@ get_sampler_desc(isel_context* ctx, nir_deref_instr* deref_instr, * nir_texop_tg4, even if the sampler uses nearest/point filtering. 
*/ components[0] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), components[0], - Operand((uint32_t)C_008F30_TRUNC_COORD)); + Operand::c32(C_008F30_TRUNC_COORD)); res = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), components[0], components[1], components[2], components[3]); @@ -5966,32 +5991,33 @@ adjust_sample_index_using_fmask(isel_context* ctx, bool da, std::vector& c Operand sample_index4; if (sample_index.isConstant()) { if (sample_index.constantValue() < 16) { - sample_index4 = Operand(sample_index.constantValue() << 2); + sample_index4 = Operand::c32(sample_index.constantValue() << 2); } else { - sample_index4 = Operand(0u); + sample_index4 = Operand::zero(); } } else if (sample_index.regClass() == s1) { - sample_index4 = - bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), sample_index, Operand(2u)); + sample_index4 = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), sample_index, + Operand::c32(2u)); } else { assert(sample_index.regClass() == v1); - sample_index4 = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), sample_index); + sample_index4 = + bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), sample_index); } Temp final_sample; if (sample_index4.isConstant() && sample_index4.constantValue() == 0) - final_sample = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(15u), fmask); + final_sample = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(15u), fmask); else if (sample_index4.isConstant() && sample_index4.constantValue() == 28) - final_sample = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand(28u), fmask); + final_sample = bld.vop2(aco_opcode::v_lshrrev_b32, bld.def(v1), Operand::c32(28u), fmask); else final_sample = - bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand(4u)); + bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), fmask, sample_index4, Operand::c32(4u)); /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK * resource descriptor is 0 (invalid), */ Temp compare = bld.tmp(bld.lm); - bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(compare), Operand(0u), + bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(compare), Operand::zero(), emit_extract_vector(ctx, fmask_desc_ptr, 1, s1)) .def(0) .setHint(vcc); @@ -6025,8 +6051,8 @@ get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr, const stru if (instr->intrinsic == nir_intrinsic_image_deref_load || instr->intrinsic == nir_intrinsic_image_deref_sparse_load) { nir_const_value* sample_cv = nir_src_as_const_value(instr->src[2]); - Operand sample_index = - sample_cv ? Operand(sample_cv->u32) : Operand(emit_extract_vector(ctx, src2, 0, v1)); + Operand sample_index = sample_cv ? Operand::c32(sample_cv->u32) + : Operand(emit_extract_vector(ctx, src2, 0, v1)); std::vector fmask_load_address; for (unsigned i = 0; i < (is_array ? 
3 : 2); i++) fmask_load_address.emplace_back(emit_extract_vector(ctx, src0, i, v1)); @@ -6044,7 +6070,7 @@ get_image_coords(isel_context* ctx, const nir_intrinsic_instr* instr, const stru if (gfx9_1d) { coords[0] = emit_extract_vector(ctx, src0, 0, v1); coords.resize(coords.size() + 1); - coords[1] = bld.copy(bld.def(v1), Operand(0u)); + coords[1] = bld.copy(bld.def(v1), Operand::zero()); if (is_array) coords[2] = emit_extract_vector(ctx, src0, 1, v1); } else { @@ -6091,7 +6117,7 @@ emit_tfe_init(Builder& bld, Temp dst) aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)}; for (unsigned i = 0; i < dst.size(); i++) - vec->operands[i] = Operand(0u); + vec->operands[i] = Operand::zero(); vec->definitions[0] = Definition(tmp); /* Since this is fixed to an instruction's definition register, any CSE will * just create copies. Copying costs about the same as zero-initialization, @@ -6159,7 +6185,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) create_instruction(opcode, Format::MUBUF, 3 + is_sparse, 1)}; load->operands[0] = Operand(resource); load->operands[1] = Operand(vindex); - load->operands[2] = Operand((uint32_t)0); + load->operands[2] = Operand::c32(0); load->definitions[0] = Definition(tmp); load->idxen = true; load->glc = access & (ACCESS_VOLATILE | ACCESS_COHERENT); @@ -6193,7 +6219,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr) * 32-bit. So add a zero to the end so expand_vector() works correctly. */ tmp = bld.pseudo(aco_opcode::p_create_vector, bld.def(RegType::vgpr, tmp.size() + 1), tmp, - Operand(0u)); + Operand::zero()); } expand_vector(ctx, tmp, dst, instr->dest.ssa.num_components, expand_mask); @@ -6237,7 +6263,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr) create_instruction(opcode, Format::MUBUF, 4, 0)}; store->operands[0] = Operand(rsrc); store->operands[1] = Operand(vindex); - store->operands[2] = Operand((uint32_t)0); + store->operands[2] = Operand::c32(0); store->operands[3] = Operand(data); store->idxen = true; store->glc = glc; @@ -6361,7 +6387,7 @@ visit_image_atomic(isel_context* ctx, nir_intrinsic_instr* instr) is_64bit ? buf_op64 : buf_op, Format::MUBUF, 4, return_previous ? 1 : 0)}; mubuf->operands[0] = Operand(resource); mubuf->operands[1] = Operand(vindex); - mubuf->operands[2] = Operand((uint32_t)0); + mubuf->operands[2] = Operand::c32(0); mubuf->operands[3] = Operand(data); if (return_previous) mubuf->definitions[0] = Definition(dst); @@ -6404,15 +6430,15 @@ get_buffer_size(isel_context* ctx, Temp desc, Temp dst) Temp size = emit_extract_vector(ctx, desc, 2, s1); Temp size_div3 = bld.vop3(aco_opcode::v_mul_hi_u32, bld.def(v1), - bld.copy(bld.def(v1), Operand(0xaaaaaaabu)), size); + bld.copy(bld.def(v1), Operand::c32(0xaaaaaaabu)), size); size_div3 = bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), - bld.as_uniform(size_div3), Operand(1u)); + bld.as_uniform(size_div3), Operand::c32(1u)); Temp stride = emit_extract_vector(ctx, desc, 1, s1); stride = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), stride, - Operand((5u << 16) | 16u)); + Operand::c32((5u << 16) | 16u)); - Temp is12 = bld.sopc(aco_opcode::s_cmp_eq_i32, bld.def(s1, scc), stride, Operand(12u)); + Temp is12 = bld.sopc(aco_opcode::s_cmp_eq_i32, bld.def(s1, scc), stride, Operand::c32(12u)); size = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), size_div3, size, bld.scc(is12)); Temp shr_dst = dst.type() == RegType::vgpr ? 
bld.tmp(s1) : dst; @@ -6445,7 +6471,7 @@ visit_image_size(isel_context* ctx, nir_intrinsic_instr* instr) /* LOD */ assert(nir_src_as_uint(instr->src[1]) == 0); - std::vector lod{bld.copy(bld.def(v1), Operand(0u))}; + std::vector lod{bld.copy(bld.def(v1), Operand::zero())}; /* Resource */ Temp resource = get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), @@ -6468,7 +6494,7 @@ visit_image_size(isel_context* ctx, nir_intrinsic_instr* instr) emit_split_vector(ctx, tmp, 3); /* divide 3rd value by 6 by multiplying with magic number */ - Temp c = bld.copy(bld.def(s1), Operand((uint32_t)0x2AAAAAAB)); + Temp c = bld.copy(bld.def(s1), Operand::c32(0x2AAAAAAB)); Temp by_6 = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), emit_extract_vector(ctx, tmp, 2, v1), c); @@ -6492,24 +6518,24 @@ get_image_samples(isel_context* ctx, Definition dst, Temp resource) Temp dword3 = emit_extract_vector(ctx, resource, 3, s1); Temp samples_log2 = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3, - Operand(16u | 4u << 16)); - Temp samples = - bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand(1u), samples_log2); + Operand::c32(16u | 4u << 16)); + Temp samples = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand::c32(1u), + samples_log2); Temp type = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), dword3, - Operand(28u | 4u << 16 /* offset=28, width=4 */)); + Operand::c32(28u | 4u << 16 /* offset=28, width=4 */)); - Operand default_sample = Operand(1u); + Operand default_sample = Operand::c32(1u); if (ctx->options->robust_buffer_access) { /* Extract the second dword of the descriptor, if it's * all zero, then it's a null descriptor. */ Temp dword1 = emit_extract_vector(ctx, resource, 1, s1); Temp is_non_null_descriptor = - bld.sopc(aco_opcode::s_cmp_gt_u32, bld.def(s1, scc), dword1, Operand(0u)); + bld.sopc(aco_opcode::s_cmp_gt_u32, bld.def(s1, scc), dword1, Operand::zero()); default_sample = Operand(is_non_null_descriptor); } - Temp is_msaa = bld.sopc(aco_opcode::s_cmp_ge_u32, bld.def(s1, scc), type, Operand(14u)); + Temp is_msaa = bld.sopc(aco_opcode::s_cmp_ge_u32, bld.def(s1, scc), type, Operand::c32(14u)); bld.sop2(aco_opcode::s_cselect_b32, dst, samples, default_sample, bld.scc(is_msaa)); } @@ -6577,7 +6603,7 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) create_instruction(op, Format::MUBUF, 4, 0)}; store->operands[0] = Operand(rsrc); store->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1); - store->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand((uint32_t)0); + store->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand::c32(0); store->operands[3] = Operand(write_datas[i]); store->offset = offsets[i]; store->offen = (offset.type() == RegType::vgpr); @@ -6657,7 +6683,7 @@ visit_atomic_ssbo(isel_context* ctx, nir_intrinsic_instr* instr) create_instruction(op, Format::MUBUF, 4, return_previous ? 1 : 0)}; mubuf->operands[0] = Operand(rsrc); mubuf->operands[1] = offset.type() == RegType::vgpr ? Operand(offset) : Operand(v1); - mubuf->operands[2] = offset.type() == RegType::sgpr ? Operand(offset) : Operand((uint32_t)0); + mubuf->operands[2] = offset.type() == RegType::sgpr ? 
Operand(offset) : Operand::c32(0); mubuf->operands[3] = Operand(data); if (return_previous) mubuf->definitions[0] = Definition(dst); @@ -6753,9 +6779,9 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr) bld.pseudo(aco_opcode::p_split_vector, Definition(addr0), Definition(addr1), addr); bld.vop2(aco_opcode::v_add_co_u32, Definition(new_addr0), - bld.hint_vcc(Definition(carry)), Operand(offset), addr0); - bld.vop2(aco_opcode::v_addc_co_u32, Definition(new_addr1), bld.def(bld.lm), Operand(0u), - addr1, carry) + bld.hint_vcc(Definition(carry)), Operand::c32(offset), addr0); + bld.vop2(aco_opcode::v_addc_co_u32, Definition(new_addr1), bld.def(bld.lm), + Operand::zero(), addr1, carry) .def(1) .setHint(vcc); @@ -6805,7 +6831,7 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr) create_instruction(op, Format::MUBUF, 4, 0)}; mubuf->operands[0] = Operand(rsrc); mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1); - mubuf->operands[2] = Operand(0u); + mubuf->operands[2] = Operand::zero(); mubuf->operands[3] = Operand(write_datas[i]); mubuf->glc = glc; mubuf->dlc = false; @@ -6958,7 +6984,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr) create_instruction(op, Format::MUBUF, 4, return_previous ? 1 : 0)}; mubuf->operands[0] = Operand(rsrc); mubuf->operands[1] = addr.type() == RegType::vgpr ? Operand(addr) : Operand(v1); - mubuf->operands[2] = Operand(0u); + mubuf->operands[2] = Operand::zero(); mubuf->operands[3] = Operand(data); if (return_previous) mubuf->definitions[0] = Definition(dst); @@ -7200,7 +7226,7 @@ visit_shared_atomic(isel_context* ctx, nir_intrinsic_instr* instr) } if (offset > 65535) { - address = bld.vadd32(bld.def(v1), Operand(offset), address); + address = bld.vadd32(bld.def(v1), Operand::c32(offset), address); offset = 0; } @@ -7227,7 +7253,8 @@ get_scratch_resource(isel_context* ctx) Builder bld(ctx->program, ctx->block); Temp scratch_addr = ctx->program->private_segment_buffer; if (ctx->stage != compute_cs) - scratch_addr = bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand(0u)); + scratch_addr = + bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero()); uint32_t rsrc_conf = S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx->program->wave_size == 64 ? 3 : 2); @@ -7245,8 +7272,8 @@ get_scratch_resource(isel_context* ctx) if (ctx->program->chip_class <= GFX8) rsrc_conf |= S_008F0C_ELEMENT_SIZE(1); - return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand(-1u), - Operand(rsrc_conf)); + return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), scratch_addr, Operand::c32(-1u), + Operand::c32(rsrc_conf)); } void @@ -7309,10 +7336,11 @@ visit_load_sample_mask_in(isel_context* ctx, nir_intrinsic_instr* instr) if (log2_ps_iter_samples) { /* gl_SampleMaskIn[0] = (SampleCoverage & (1 << gl_SampleID)). 
*/ - Temp sample_id = bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), - get_arg(ctx, ctx->args->ac.ancillary), Operand(8u), Operand(4u)); + Temp sample_id = + bld.vop3(aco_opcode::v_bfe_u32, bld.def(v1), get_arg(ctx, ctx->args->ac.ancillary), + Operand::c32(8u), Operand::c32(4u)); Temp mask = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), sample_id, - bld.copy(bld.def(v1), Operand(1u))); + bld.copy(bld.def(v1), Operand::c32(1u))); bld.vop2(aco_opcode::v_and_b32, Definition(dst), mask, get_arg(ctx, ctx->args->ac.sample_coverage)); } else { @@ -7331,8 +7359,9 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr) nir_const_value* next_vertex_cv = nir_src_as_const_value(instr->src[0]); /* get GSVS ring */ - Temp gsvs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), - ctx->program->private_segment_buffer, Operand(RING_GSVS_GS * 16u)); + Temp gsvs_ring = + bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), ctx->program->private_segment_buffer, + Operand::c32(RING_GSVS_GS * 16u)); unsigned num_components = ctx->program->info->gs.num_stream_output_components[stream]; @@ -7354,18 +7383,18 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr) Definition(gsvs_dwords[2]), Definition(gsvs_dwords[3]), gsvs_ring); if (stream_offset) { - Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand(stream_offset)); + Temp stream_offset_tmp = bld.copy(bld.def(s1), Operand::c32(stream_offset)); Temp carry = bld.tmp(s1); gsvs_dwords[0] = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.scc(Definition(carry)), gsvs_dwords[0], stream_offset_tmp); gsvs_dwords[1] = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), - gsvs_dwords[1], Operand(0u), bld.scc(carry)); + gsvs_dwords[1], Operand::zero(), bld.scc(carry)); } gsvs_dwords[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), gsvs_dwords[1], - Operand(S_008F04_STRIDE(stride))); - gsvs_dwords[2] = bld.copy(bld.def(s1), Operand((uint32_t)ctx->program->wave_size)); + Operand::c32(S_008F04_STRIDE(stride))); + gsvs_dwords[2] = bld.copy(bld.def(s1), Operand::c32(ctx->program->wave_size)); gsvs_ring = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), gsvs_dwords[0], gsvs_dwords[1], gsvs_dwords[2], gsvs_dwords[3]); @@ -7384,10 +7413,10 @@ visit_emit_vertex_with_counter(isel_context* ctx, nir_intrinsic_instr* instr) unsigned const_offset = (offset + (next_vertex_cv ? 
next_vertex_cv->u32 : 0u)) * 4u; if (const_offset >= 4096u) { if (vaddr_offset.isUndefined()) - vaddr_offset = bld.copy(bld.def(v1), Operand(const_offset / 4096u * 4096u)); + vaddr_offset = bld.copy(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u)); else - vaddr_offset = - bld.vadd32(bld.def(v1), Operand(const_offset / 4096u * 4096u), vaddr_offset); + vaddr_offset = bld.vadd32(bld.def(v1), Operand::c32(const_offset / 4096u * 4096u), + vaddr_offset); const_offset %= 4096u; } @@ -7457,7 +7486,7 @@ emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp sr Temp tmp = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), src, Operand(exec, bld.lm)); tmp = bld.sop1(Builder::s_bcnt1_i32, bld.def(s1), bld.def(s1, scc), tmp); - tmp = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), tmp, Operand(1u)) + tmp = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), tmp, Operand::c32(1u)) .def(1) .getTemp(); return bool_to_vector_condition(ctx, tmp); @@ -7474,7 +7503,7 @@ emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp sr */ Temp lane_id = emit_mbcnt(ctx, bld.tmp(v1)); Temp cluster_offset = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), - Operand(~uint32_t(cluster_size - 1)), lane_id); + Operand::c32(~uint32_t(cluster_size - 1)), lane_id); Temp tmp; if (op == nir_op_iand) @@ -7494,17 +7523,17 @@ emit_boolean_reduce(isel_context* ctx, nir_op op, unsigned cluster_size, Temp sr tmp = bld.vop2_e64(aco_opcode::v_lshrrev_b32, bld.def(v1), cluster_offset, tmp); tmp = emit_extract_vector(ctx, tmp, 0, v1); if (cluster_mask != 0xffffffff) - tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(cluster_mask), tmp); + tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(cluster_mask), tmp); if (op == nir_op_iand) { - return bld.vopc(aco_opcode::v_cmp_eq_u32, bld.hint_vcc(bld.lm), Operand(cluster_mask), + return bld.vopc(aco_opcode::v_cmp_eq_u32, bld.hint_vcc(bld.lm), Operand::c32(cluster_mask), tmp); } else if (op == nir_op_ior) { - return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.hint_vcc(bld.lm), Operand(0u), tmp); + return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.hint_vcc(bld.lm), Operand::zero(), tmp); } else if (op == nir_op_ixor) { - tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(1u), - bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), tmp, Operand(0u))); - return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.hint_vcc(bld.lm), Operand(0u), tmp); + tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), + bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), tmp, Operand::zero())); + return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.hint_vcc(bld.lm), Operand::zero(), tmp); } assert(false); return Temp(); @@ -7531,12 +7560,12 @@ emit_boolean_exclusive_scan(isel_context* ctx, nir_op op, Temp src) Temp mbcnt = emit_mbcnt(ctx, bld.tmp(v1), Operand(tmp)); if (op == nir_op_iand) - return bld.vopc(aco_opcode::v_cmp_eq_u32, bld.hint_vcc(bld.lm), Operand(0u), mbcnt); + return bld.vopc(aco_opcode::v_cmp_eq_u32, bld.hint_vcc(bld.lm), Operand::zero(), mbcnt); else if (op == nir_op_ior) - return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.hint_vcc(bld.lm), Operand(0u), mbcnt); + return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.hint_vcc(bld.lm), Operand::zero(), mbcnt); else if (op == nir_op_ixor) - return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.hint_vcc(bld.lm), Operand(0u), - bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(1u), mbcnt)); + return bld.vopc(aco_opcode::v_cmp_lg_u32, bld.hint_vcc(bld.lm), Operand::zero(), + 
bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), mbcnt)); assert(false); return Temp(); @@ -7635,21 +7664,22 @@ emit_addition_uniform_reduce(isel_context* ctx, nir_op op, Definition dst, nir_s src_tmp = bld.as_uniform(src_tmp); if (op == nir_op_ixor && count.type() == RegType::sgpr) - count = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), count, Operand(1u)); + count = + bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), count, Operand::c32(1u)); else if (op == nir_op_ixor) - count = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(1u), count); + count = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), count); assert(dst.getTemp().type() == count.type()); if (nir_src_is_const(src)) { if (nir_src_as_uint(src) == 1 && dst.bytes() <= 2) - bld.pseudo(aco_opcode::p_extract_vector, dst, count, Operand(0u)); + bld.pseudo(aco_opcode::p_extract_vector, dst, count, Operand::zero()); else if (nir_src_as_uint(src) == 1) bld.copy(dst, count); else if (nir_src_as_uint(src) == 0 && dst.bytes() <= 2) - bld.vop1(aco_opcode::v_mov_b32, dst, Operand(0u)); /* RA will use SDWA if possible */ + bld.vop1(aco_opcode::v_mov_b32, dst, Operand::zero()); /* RA will use SDWA if possible */ else if (nir_src_as_uint(src) == 0) - bld.copy(dst, Operand(0u)); + bld.copy(dst, Operand::zero()); else if (count.type() == RegType::vgpr) bld.v_mul_imm(dst, count, nir_src_as_uint(src)); else @@ -7707,7 +7737,7 @@ emit_uniform_scan(isel_context* ctx, nir_intrinsic_instr* instr) Temp packed_tid; if (inc) - packed_tid = emit_mbcnt(ctx, bld.tmp(v1), Operand(exec, bld.lm), Operand(1u)); + packed_tid = emit_mbcnt(ctx, bld.tmp(v1), Operand(exec, bld.lm), Operand::c32(1u)); else packed_tid = emit_mbcnt(ctx, bld.tmp(v1), Operand(exec, bld.lm)); @@ -7733,12 +7763,15 @@ emit_uniform_scan(isel_context* ctx, nir_intrinsic_instr* instr) uint32_t identity_lo = get_reduction_identity(reduce_op, 0); uint32_t identity_hi = get_reduction_identity(reduce_op, 1); - lo = bld.writelane(bld.def(v1), bld.copy(bld.hint_m0(s1), Operand(identity_lo)), lane, lo); - hi = bld.writelane(bld.def(v1), bld.copy(bld.hint_m0(s1), Operand(identity_hi)), lane, hi); + lo = + bld.writelane(bld.def(v1), bld.copy(bld.hint_m0(s1), Operand::c32(identity_lo)), lane, lo); + hi = + bld.writelane(bld.def(v1), bld.copy(bld.hint_m0(s1), Operand::c32(identity_hi)), lane, hi); bld.pseudo(aco_opcode::p_create_vector, dst, lo, hi); } else { uint32_t identity = get_reduction_identity(reduce_op, 0); - bld.writelane(dst, bld.copy(bld.hint_m0(s1), Operand(identity)), lane, as_vgpr(ctx, src)); + bld.writelane(dst, bld.copy(bld.hint_m0(s1), Operand::c32(identity)), lane, + as_vgpr(ctx, src)); } return true; @@ -7919,15 +7952,15 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) Operand offset; if (const_addr) { sample_pos_offset += const_addr->u32 << 3; - offset = Operand(sample_pos_offset); + offset = Operand::c32(sample_pos_offset); } else if (ctx->options->chip_class >= GFX9) { offset = bld.sop2(aco_opcode::s_lshl3_add_u32, bld.def(s1), bld.def(s1, scc), addr, - Operand(sample_pos_offset)); + Operand::c32(sample_pos_offset)); } else { - offset = - bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), addr, Operand(3u)); + offset = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), addr, + Operand::c32(3u)); offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), offset, - Operand(sample_pos_offset)); + Operand::c32(sample_pos_offset)); } Operand off = bld.copy(bld.def(s1), 
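The two chip paths in the sample-position hunk above compute the same scalar offset; GFX9+ merely fuses the shift and add into s_lshl3_add_u32. A sketch on plain integers (function name is illustrative):

#include <cstdint>

/* Each sample position entry is two dwords (8 bytes), hence the shift by 3.
 * GFX9+: one s_lshl3_add_u32; earlier chips: s_lshl_b32 + s_add_u32. */
uint32_t sample_pos_byte_offset(uint32_t sample_index, uint32_t base)
{
   return (sample_index << 3) + base;
}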
Operand(offset)); @@ -7935,7 +7968,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, off); } else if (ctx->options->chip_class >= GFX9) { - addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), addr); + addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr); sample_pos = bld.global(aco_opcode::global_load_dwordx2, bld.def(v2), addr, private_segment_buffer, sample_pos_offset); } else if (ctx->options->chip_class >= GFX7) { @@ -7945,16 +7978,16 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) bld.pseudo(aco_opcode::p_split_vector, Definition(tmp0), Definition(tmp1), private_segment_buffer); Definition scc_tmp = bld.def(s1, scc); - tmp0 = - bld.sop2(aco_opcode::s_add_u32, bld.def(s1), scc_tmp, tmp0, Operand(sample_pos_offset)); - tmp1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), tmp1, Operand(0u), - bld.scc(scc_tmp.getTemp())); - addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), addr); + tmp0 = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), scc_tmp, tmp0, + Operand::c32(sample_pos_offset)); + tmp1 = bld.sop2(aco_opcode::s_addc_u32, bld.def(s1), bld.def(s1, scc), tmp1, + Operand::zero(), bld.scc(scc_tmp.getTemp())); + addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr); Temp pck0 = bld.tmp(v1); Temp carry = bld.vadd32(Definition(pck0), tmp0, addr, true).def(1).getTemp(); tmp1 = as_vgpr(ctx, tmp1); Temp pck1 = bld.vop2_e64(aco_opcode::v_addc_co_u32, bld.def(v1), - bld.hint_vcc(bld.def(bld.lm)), tmp1, Operand(0u), carry); + bld.hint_vcc(bld.def(bld.lm)), tmp1, Operand::zero(), carry); addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), pck0, pck1); /* sample_pos = flat_load_dwordx2 addr */ @@ -7965,10 +7998,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) uint32_t rsrc_conf = S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) | S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); Temp rsrc = bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer, - Operand(0u), Operand(rsrc_conf)); + Operand::zero(), Operand::c32(rsrc_conf)); - addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), addr); - addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), addr, Operand(0u)); + addr = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), addr); + addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), addr, Operand::zero()); sample_pos = bld.tmp(v2); @@ -7977,7 +8010,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) load->definitions[0] = Definition(sample_pos); load->operands[0] = Operand(rsrc); load->operands[1] = Operand(addr); - load->operands[2] = Operand(0u); + load->operands[2] = Operand::zero(); load->offset = sample_pos_offset; load->offen = 0; load->addr64 = true; @@ -7991,8 +8024,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) Temp pos1 = bld.tmp(RegClass(sample_pos.type(), 1)); Temp pos2 = bld.tmp(RegClass(sample_pos.type(), 1)); bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), sample_pos); - pos1 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos1, Operand(0x3f000000u)); - pos2 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos2, Operand(0x3f000000u)); + pos1 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos1, Operand::c32(0x3f000000u)); + pos2 = bld.vop2_e64(aco_opcode::v_sub_f32, bld.def(v1), pos2, 
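The GFX7 fallback above widens the address math to 64 bits by chaining carries, first on the SGPR halves (s_add_u32 then s_addc_u32) and again on the VGPR halves (vadd32 with carry, then v_addc_co_u32). The same arithmetic as a self-contained sketch:

#include <cstdint>

/* Add a 32-bit value to a 64-bit quantity held as two 32-bit halves;
 * `carry` plays the role of scc (scalar path) or the vcc-style carry
 * (vector path). */
uint64_t add64_via_halves(uint32_t lo, uint32_t hi, uint32_t addend)
{
   uint32_t new_lo = lo + addend;
   uint32_t carry = new_lo < lo ? 1u : 0u; /* unsigned wrap = carry out */
   uint32_t new_hi = hi + 0u + carry;      /* the hunk adds Operand::zero() here */
   return ((uint64_t)new_hi << 32) | new_lo;
}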
Operand::c32(0x3f000000u)); emit_interp_center(ctx, get_ssa_temp(ctx, &instr->dest.ssa), pos1, pos2); break; @@ -8007,7 +8040,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } case nir_intrinsic_load_front_face: { bld.vopc(aco_opcode::v_cmp_lg_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - Operand(0u), get_arg(ctx, ctx->args->ac.front_face)) + Operand::zero(), get_arg(ctx, ctx->args->ac.front_face)) .def(0) .setHint(vcc); break; @@ -8024,7 +8057,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_load_layer_id: { unsigned idx = nir_intrinsic_base(instr); bld.vintrp(aco_opcode::v_interp_mov_f32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - Operand(2u), bld.m0(get_arg(ctx, ctx->args->ac.prim_mask)), idx, 0); + Operand::c32(2u), bld.m0(get_arg(ctx, ctx->args->ac.prim_mask)), idx, 0); break; } case nir_intrinsic_load_frag_coord: { @@ -8037,9 +8070,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_load_sample_pos: { Temp posx = get_arg(ctx, ctx->args->ac.frag_pos[0]); Temp posy = get_arg(ctx, ctx->args->ac.frag_pos[1]); - bld.pseudo(aco_opcode::p_create_vector, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - posx.id() ? bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), posx) : Operand(0u), - posy.id() ? bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), posy) : Operand(0u)); + bld.pseudo( + aco_opcode::p_create_vector, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), + posx.id() ? bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), posx) : Operand::zero(), + posy.id() ? bld.vop1(aco_opcode::v_fract_f32, bld.def(v1), posy) : Operand::zero()); break; } case nir_intrinsic_load_tess_coord: visit_load_tess_coord(ctx, instr); break; @@ -8130,9 +8164,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); struct ac_arg* args = ctx->args->ac.workgroup_ids; bld.pseudo(aco_opcode::p_create_vector, Definition(dst), - args[0].used ? Operand(get_arg(ctx, args[0])) : Operand(0u), - args[1].used ? Operand(get_arg(ctx, args[1])) : Operand(0u), - args[2].used ? Operand(get_arg(ctx, args[2])) : Operand(0u)); + args[0].used ? Operand(get_arg(ctx, args[0])) : Operand::zero(), + args[1].used ? Operand(get_arg(ctx, args[1])) : Operand::zero(), + args[2].used ? 
Operand(get_arg(ctx, args[2])) : Operand::zero()); emit_split_vector(ctx, dst, 3); break; } @@ -8155,15 +8189,16 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) /* After the s_and the bits are already multiplied by 64 (left shifted by 6) so we can just * feed that to v_or */ Temp tg_num = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), - Operand(0xfc0u), get_arg(ctx, ctx->args->ac.tg_size)); + Operand::c32(0xfc0u), get_arg(ctx, ctx->args->ac.tg_size)); bld.vop2(aco_opcode::v_or_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), tg_num, id); } else { /* Extract the bit field and multiply the result by 32 (left shift by 5), then do the OR */ - Temp tg_num = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->ac.tg_size), Operand(0x6u | (0x6u << 16))); + Temp tg_num = + bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), + get_arg(ctx, ctx->args->ac.tg_size), Operand::c32(0x6u | (0x6u << 16))); bld.vop3(aco_opcode::v_lshl_or_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - tg_num, Operand(0x5u), id); + tg_num, Operand::c32(0x5u), id); } break; } @@ -8171,14 +8206,14 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) if (ctx->stage == compute_cs) { bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), bld.def(s1, scc), get_arg(ctx, ctx->args->ac.tg_size), - Operand(0x6u | (0x6u << 16))); + Operand::c32(0x6u | (0x6u << 16))); } else if (ctx->stage.hw == HWStage::NGG) { /* Get the id of the current wave within the threadgroup (workgroup) */ bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), bld.def(s1, scc), get_arg(ctx, ctx->args->ac.merged_wave_info), - Operand(24u | (4u << 16))); + Operand::c32(24u | (4u << 16))); } else { - bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand(0x0u)); + bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand::zero()); } break; } @@ -8189,13 +8224,13 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_load_num_subgroups: { if (ctx->stage == compute_cs) bld.sop2(aco_opcode::s_and_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - bld.def(s1, scc), Operand(0x3fu), get_arg(ctx, ctx->args->ac.tg_size)); + bld.def(s1, scc), Operand::c32(0x3fu), get_arg(ctx, ctx->args->ac.tg_size)); else if (ctx->stage.hw == HWStage::NGG) bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), bld.def(s1, scc), get_arg(ctx, ctx->args->ac.merged_wave_info), - Operand(28u | (4u << 16))); + Operand::c32(28u | (4u << 16))); else - bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand(0x1u)); + bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), Operand::c32(0x1u)); break; } case nir_intrinsic_ballot: { @@ -8205,9 +8240,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) if (instr->src[0].ssa->bit_size == 1) { assert(src.regClass() == bld.lm); } else if (instr->src[0].ssa->bit_size == 32 && src.regClass() == v1) { - src = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), src); + src = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src); } else if (instr->src[0].ssa->bit_size == 64 && src.regClass() == v2) { - src = bld.vopc(aco_opcode::v_cmp_lg_u64, bld.def(bld.lm), Operand(0u), src); + src = bld.vopc(aco_opcode::v_cmp_lg_u64, bld.def(bld.lm), Operand::zero(), src); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); } @@ -8217,7 +8252,8 @@ 
visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) src = bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), Operand(exec, bld.lm), src); if (dst.size() != bld.lm.size()) { /* Wave32 with ballot size set to 64 */ - src = bld.pseudo(aco_opcode::p_create_vector, bld.def(dst.regClass()), src, Operand(0u)); + src = + bld.pseudo(aco_opcode::p_create_vector, bld.def(dst.regClass()), src, Operand::zero()); } emit_wqm(bld, src, dst); @@ -8269,8 +8305,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) else tmp = bld.vop2_e64(aco_opcode::v_lshrrev_b32, bld.def(v1), tid, src); tmp = emit_extract_vector(ctx, tmp, 0, v1); - tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(1u), tmp); - emit_wqm(bld, bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), tmp), + tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(1u), tmp); + emit_wqm(bld, bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), tmp), dst); } else { isel_err(&instr->instr, "Unimplemented NIR instr bit size"); @@ -8280,7 +8316,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } case nir_intrinsic_load_sample_id: { bld.vop3(aco_opcode::v_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - get_arg(ctx, ctx->args->ac.ancillary), Operand(8u), Operand(4u)); + get_arg(ctx, ctx->args->ac.ancillary), Operand::c32(8u), Operand::c32(4u)); break; } case nir_intrinsic_load_sample_mask_in: { @@ -8421,8 +8457,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) assert(src.regClass() == bld.lm); assert(dst.regClass() == bld.lm); uint32_t half_mask = 0x11111111u << lane; - Temp mask_tmp = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), Operand(half_mask), - Operand(half_mask)); + Temp mask_tmp = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), + Operand::c32(half_mask), Operand::c32(half_mask)); Temp tmp = bld.tmp(bld.lm); bld.sop1(Builder::s_wqm, Definition(tmp), bld.sop2(Builder::s_and, bld.def(bld.lm), bld.def(s1, scc), mask_tmp, @@ -8501,13 +8537,13 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) if (instr->dest.ssa.bit_size == 1) { assert(src.regClass() == bld.lm); - src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), - Operand((uint32_t)-1), src); + src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), + Operand::c32(-1), src); if (ctx->program->chip_class >= GFX8) src = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1), src, dpp_ctrl); else src = bld.ds(aco_opcode::ds_swizzle_b32, bld.def(v1), src, dpp_ctrl); - Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), src); + Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src); emit_wqm(bld, tmp, dst); } else if (instr->dest.ssa.bit_size == 8) { Temp tmp = bld.tmp(v1); @@ -8561,10 +8597,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) if (instr->dest.ssa.bit_size == 1) { assert(src.regClass() == bld.lm); - src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), - Operand((uint32_t)-1), src); + src = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), + Operand::c32(-1), src); src = emit_masked_swizzle(ctx, bld, src, mask); - Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand(0u), src); + Temp tmp = bld.vopc(aco_opcode::v_cmp_lg_u32, bld.def(bld.lm), Operand::zero(), src); emit_wqm(bld, tmp, dst); } else if (dst.regClass() == v1b) { Temp tmp = emit_wqm(bld, emit_masked_swizzle(ctx, bld, 
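Several hunks above hand s_bfe_u32 a packed field descriptor such as 0x6u | (0x6u << 16) or 24u | (4u << 16), while v_bfe_u32 (as in the sample-id hunk) takes offset and width as separate operands. A plain-integer model of the scalar form; the exact field positions follow the ISA encoding and should be treated as an assumption here:

#include <cstdint>

/* s_bfe_u32: src1 packs the bit offset in its low bits and the field
 * width at bit 16; the result is the zero-extended field. */
uint32_t sbfe_u32(uint32_t value, uint32_t packed)
{
   uint32_t offset = packed & 0x1f;
   uint32_t width = (packed >> 16) & 0x7f;
   if (width == 0)
      return 0;
   if (width > 31)
      return value >> offset;
   return (value >> offset) & ((1u << width) - 1);
}

/* sbfe_u32(tg_size, 0x6u | (0x6u << 16)) extracts the 6-bit field at
 * offset 6, i.e. the wave id within the threadgroup from the hunk above. */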
src, mask)); @@ -8654,7 +8690,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) break; } case nir_intrinsic_demote: - bld.pseudo(aco_opcode::p_demote_to_helper, Operand(-1u)); + bld.pseudo(aco_opcode::p_demote_to_helper, Operand::c32(-1u)); if (ctx->block->loop_nest_depth || ctx->cf_info.parent_if.is_divergent) ctx->cf_info.exec_potentially_empty_discard = true; @@ -8682,15 +8718,15 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_last_invocation: { Temp flbit = bld.sop1(Builder::s_flbit_i32, bld.def(s1), Operand(exec, bld.lm)); Temp last = bld.sop2(aco_opcode::s_sub_i32, bld.def(s1), bld.def(s1, scc), - Operand(ctx->program->wave_size - 1u), flbit); + Operand::c32(ctx->program->wave_size - 1u), flbit); emit_wqm(bld, last, get_ssa_temp(ctx, &instr->dest.ssa)); break; } case nir_intrinsic_elect: { Temp first = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)); - emit_wqm(bld, - bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc), Operand(1u), first), - get_ssa_temp(ctx, &instr->dest.ssa)); + emit_wqm( + bld, bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc), Operand::c32(1u), first), + get_ssa_temp(ctx, &instr->dest.ssa)); break; } case nir_intrinsic_shader_clock: { @@ -8699,7 +8735,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) ctx->options->chip_class >= GFX10_3) { /* "((size - 1) << 11) | register" (SHADER_CYCLES is encoded as register 29) */ Temp clock = bld.sopk(aco_opcode::s_getreg_b32, bld.def(s1), ((20 - 1) << 11) | 29); - bld.pseudo(aco_opcode::p_create_vector, Definition(dst), clock, Operand(0u)); + bld.pseudo(aco_opcode::p_create_vector, Definition(dst), clock, Operand::zero()); } else { aco_opcode opcode = nir_intrinsic_memory_scope(instr) == NIR_SCOPE_DEVICE ? 
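The shader_clock hunk above builds the s_getreg_b32 immediate exactly as its comment says, ((size - 1) << 11) | register; the middle bits select the first bit to read (zero here), which is included below as an assumption from the ISA layout:

#include <cstdint>

/* s_getreg_b32 simm16: hwreg id in the low bits, start bit at bit 6,
 * (size - 1) at bit 11. */
uint16_t getreg_imm(unsigned hwreg, unsigned first_bit, unsigned size)
{
   return (uint16_t)(((size - 1) << 11) | (first_bit << 6) | hwreg);
}

/* ((20 - 1) << 11) | 29 therefore reads 20 bits of hwreg 29
 * (SHADER_CYCLES), matching the constant in the hunk. */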
aco_opcode::s_memrealtime @@ -8739,13 +8775,13 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) if (ctx->shader->info.stage == MESA_SHADER_GEOMETRY) { if (ctx->options->chip_class >= GFX10) - bld.vop2_e64(aco_opcode::v_and_b32, Definition(dst), Operand(127u), + bld.vop2_e64(aco_opcode::v_and_b32, Definition(dst), Operand::c32(127u), get_arg(ctx, ctx->args->ac.gs_invocation_id)); else bld.copy(Definition(dst), get_arg(ctx, ctx->args->ac.gs_invocation_id)); } else if (ctx->shader->info.stage == MESA_SHADER_TESS_CTRL) { bld.vop3(aco_opcode::v_bfe_u32, Definition(dst), get_arg(ctx, ctx->args->ac.tcs_rel_ids), - Operand(8u), Operand(5u)); + Operand::c32(8u), Operand::c32(5u)); } else { unreachable("Unsupported stage for load_invocation_id"); } @@ -8782,7 +8818,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) ctx->shader->info.stage == MESA_SHADER_TESS_EVAL); Temp dst = get_ssa_temp(ctx, &instr->dest.ssa); - bld.copy(Definition(dst), Operand(ctx->args->options->key.tcs.input_vertices)); + bld.copy(Definition(dst), Operand::c32(ctx->args->options->key.tcs.input_vertices)); break; } case nir_intrinsic_emit_vertex_with_counter: { @@ -8809,7 +8845,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } case nir_intrinsic_load_ring_tess_factors_amd: { bld.smem(aco_opcode::s_load_dwordx4, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - ctx->program->private_segment_buffer, Operand(RING_HS_TESS_FACTOR * 16u)); + ctx->program->private_segment_buffer, Operand::c32(RING_HS_TESS_FACTOR * 16u)); break; } case nir_intrinsic_load_ring_tess_factors_offset_amd: { @@ -8819,7 +8855,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } case nir_intrinsic_load_ring_tess_offchip_amd: { bld.smem(aco_opcode::s_load_dwordx4, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - ctx->program->private_segment_buffer, Operand(RING_HS_TESS_OFFCHIP * 16u)); + ctx->program->private_segment_buffer, Operand::c32(RING_HS_TESS_OFFCHIP * 16u)); break; } case nir_intrinsic_load_ring_tess_offchip_offset_amd: { @@ -8830,7 +8866,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) case nir_intrinsic_load_ring_esgs_amd: { unsigned ring = ctx->stage.hw == HWStage::ES ? RING_ESGS_VS : RING_ESGS_GS; bld.smem(aco_opcode::s_load_dwordx4, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - ctx->program->private_segment_buffer, Operand(ring * 16u)); + ctx->program->private_segment_buffer, Operand::c32(ring * 16u)); break; } case nir_intrinsic_load_ring_es2gs_offset_amd: { @@ -8858,7 +8894,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) instr->intrinsic == nir_intrinsic_load_workgroup_num_input_vertices_amd ? 
12 : 22; bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), bld.def(s1, scc), get_arg(ctx, ctx->args->ac.gs_tg_info), - Operand(pos | (9u << 16u))); + Operand::c32(pos | (9u << 16u))); break; } case nir_intrinsic_load_initial_edgeflag_amd: { @@ -8868,7 +8904,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) Temp gs_invocation_id = get_arg(ctx, ctx->args->ac.gs_invocation_id); bld.vop3(aco_opcode::v_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)), - gs_invocation_id, Operand(8u + i), Operand(1u)); + gs_invocation_id, Operand::c32(8u + i), Operand::c32(1u)); break; } case nir_intrinsic_load_packed_passthrough_primitive_amd: { @@ -8907,8 +8943,9 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr) } case nir_intrinsic_load_shader_query_enabled_amd: { unsigned cmp_bit = 0; - Temp shader_query_enabled = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), - get_arg(ctx, ctx->args->ngg_gs_state), Operand(cmp_bit)); + Temp shader_query_enabled = + bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), + get_arg(ctx, ctx->args->ngg_gs_state), Operand::c32(cmp_bit)); bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), bool_to_vector_condition(ctx, shader_query_enabled)); break; @@ -8999,15 +9036,15 @@ build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, Temp deriv_y = emit_extract_vector(ctx, deriv, 1, v1); Temp deriv_z = emit_extract_vector(ctx, deriv, 2, v1); - Operand neg_one(0xbf800000u); - Operand one(0x3f800000u); - Operand two(0x40000000u); - Operand four(0x40800000u); + Operand neg_one = Operand::c32(0xbf800000u); + Operand one = Operand::c32(0x3f800000u); + Operand two = Operand::c32(0x40000000u); + Operand four = Operand::c32(0x40800000u); Temp is_ma_positive = - bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), Operand(0u), ma); + bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), Operand::zero(), ma); Temp sgn_ma = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), neg_one, one, is_ma_positive); - Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand(0u), sgn_ma); + Temp neg_sgn_ma = bld.vop2(aco_opcode::v_sub_f32, bld.def(v1), Operand::zero(), sgn_ma); Temp is_ma_z = bld.vopc(aco_opcode::v_cmp_le_f32, bld.hint_vcc(bld.def(bld.lm)), four, id); Temp is_ma_y = bld.vopc(aco_opcode::v_cmp_le_f32, bld.def(bld.lm), two, id); @@ -9031,7 +9068,7 @@ build_cube_select(isel_context* ctx, Temp ma, Temp id, Temp deriv, Temp* out_ma, tmp = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), deriv_x, deriv_y, is_ma_y), deriv_z, is_ma_z); - tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x7fffffffu), tmp); + tmp = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x7fffffffu), tmp); *out_ma = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), two, tmp); } @@ -9051,7 +9088,7 @@ prepare_cube_coords(isel_context* ctx, std::vector& coords, Temp* ddx, Tem /* see comment in ac_prepare_cube_coords() */ if (ctx->options->chip_class <= GFX8) - coords[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), coords[3]); + coords[3] = bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), coords[3]); } ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]); @@ -9066,11 +9103,11 @@ prepare_cube_coords(isel_context* ctx, std::vector& coords, Temp* ddx, Tem sc = bld.vop3(aco_opcode::v_cubesc_f32, bld.def(v1), coords[0], coords[1], coords[2]); if (!is_deriv) - sc = 
bld.vop2(madak, bld.def(v1), sc, invma, Operand(0x3fc00000u /*1.5*/)); + sc = bld.vop2(madak, bld.def(v1), sc, invma, Operand::c32(0x3fc00000u /*1.5*/)); tc = bld.vop3(aco_opcode::v_cubetc_f32, bld.def(v1), coords[0], coords[1], coords[2]); if (!is_deriv) - tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand(0x3fc00000u /*1.5*/)); + tc = bld.vop2(madak, bld.def(v1), tc, invma, Operand::c32(0x3fc00000u /*1.5*/)); id = bld.vop3(aco_opcode::v_cubeid_f32, bld.def(v1), coords[0], coords[1], coords[2]); @@ -9095,12 +9132,12 @@ prepare_cube_coords(isel_context* ctx, std::vector& coords, Temp* ddx, Tem *(i ? ddy : ddx) = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), x, y); } - sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0x3fc00000u /*1.5*/), sc); - tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand(0x3fc00000u /*1.5*/), tc); + sc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), sc); + tc = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), Operand::c32(0x3fc00000u /*1.5*/), tc); } if (is_array) - id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand(0x41000000u /*8.0*/)); + id = bld.vop2(madmk, bld.def(v1), coords[3], id, Operand::c32(0x41000000u /*8.0*/)); coords.resize(3); coords[0] = sc; coords[1] = tc; @@ -9224,12 +9261,12 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) continue; acc = emit_extract_vector(ctx, offset, i, s1); - acc = - bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), acc, Operand(0x3Fu)); + acc = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), acc, + Operand::c32(0x3Fu)); if (i) { acc = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), acc, - Operand(8u * i)); + Operand::c32(8u * i)); } if (pack == Temp()) { @@ -9241,17 +9278,17 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) if (pack_const && pack != Temp()) pack = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), - Operand(pack_const), pack); + Operand::c32(pack_const), pack); } else { for (unsigned i = 0; i < offset.size(); i++) { if (const_offset[i]) continue; acc = emit_extract_vector(ctx, offset, i, v1); - acc = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(0x3Fu), acc); + acc = bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(0x3Fu), acc); if (i) { - acc = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(8u * i), acc); + acc = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(8u * i), acc); } if (pack == Temp()) { @@ -9262,10 +9299,10 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) } if (pack_const && pack != Temp()) - pack = bld.sop2(aco_opcode::v_or_b32, bld.def(v1), Operand(pack_const), pack); + pack = bld.sop2(aco_opcode::v_or_b32, bld.def(v1), Operand::c32(pack_const), pack); } if (pack_const && pack == Temp()) - offset = bld.copy(bld.def(v1), Operand(pack_const)); + offset = bld.copy(bld.def(v1), Operand::c32(pack_const)); else if (pack == Temp()) has_offset = false; else @@ -9280,7 +9317,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) if (has_ddx || has_ddy) { if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && ctx->options->chip_class == GFX9) { assert(has_ddx && has_ddy && ddx.size() == 1 && ddy.size() == 1); - Temp zero = bld.copy(bld.def(v1), Operand(0u)); + Temp zero = bld.copy(bld.def(v1), Operand::zero()); derivs = {ddx, zero, ddy, zero}; } else { for (unsigned i = 0; has_ddx && i < ddx.size(); i++) @@ -9307,10 +9344,9 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) instr->op != nir_texop_lod && instr->coord_components) { 
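Operand::c32() receives float constants as raw IEEE-754 bit patterns, which is why the cube-coordinate hunks carry comments like 0x3fc00000 /*1.5*/. A self-contained check of the values used in these and the following hunks, assuming the usual 32-bit float layout:

#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t f2bits(float f)
{
   uint32_t u;
   std::memcpy(&u, &f, sizeof u);
   return u;
}

int main()
{
   assert(f2bits(1.5f) == 0x3fc00000u);  /* sc/tc recentering bias */
   assert(f2bits(8.0f) == 0x41000000u);  /* face id scale for cube arrays */
   assert(f2bits(-0.5f) == 0xbf000000u); /* half-texel bias in the tg4 hunks */
   return 0;
}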
assert(coords.size() > 0 && coords.size() < 3); - coords.insert( - std::next(coords.begin()), - bld.copy(bld.def(v1), instr->op == nir_texop_txf ? Operand((uint32_t)0) - : Operand((uint32_t)0x3f000000))); + coords.insert(std::next(coords.begin()), + bld.copy(bld.def(v1), instr->op == nir_texop_txf ? Operand::c32(0) + : Operand::c32(0x3f000000))); } bool da = should_declare_array(ctx, instr->sampler_dim, instr->is_array); @@ -9325,7 +9361,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) assert(has_sample_index); Operand op(sample_index); if (sample_index_cv) - op = Operand(sample_index_cv->u32); + op = Operand::c32(sample_index_cv->u32); sample_index = adjust_sample_index_using_fmask(ctx, da, coords, op, fmask_ptr); } @@ -9368,7 +9404,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels) { if (!has_lod) - lod = bld.copy(bld.def(v1), Operand(0u)); + lod = bld.copy(bld.def(v1), Operand::zero()); bool div_by_6 = instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && instr->is_array && (dmask & (1 << 2)); @@ -9391,7 +9427,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) if (div_by_6) { /* divide 3rd value by 6 by multiplying with magic number */ emit_split_vector(ctx, tmp_dst, tmp_dst.size()); - Temp c = bld.copy(bld.def(s1), Operand((uint32_t)0x2AAAAAAB)); + Temp c = bld.copy(bld.def(s1), Operand::c32(0x2AAAAAAB)); Temp by_6 = bld.vop3(aco_opcode::v_mul_hi_i32, bld.def(v1), emit_extract_vector(ctx, tmp_dst, 2, v1), c); assert(instr->dest.ssa.num_components == 3); @@ -9408,7 +9444,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) Temp tg4_compare_cube_wa64 = Temp(); if (tg4_integer_workarounds) { - Temp tg4_lod = bld.copy(bld.def(v1), Operand(0u)); + Temp tg4_lod = bld.copy(bld.def(v1), Operand::zero()); Temp size = bld.tmp(v2); MIMG_instruction* tex = emit_mimg(bld, aco_opcode::image_get_resinfo, Definition(size), resource, Operand(s4), std::vector{tg4_lod}); @@ -9422,8 +9458,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) half_texel[i] = emit_extract_vector(ctx, size, i, v1); half_texel[i] = bld.vop1(aco_opcode::v_cvt_f32_i32, bld.def(v1), half_texel[i]); half_texel[i] = bld.vop1(aco_opcode::v_rcp_iflag_f32, bld.def(v1), half_texel[i]); - half_texel[i] = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0xbf000000 /*-0.5*/), - half_texel[i]); + half_texel[i] = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), + Operand::c32(0xbf000000 /*-0.5*/), half_texel[i]); } if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) { @@ -9436,13 +9472,13 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) */ unsigned bit_idx = ffs(S_008F30_FORCE_UNNORMALIZED(1)) - 1; Temp not_needed = - bld.sopc(aco_opcode::s_bitcmp0_b32, bld.def(s1, scc), sampler, Operand(bit_idx)); + bld.sopc(aco_opcode::s_bitcmp0_b32, bld.def(s1, scc), sampler, Operand::c32(bit_idx)); not_needed = bool_to_vector_condition(ctx, not_needed); half_texel[0] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), - Operand(0xbf000000 /*-0.5*/), half_texel[0], not_needed); + Operand::c32(0xbf000000 /*-0.5*/), half_texel[0], not_needed); half_texel[1] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), - Operand(0xbf000000 /*-0.5*/), half_texel[1], not_needed); + Operand::c32(0xbf000000 /*-0.5*/), half_texel[1], not_needed); } Temp new_coords[2] = {bld.vop2(aco_opcode::v_add_f32, bld.def(v1), coords[0], half_texel[0]), @@ -9461,29 +9497,28 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) 
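The magic constant 0x2AAAAAAB in the cube txs hunk above is ceil(2^32 / 6): multiplying and keeping the high 32 bits of the product divides by 6 without an integer-division instruction, and the result is exact for every non-negative 32-bit input, which covers everything image_get_resinfo can return. A standalone check:

#include <cassert>
#include <cstdint>

/* Mirrors v_mul_hi_i32 with the constant from the hunk. */
static int32_t div6_via_mulhi(int32_t x)
{
   return (int32_t)(((int64_t)x * 0x2AAAAAAB) >> 32);
}

int main()
{
   for (int32_t x = 0; x < (1 << 20); x++)
      assert(div6_via_mulhi(x) == x / 6);
   return 0;
}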
ctx->block->instructions.emplace_back(std::move(split)); Temp dfmt = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), desc[1], - Operand(20u | (6u << 16))); + Operand::c32(20u | (6u << 16))); Temp compare_cube_wa = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), dfmt, - Operand((uint32_t)V_008F14_IMG_DATA_FORMAT_8_8_8_8)); + Operand::c32(V_008F14_IMG_DATA_FORMAT_8_8_8_8)); Temp nfmt; if (stype == GLSL_TYPE_UINT) { - nfmt = - bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), - Operand((uint32_t)V_008F14_IMG_NUM_FORMAT_USCALED), - Operand((uint32_t)V_008F14_IMG_NUM_FORMAT_UINT), bld.scc(compare_cube_wa)); + nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), + Operand::c32(V_008F14_IMG_NUM_FORMAT_USCALED), + Operand::c32(V_008F14_IMG_NUM_FORMAT_UINT), bld.scc(compare_cube_wa)); } else { - nfmt = - bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), - Operand((uint32_t)V_008F14_IMG_NUM_FORMAT_SSCALED), - Operand((uint32_t)V_008F14_IMG_NUM_FORMAT_SINT), bld.scc(compare_cube_wa)); + nfmt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), + Operand::c32(V_008F14_IMG_NUM_FORMAT_SSCALED), + Operand::c32(V_008F14_IMG_NUM_FORMAT_SINT), bld.scc(compare_cube_wa)); } tg4_compare_cube_wa64 = bld.tmp(bld.lm); bool_to_vector_condition(ctx, compare_cube_wa, tg4_compare_cube_wa64); - nfmt = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), nfmt, Operand(26u)); + nfmt = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), nfmt, + Operand::c32(26u)); desc[1] = bld.sop2(aco_opcode::s_and_b32, bld.def(s1), bld.def(s1, scc), desc[1], - Operand((uint32_t)C_008F14_NUM_FORMAT)); + Operand::c32(C_008F14_NUM_FORMAT)); desc[1] = bld.sop2(aco_opcode::s_or_b32, bld.def(s1), bld.def(s1, scc), desc[1], nfmt); aco_ptr vec{create_instruction( @@ -9521,7 +9556,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) create_instruction(op, Format::MUBUF, 3 + instr->is_sparse, 1)}; mubuf->operands[0] = Operand(resource); mubuf->operands[1] = Operand(coords[0]); - mubuf->operands[2] = Operand((uint32_t)0); + mubuf->operands[2] = Operand::c32(0); mubuf->definitions[0] = Definition(tmp_dst); mubuf->idxen = true; mubuf->tfe = instr->is_sparse; @@ -9577,7 +9612,7 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr) assert(dmask == 1 && dst.regClass() == bld.lm); assert(dst.id() != tmp_dst.id()); - bld.vopc(aco_opcode::v_cmp_eq_u32, Definition(dst), Operand(0u), tmp_dst) + bld.vopc(aco_opcode::v_cmp_eq_u32, Definition(dst), Operand::zero(), tmp_dst) .def(0) .setHint(vcc); } else { @@ -9746,9 +9781,11 @@ get_phi_operand(isel_context* ctx, nir_ssa_def* ssa, RegClass rc, bool logical) } else if (logical && ssa->bit_size == 1 && ssa->parent_instr->type == nir_instr_type_load_const) { if (ctx->program->wave_size == 64) - return Operand(nir_instr_as_load_const(ssa->parent_instr)->value[0].b ? UINT64_MAX : 0u); + return Operand::c64(nir_instr_as_load_const(ssa->parent_instr)->value[0].b ? UINT64_MAX + : 0u); else - return Operand(nir_instr_as_load_const(ssa->parent_instr)->value[0].b ? UINT32_MAX : 0u); + return Operand::c32(nir_instr_as_load_const(ssa->parent_instr)->value[0].b ? 
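The get_phi_operand hunk here materializes a constant NIR boolean as a lane mask sized to the wave, all 64 bits set under wave64 and 32 under wave32; this is the same width choice that Operand::c32_or_c64 encodes at other call sites in this patch. As a sketch:

#include <cstdint>

/* A uniform `true` must set a bit for every possible lane, so the
 * constant's width tracks the wave size. */
uint64_t bool_to_lane_mask(bool b, unsigned wave_size)
{
   if (wave_size == 64)
      return b ? UINT64_MAX : 0u; /* Operand::c64(...) */
   return b ? UINT32_MAX : 0u;    /* Operand::c32(...) */
}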
UINT32_MAX + : 0u); } else { return Operand(tmp); } @@ -9858,12 +9895,12 @@ visit_undef(isel_context* ctx, nir_ssa_undef_instr* instr) assert(dst.type() == RegType::sgpr); if (dst.size() == 1) { - Builder(ctx->program, ctx->block).copy(Definition(dst), Operand(0u)); + Builder(ctx->program, ctx->block).copy(Definition(dst), Operand::zero()); } else { aco_ptr vec{create_instruction( aco_opcode::p_create_vector, Format::PSEUDO, dst.size(), 1)}; for (unsigned i = 0; i < dst.size(); i++) - vec->operands[i] = Operand(0u); + vec->operands[i] = Operand::zero(); vec->definitions[0] = Definition(dst); ctx->block->instructions.emplace_back(std::move(vec)); } @@ -10157,7 +10194,7 @@ visit_loop(isel_context* ctx, nir_loop* loop) */ if (nir_cf_node_cf_tree_next(&loop->cf_node)->predecessors->entries == 0) { Builder bld(ctx->program, ctx->block); - Temp cond = bld.copy(bld.def(s1, scc), Operand(0u)); + Temp cond = bld.copy(bld.def(s1, scc), Operand::zero()); if_context ic; begin_uniform_if_then(ctx, &ic, cond); emit_loop_break(ctx); @@ -10618,7 +10655,7 @@ export_vs_psiz_layer_viewport_vrs(isel_context* ctx, int* next_pos) } else { Builder bld(ctx->program, ctx->block); - Temp out = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(16u), + Temp out = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), Operand(ctx->outputs.temps[VARYING_SLOT_VIEWPORT * 4u])); if (exp->operands[2].isTemp()) out = bld.vop2(aco_opcode::v_or_b32, bld.def(v1), Operand(out), exp->operands[2]); @@ -10644,13 +10681,13 @@ export_vs_psiz_layer_viewport_vrs(isel_context* ctx, int* next_pos) * at the same time. */ Builder bld(ctx->program, ctx->block); - Temp rates = bld.copy(bld.def(v1), Operand((unsigned)ctx->options->force_vrs_rates)); + Temp rates = bld.copy(bld.def(v1), Operand::c32((unsigned)ctx->options->force_vrs_rates)); /* If Pos.W != 1 (typical for non-GUI elements), use 2x2 coarse shading. */ - Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), Operand(0x3f800000u), + Temp cond = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), Operand::c32(0x3f800000u), Operand(ctx->outputs.temps[VARYING_SLOT_POS + 3])); - rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), bld.copy(bld.def(v1), Operand(0u)), - rates, cond); + rates = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), + bld.copy(bld.def(v1), Operand::zero()), rates, cond); exp->operands[1] = Operand(rates); exp->enabled_mask |= 0x2; @@ -10746,7 +10783,7 @@ export_fs_mrt_z(isel_context* ctx) if (ctx->program->info->ps.writes_stencil) { /* Stencil should be in X[23:16]. 
*/ values[0] = Operand(ctx->outputs.temps[FRAG_RESULT_STENCIL * 4u]); - values[0] = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(16u), values[0]); + values[0] = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(16u), values[0]); enabled_channels |= 0x3; } @@ -10824,9 +10861,9 @@ export_fs_mrt_color(isel_context* ctx, int slot) continue; Temp isnan = bld.vopc(aco_opcode::v_cmp_class_f32, bld.hint_vcc(bld.def(bld.lm)), - values[i], bld.copy(bld.def(v1), Operand(3u))); + values[i], bld.copy(bld.def(v1), Operand::c32(3u))); values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), values[i], - bld.copy(bld.def(v1), Operand(0u)), isnan); + bld.copy(bld.def(v1), Operand::zero()), isnan); } } @@ -10859,8 +10896,8 @@ export_fs_mrt_color(isel_context* ctx, int slot) } else if (ctx->options->chip_class == GFX8 || ctx->options->chip_class == GFX9) { values[i] = bld.vop3(aco_opcode::v_cvt_pkrtz_f16_f32_e64, bld.def(v1), - values[i * 2].isUndefined() ? Operand(0u) : values[i * 2], - values[i * 2 + 1].isUndefined() ? Operand(0u) : values[i * 2 + 1]); + values[i * 2].isUndefined() ? Operand::zero() : values[i * 2], + values[i * 2 + 1].isUndefined() ? Operand::zero() : values[i * 2 + 1]); } else { values[i] = bld.vop2(aco_opcode::v_cvt_pkrtz_f16_f32, bld.def(v1), @@ -10897,13 +10934,13 @@ export_fs_mrt_color(isel_context* ctx, int slot) if (is_int8 || is_int10) { /* clamp */ uint32_t max_rgb = is_int8 ? 255 : is_int10 ? 1023 : 0; - Temp max_rgb_val = bld.copy(bld.def(s1), Operand(max_rgb)); + Temp max_rgb_val = bld.copy(bld.def(s1), Operand::c32(max_rgb)); for (unsigned i = 0; i < 4; i++) { if ((write_mask >> i) & 1) { values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), - i == 3 && is_int10 ? Operand(3u) : Operand(max_rgb_val), values[i]); + i == 3 && is_int10 ? Operand::c32(3u) : Operand(max_rgb_val), values[i]); } } } else if (is_16bit) { @@ -10923,17 +10960,17 @@ export_fs_mrt_color(isel_context* ctx, int slot) /* clamp */ uint32_t max_rgb = is_int8 ? 127 : is_int10 ? 511 : 0; uint32_t min_rgb = is_int8 ? -128 : is_int10 ? -512 : 0; - Temp max_rgb_val = bld.copy(bld.def(s1), Operand(max_rgb)); - Temp min_rgb_val = bld.copy(bld.def(s1), Operand(min_rgb)); + Temp max_rgb_val = bld.copy(bld.def(s1), Operand::c32(max_rgb)); + Temp min_rgb_val = bld.copy(bld.def(s1), Operand::c32(min_rgb)); for (unsigned i = 0; i < 4; i++) { if ((write_mask >> i) & 1) { values[i] = bld.vop2(aco_opcode::v_min_i32, bld.def(v1), - i == 3 && is_int10 ? Operand(1u) : Operand(max_rgb_val), values[i]); - values[i] = - bld.vop2(aco_opcode::v_max_i32, bld.def(v1), - i == 3 && is_int10 ? Operand(-2u) : Operand(min_rgb_val), values[i]); + i == 3 && is_int10 ? Operand::c32(1u) : Operand(max_rgb_val), values[i]); + values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), + i == 3 && is_int10 ? Operand::c32(-2u) : Operand(min_rgb_val), + values[i]); } } } else if (is_16bit) { @@ -10958,9 +10995,9 @@ export_fs_mrt_color(isel_context* ctx, int slot) bool enabled = (write_mask >> (i * 2)) & 0x3; if (enabled) { enabled_channels |= 0x3 << (i * 2); - values[i] = bld.vop3(compr_op, bld.def(v1), - values[i * 2].isUndefined() ? Operand(0u) : values[i * 2], - values[i * 2 + 1].isUndefined() ? Operand(0u) : values[i * 2 + 1]); + values[i] = bld.vop3( + compr_op, bld.def(v1), values[i * 2].isUndefined() ? Operand::zero() : values[i * 2], + values[i * 2 + 1].isUndefined() ? 
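The export clamps above bound each channel to what the packed render-target format can hold: unsigned 8-bit and 10-bit channels clamp to 255 and 1023, the 2-bit alpha of the 10-bit format to 3, and the signed variants use [-128, 127], [-512, 511] and [-2, 1]. One channel in plain C++ (helper names are illustrative):

#include <algorithm>
#include <cstdint>

uint32_t clamp_uint_channel(uint32_t v, bool is_int10, bool is_alpha)
{
   uint32_t max = is_int10 ? (is_alpha ? 3u : 1023u) : 255u;
   return std::min(v, max); /* v_min_u32 in the hunk */
}

int32_t clamp_sint_channel(int32_t v, bool is_int10, bool is_alpha)
{
   int32_t max = is_int10 ? (is_alpha ? 1 : 511) : 127;
   int32_t min = is_int10 ? (is_alpha ? -2 : -512) : -128;
   return std::max(min, std::min(v, max)); /* v_min_i32 + v_max_i32 */
}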
Operand::zero() : values[i * 2 + 1]); } else { values[i] = Operand(v1); } @@ -11060,7 +11097,7 @@ emit_stream_output(isel_context* ctx, Temp const* so_buffers, Temp const* so_wri aco_opcode::p_create_vector, Format::PSEUDO, count, 1)}; for (int i = 0; i < count; ++i) vec->operands[i] = - (ctx->outputs.mask[loc] & 1 << (start + i)) ? Operand(out[start + i]) : Operand(0u); + (ctx->outputs.mask[loc] & 1 << (start + i)) ? Operand(out[start + i]) : Operand::zero(); vec->definitions[0] = Definition(write_data); ctx->block->instructions.emplace_back(std::move(vec)); @@ -11077,13 +11114,13 @@ emit_stream_output(isel_context* ctx, Temp const* so_buffers, Temp const* so_wri create_instruction(opcode, Format::MUBUF, 4, 0)}; store->operands[0] = Operand(so_buffers[buf]); store->operands[1] = Operand(so_write_offset[buf]); - store->operands[2] = Operand((uint32_t)0); + store->operands[2] = Operand::c32(0); store->operands[3] = Operand(write_data); if (offset > 4095) { /* Don't think this can happen in RADV, but maybe GL? It's easy to do this anyway. */ Builder bld(ctx->program, ctx->block); store->operands[0] = - bld.vadd32(bld.def(v1), Operand(offset), Operand(so_write_offset[buf])); + bld.vadd32(bld.def(v1), Operand::c32(offset), Operand(so_write_offset[buf])); } else { store->offset = offset; } @@ -11107,12 +11144,13 @@ emit_streamout(isel_context* ctx, unsigned stream) if (!stride) continue; - Operand off = bld.copy(bld.def(s1), Operand(i * 16u)); + Operand off = bld.copy(bld.def(s1), Operand::c32(i * 16u)); so_buffers[i] = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), buf_ptr, off); } - Temp so_vtx_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->ac.streamout_config), Operand(0x70010u)); + Temp so_vtx_count = + bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), + get_arg(ctx, ctx->args->ac.streamout_config), Operand::c32(0x70010u)); Temp tid = emit_mbcnt(ctx, bld.tmp(v1)); @@ -11140,10 +11178,10 @@ emit_streamout(isel_context* ctx, unsigned stream) Temp new_offset = bld.vadd32(bld.def(v1), offset, tid); so_write_offset[i] = - bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), new_offset); + bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), new_offset); } else { Temp offset = bld.v_mul_imm(bld.def(v1), so_write_index, stride * 4u); - Temp offset2 = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand(4u), + Temp offset2 = bld.sop2(aco_opcode::s_mul_i32, bld.def(s1), Operand::c32(4u), get_arg(ctx, ctx->args->ac.streamout_offset[i])); so_write_offset[i] = bld.vadd32(bld.def(v1), offset, offset2); } @@ -11227,9 +11265,9 @@ fix_ls_vgpr_init_bug(isel_context* ctx, Pseudo_instruction* startpgm) assert(ctx->shader->info.stage == MESA_SHADER_VERTEX); Builder bld(ctx->program, ctx->block); constexpr unsigned hs_idx = 1u; - Builder::Result hs_thread_count = - bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->ac.merged_wave_info), Operand((8u << 16) | (hs_idx * 8u))); + Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), + get_arg(ctx, ctx->args->ac.merged_wave_info), + Operand::c32((8u << 16) | (hs_idx * 8u))); Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp()); /* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. 
*/ @@ -11277,7 +11315,7 @@ handle_bc_optimize(isel_context* ctx) ctx->linear_centroid = get_arg(ctx, ctx->args->ac.linear_centroid); if (uses_center && uses_centroid) { Temp sel = bld.vopc_e64(aco_opcode::v_cmp_lt_i32, bld.hint_vcc(bld.def(bld.lm)), - get_arg(ctx, ctx->args->ac.prim_mask), Operand(0u)); + get_arg(ctx, ctx->args->ac.prim_mask), Operand::zero()); if (G_0286CC_PERSP_CENTROID_ENA(spi_ps_input_ena)) { Temp new_coord[2]; @@ -11386,7 +11424,7 @@ lanecount_to_mask(isel_context* ctx, Temp count, bool allow64 = true) assert(count.regClass() == s1); Builder bld(ctx->program, ctx->block); - Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand(0u)); + Temp mask = bld.sop2(aco_opcode::s_bfm_b64, bld.def(s2), count, Operand::zero()); Temp cond; if (ctx->program->wave_size == 64) { @@ -11395,9 +11433,10 @@ lanecount_to_mask(isel_context* ctx, Temp count, bool allow64 = true) return mask; /* Special case for 64 active invocations, because 64 doesn't work with s_bfm */ - Temp active_64 = - bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, Operand(6u /* log2(64) */)); - cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand(-1u), mask, bld.scc(active_64)); + Temp active_64 = bld.sopc(aco_opcode::s_bitcmp1_b32, bld.def(s1, scc), count, + Operand::c32(6u /* log2(64) */)); + cond = + bld.sop2(Builder::s_cselect, bld.def(bld.lm), Operand::c32(-1u), mask, bld.scc(active_64)); } else { /* We use s_bfm_b64 (not _b32) which works with 32, but we need to extract the lower half of * the register */ @@ -11413,9 +11452,10 @@ merged_wave_info_to_mask(isel_context* ctx, unsigned i) Builder bld(ctx->program, ctx->block); /* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */ - Temp count = i == 0 ? get_arg(ctx, ctx->args->ac.merged_wave_info) - : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), - get_arg(ctx, ctx->args->ac.merged_wave_info), Operand(i * 8u)); + Temp count = i == 0 + ? 
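lanecount_to_mask above needs its special case because s_bfm cannot produce a mask with all 64 bits set (per the comment in the hunk, 64 doesn't work with s_bfm): s_bitcmp1_b32 on bit 6 detects count == 64 and the cselect substitutes -1. The same logic on plain integers, assuming count <= 64 as the callers guarantee:

#include <cstdint>

uint64_t lanecount_to_mask64(unsigned count)
{
   /* s_bfm_b64 with offset 0: a mask of `count` low bits (width taken
    * modulo 64, so count == 64 yields 0 and must be special-cased). */
   uint64_t mask = count < 64 ? ((uint64_t)1 << count) - 1 : 0;
   bool active_64 = (count >> 6) & 1;      /* s_bitcmp1_b32 on bit log2(64) */
   return active_64 ? ~(uint64_t)0 : mask; /* s_cselect with c32(-1u) */
}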
get_arg(ctx, ctx->args->ac.merged_wave_info) : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc), + get_arg(ctx, ctx->args->ac.merged_wave_info), Operand::c32(i * 8u)); return lanecount_to_mask(ctx, count); } @@ -11430,16 +11470,16 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt) if (ctx->program->chip_class == GFX10 && ctx->stage.has(SWStage::GS)) { /* Navi 1x workaround: make sure to always export at least 1 vertex and triangle */ - prm_cnt_0 = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), prm_cnt, Operand(0u)); - prm_cnt = - bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand(1u), prm_cnt, bld.scc(prm_cnt_0)); - vtx_cnt = - bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand(1u), vtx_cnt, bld.scc(prm_cnt_0)); + prm_cnt_0 = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), prm_cnt, Operand::zero()); + prm_cnt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(1u), prm_cnt, + bld.scc(prm_cnt_0)); + vtx_cnt = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1), Operand::c32(1u), vtx_cnt, + bld.scc(prm_cnt_0)); } /* Put the number of vertices and primitives into m0 for the GS_ALLOC_REQ */ Temp tmp = - bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), prm_cnt, Operand(12u)); + bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), prm_cnt, Operand::c32(12u)); tmp = bld.sop2(aco_opcode::s_or_b32, bld.m0(bld.def(s1)), bld.def(s1, scc), tmp, vtx_cnt); /* Request the SPI to allocate space for the primitives and vertices @@ -11453,9 +11493,9 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt) */ Temp first_lane = bld.sop1(Builder::s_ff1_i32, bld.def(s1), Operand(exec, bld.lm)); Temp cond = bld.sop2(Builder::s_lshl, bld.def(bld.lm), bld.def(s1, scc), - Operand(1u, ctx->program->wave_size == 64), first_lane); + Operand::c32_or_c64(1u, ctx->program->wave_size == 64), first_lane); cond = bld.sop2(Builder::s_cselect, bld.def(bld.lm), cond, - Operand(0u, ctx->program->wave_size == 64), bld.scc(prm_cnt_0)); + Operand::zero(ctx->program->wave_size == 64 ? 8 : 4), bld.scc(prm_cnt_0)); if_context ic_prim_0; begin_divergent_if_then(ctx, &ic_prim_0, cond); @@ -11463,9 +11503,9 @@ ngg_emit_sendmsg_gs_alloc_req(isel_context* ctx, Temp vtx_cnt, Temp prm_cnt) ctx->block->kind |= block_kind_export_end; /* Use zero: means that it's a triangle whose every vertex index is 0. */ - Temp zero = bld.copy(bld.def(v1), Operand(0u)); + Temp zero = bld.copy(bld.def(v1), Operand::zero()); /* Use NaN for the coordinates, so that the rasterizer always culls it. 
*/ - Temp nan_coord = bld.copy(bld.def(v1), Operand(-1u)); + Temp nan_coord = bld.copy(bld.def(v1), Operand::c32(-1u)); bld.exp(aco_opcode::exp, zero, Operand(v1), Operand(v1), Operand(v1), 1 /* enabled mask */, V_008DFC_SQ_EXP_PRIM /* dest */, false /* compressed */, true /* done */, @@ -11548,8 +11588,8 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const if (ctx.stage == vertex_geometry_gs || ctx.stage == tess_eval_geometry_gs) { ctx.gs_wave_id = bld.pseudo(aco_opcode::p_extract, bld.def(s1, m0), bld.def(s1, scc), - get_arg(&ctx, args->ac.merged_wave_info), Operand(2u), - Operand(8u), Operand(0u)); + get_arg(&ctx, args->ac.merged_wave_info), Operand::c32(2u), + Operand::c32(8u), Operand::zero()); } } else if (ctx.stage == geometry_gs) ctx.gs_wave_id = get_arg(&ctx, args->ac.gs_wave_id); @@ -11614,14 +11654,14 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_ Builder bld(ctx.program, ctx.block); Temp gsvs_ring = bld.smem(aco_opcode::s_load_dwordx4, bld.def(s4), - program->private_segment_buffer, Operand(RING_GSVS_VS * 16u)); + program->private_segment_buffer, Operand::c32(RING_GSVS_VS * 16u)); - Operand stream_id(0u); + Operand stream_id = Operand::zero(); if (args->shader_info->so.num_outputs) stream_id = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc), - get_arg(&ctx, ctx.args->ac.streamout_config), Operand(0x20018u)); + get_arg(&ctx, ctx.args->ac.streamout_config), Operand::c32(0x20018u)); - Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), + Temp vtx_offset = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(2u), get_arg(&ctx, ctx.args->ac.vertex_id)); std::stack if_contexts; @@ -11638,7 +11678,7 @@ select_gs_copy_shader(Program* program, struct nir_shader* gs_shader, ac_shader_ if (!stream_id.isConstant()) { Temp cond = - bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand(stream)); + bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), stream_id, Operand::c32(stream)); if_contexts.emplace(); begin_uniform_if_then(&ctx, &if_contexts.top(), cond); bld.reset(ctx.block); @@ -11724,10 +11764,10 @@ select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shade /* Load the buffer descriptor from TMA. */ bld.smem(aco_opcode::s_load_dwordx4, Definition(PhysReg{ttmp4}, s4), Operand(PhysReg{tma}, s2), - Operand(0u)); + Operand::zero()); /* Store TTMP0-TTMP1. 
*/ - bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4), Operand(0u), + bld.smem(aco_opcode::s_buffer_store_dwordx2, Operand(PhysReg{ttmp4}, s4), Operand::zero(), Operand(PhysReg{ttmp0}, s2), memory_sync_info(), true); uint32_t hw_regs_idx[] = { @@ -11743,8 +11783,8 @@ select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shade bld.sopk(aco_opcode::s_getreg_b32, Definition(PhysReg{ttmp8}, s1), ((20 - 1) << 11) | hw_regs_idx[i]); - bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4), Operand(8u + i * 4), - Operand(PhysReg{ttmp8}, s1), memory_sync_info(), true); + bld.smem(aco_opcode::s_buffer_store_dword, Operand(PhysReg{ttmp4}, s4), + Operand::c32(8u + i * 4), Operand(PhysReg{ttmp8}, s1), memory_sync_info(), true); } program->config->float_mode = program->blocks[0].fp_mode.val; diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h index a08b576..313c018 100644 --- a/src/amd/compiler/aco_ir.h +++ b/src/amd/compiler/aco_ir.h @@ -665,19 +665,19 @@ public: { if (val == 0x3e22f983 && bytes == 4 && chip >= GFX8) { /* 1/2 PI can be an inline constant on GFX8+ */ - Operand op((uint32_t)val); + Operand op = Operand::c32(val); op.setFixed(PhysReg{248}); return op; } if (bytes == 8) - return Operand(val); + return Operand::c64(val); else if (bytes == 4) - return Operand((uint32_t)val); + return Operand::c32(val); else if (bytes == 2) - return Operand((uint16_t)val); + return Operand::c16(val); assert(bytes == 1); - return Operand((uint8_t)val); + return Operand::c8(val); } static bool is_constant_representable(uint64_t val, unsigned bytes, bool zext = false, diff --git a/src/amd/compiler/aco_lower_phis.cpp b/src/amd/compiler/aco_lower_phis.cpp index 2b10318..cbce86e 100644 --- a/src/amd/compiler/aco_lower_phis.cpp +++ b/src/amd/compiler/aco_lower_phis.cpp @@ -141,7 +141,7 @@ build_merge_code(Program* program, Block* block, Definition dst, Operand prev, O if (!cur_is_constant) bld.sop2(Builder::s_orn2, dst, bld.def(s1, scc), cur, Operand(exec, bld.lm)); else if (cur.constantValue()) - bld.copy(dst, Operand(UINT32_MAX, bld.lm == s2)); + bld.copy(dst, Operand::c32_or_c64(UINT32_MAX, bld.lm == s2)); else bld.sop1(Builder::s_not, dst, bld.def(s1, scc), Operand(exec, bld.lm)); } else { @@ -150,7 +150,7 @@ build_merge_code(Program* program, Block* block, Definition dst, Operand prev, O else if (cur.constantValue()) bld.copy(dst, Operand(exec, bld.lm)); else - bld.copy(dst, Operand(0u, bld.lm == s2)); + bld.copy(dst, Operand::zero(bld.lm.bytes())); } } @@ -294,9 +294,9 @@ lower_subdword_phis(Program* program, Block* block, aco_ptr& phi) Temp tmp = bld.tmp(RegClass(RegType::vgpr, phi_src.size())); insert_before_logical_end(pred, bld.copy(Definition(tmp), phi_src).get_ptr()); Temp new_phi_src = bld.tmp(phi->definitions[0].regClass()); - insert_before_logical_end( - pred, bld.pseudo(aco_opcode::p_extract_vector, Definition(new_phi_src), tmp, Operand(0u)) - .get_ptr()); + insert_before_logical_end(pred, bld.pseudo(aco_opcode::p_extract_vector, + Definition(new_phi_src), tmp, Operand::zero()) + .get_ptr()); phi->operands[i].setTemp(new_phi_src); } diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp index 979eba3..8a9db76 100644 --- a/src/amd/compiler/aco_lower_to_hw_instr.cpp +++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp @@ -480,13 +480,13 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c Builder bld(ctx->program, &ctx->instructions); Operand identity[2]; 
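The aco_ir.h hunk above is the one remaining place that decides a constant's width at runtime, so it dispatches over `bytes` by hand, with the 0x3e22f983 case pinned to the GFX8+ inline-constant register as its comment notes. The selection logic in isolation, as a sketch:

#include <cassert>
#include <cstdint>

enum class ConstKind { c8, c16, c32, c64, c32_inline_half_pi };

ConstKind classify_constant(uint64_t val, unsigned bytes, bool gfx8plus)
{
   if (val == 0x3e22f983 && bytes == 4 && gfx8plus)
      return ConstKind::c32_inline_half_pi; /* fixed to PhysReg 248 */
   switch (bytes) {
   case 8: return ConstKind::c64;
   case 4: return ConstKind::c32;
   case 2: return ConstKind::c16;
   default: assert(bytes == 1); return ConstKind::c8;
   }
}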
-   identity[0] = Operand(get_reduction_identity(reduce_op, 0));
-   identity[1] = Operand(get_reduction_identity(reduce_op, 1));
+   identity[0] = Operand::c32(get_reduction_identity(reduce_op, 0));
+   identity[1] = Operand::c32(get_reduction_identity(reduce_op, 1));
    Operand vcndmask_identity[2] = {identity[0], identity[1]};
 
    /* First, copy the source to tmp and set inactive lanes to the identity */
    bld.sop1(Builder::s_or_saveexec, Definition(stmp, bld.lm), Definition(scc, s1),
-            Definition(exec, bld.lm), Operand(UINT64_MAX), Operand(exec, bld.lm));
+            Definition(exec, bld.lm), Operand::c64(UINT64_MAX), Operand(exec, bld.lm));
 
    for (unsigned i = 0; i < src.size(); i++) {
       /* p_exclusive_scan needs it to be a sgpr or inline constant for the v_writelane_b32
@@ -530,8 +530,8 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
          else
            opcode = aco_opcode::v_bfe_u32;
 
-        bld.vop3(opcode, Definition(PhysReg{tmp}, v1), Operand(PhysReg{tmp}, v1), Operand(0u),
-                 Operand(8u));
+        bld.vop3(opcode, Definition(PhysReg{tmp}, v1), Operand(PhysReg{tmp}, v1), Operand::zero(),
+                 Operand::c32(8u));
      }
   } else if (src.regClass() == v2b) {
      if (ctx->program->chip_class >= GFX10 &&
@@ -555,8 +555,8 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
         else
           opcode = aco_opcode::v_bfe_u32;
 
-       bld.vop3(opcode, Definition(PhysReg{tmp}, v1), Operand(PhysReg{tmp}, v1), Operand(0u),
-                Operand(16u));
+       bld.vop3(opcode, Definition(PhysReg{tmp}, v1), Operand(PhysReg{tmp}, v1), Operand::zero(),
+                Operand::c32(16u));
     }
  }
 
@@ -590,7 +590,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
      emit_op(ctx, tmp, vtmp, tmp, PhysReg{0}, reduce_op, src.size());
      for (unsigned i = 0; i < src.size(); i++)
         bld.readlane(Definition(PhysReg{dst.physReg() + i}, s1), Operand(PhysReg{tmp + i}, v1),
-                     Operand(0u));
+                     Operand::zero());
      // TODO: it would be more effective to do the last reduction step on SALU
      emit_op(ctx, tmp, dst.physReg(), tmp, vtmp, reduce_op, src.size());
      reduction_needs_last_op = false;
@@ -617,7 +617,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
      /* GFX10+ doesn't support row_bcast15 and row_bcast31 */
      for (unsigned i = 0; i < src.size(); i++)
         bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
-                 Operand(PhysReg{tmp + i}, v1), Operand(0u), Operand(0u));
+                 Operand(PhysReg{tmp + i}, v1), Operand::zero(), Operand::zero());
 
      if (cluster_size == 32) {
         reduction_needs_last_op = true;
@@ -627,7 +627,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
      emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
      for (unsigned i = 0; i < src.size(); i++)
         bld.readlane(Definition(PhysReg{dst.physReg() + i}, s1), Operand(PhysReg{tmp + i}, v1),
-                     Operand(0u));
+                     Operand::zero());
      // TODO: it would be more effective to do the last reduction step on SALU
      emit_op(ctx, tmp, dst.physReg(), tmp, vtmp, reduce_op, src.size());
      break;
@@ -650,24 +650,25 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
      emit_dpp_mov(ctx, vtmp, tmp, src.size(), dpp_row_sr(1), 0xf, 0xf, true);
 
      /* fill in the gaps in rows 1 and 3 */
-     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0x10000u));
-     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(0x10000u));
+     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0x10000u));
+     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand::c32(0x10000u));
      for (unsigned i = 0; i < src.size(); i++) {
        Instruction* perm =
          bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
-                  Operand(PhysReg{tmp + i}, v1), Operand(0xffffffffu), Operand(0xffffffffu))
+                  Operand(PhysReg{tmp + i}, v1), Operand::c32(0xffffffffu),
+                  Operand::c32(0xffffffffu))
            .instr;
       perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
    }
-   bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
+   bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand::c64(UINT64_MAX));
 
    if (ctx->program->wave_size == 64) {
       /* fill in the gap in row 2 */
       for (unsigned i = 0; i < src.size(); i++) {
          bld.readlane(Definition(PhysReg{sitmp + i}, s1), Operand(PhysReg{tmp + i}, v1),
-                      Operand(31u));
+                      Operand::c32(31u));
          bld.writelane(Definition(PhysReg{vtmp + i}, v1), Operand(PhysReg{sitmp + i}, s1),
-                       Operand(32u), Operand(PhysReg{vtmp + i}, v1));
+                       Operand::c32(32u), Operand(PhysReg{vtmp + i}, v1));
       }
    }
    std::swap(tmp, vtmp);
@@ -679,39 +680,41 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
      emit_ds_swizzle(bld, vtmp, tmp, src.size(), (1 << 15) | dpp_quad_perm(0, 0, 1, 2));
      emit_ds_swizzle(bld, tmp, tmp, src.size(), ds_pattern_bitmode(0x1F, 0x00, 0x07)); /* mirror(8) */
-     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0x10101010u));
+     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0x10101010u));
      bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
      for (unsigned i = 0; i < src.size(); i++)
         bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp + i}, v1),
                  Operand(PhysReg{tmp + i}, v1));
-     bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
+     bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
      emit_ds_swizzle(bld, tmp, tmp, src.size(), ds_pattern_bitmode(0x1F, 0x00, 0x08)); /* swap(8) */
-     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0x01000100u));
+     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0x01000100u));
      bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
      for (unsigned i = 0; i < src.size(); i++)
         bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp + i}, v1),
                  Operand(PhysReg{tmp + i}, v1));
-     bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
+     bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
      emit_ds_swizzle(bld, tmp, tmp, src.size(), ds_pattern_bitmode(0x1F, 0x00, 0x10)); /* swap(16) */
-     bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand(1u), Operand(16u));
-     bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand(1u), Operand(16u));
+     bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(1u),
+              Operand::c32(16u));
+     bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(1u),
+              Operand::c32(16u));
      for (unsigned i = 0; i < src.size(); i++)
        bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg{vtmp + i}, v1),
                 Operand(PhysReg{tmp + i}, v1));
-    bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
+    bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
 
    for (unsigned i = 0; i < src.size(); i++) {
-      bld.writelane(Definition(PhysReg{vtmp + i}, v1), identity[i], Operand(0u),
+      bld.writelane(Definition(PhysReg{vtmp + i}, v1), identity[i], Operand::zero(),
                     Operand(PhysReg{vtmp + i}, v1));
      bld.readlane(Definition(PhysReg{sitmp + i}, s1), Operand(PhysReg{tmp + i}, v1),
-                  Operand(0u));
+                  Operand::zero());
      bld.writelane(Definition(PhysReg{vtmp + i}, v1), Operand(PhysReg{sitmp + i}, s1),
-                   Operand(32u), Operand(PhysReg{vtmp + i}, v1));
-     identity[i] = Operand(0u); /* prevent further uses of identity */
+                   Operand::c32(32u), Operand(PhysReg{vtmp + i}, v1));
+     identity[i] = Operand::zero(); /* prevent further uses of identity */
   }
   std::swap(tmp, vtmp);
 }
@@ -722,7 +725,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
      if (ctx->program->chip_class < GFX10)
         assert((identity[i].isConstant() && !identity[i].isLiteral()) ||
                identity[i].physReg() == PhysReg{sitmp + i});
-     bld.writelane(Definition(PhysReg{tmp + i}, v1), identity[i], Operand(0u),
+     bld.writelane(Definition(PhysReg{tmp + i}, v1), identity[i], Operand::zero(),
                    Operand(PhysReg{tmp + i}, v1));
   }
 }
@@ -731,38 +734,41 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
      assert(cluster_size == ctx->program->wave_size);
      if (ctx->program->chip_class <= GFX7) {
         emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x1e, 0x00, 0x00));
-        bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0xAAAAAAAAu));
+        bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xAAAAAAAAu));
        bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
        emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
-       bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
+       bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
        emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x1c, 0x01, 0x00));
-       bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0xCCCCCCCCu));
+       bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xCCCCCCCCu));
       bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
       emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
-      bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
+      bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
      emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x18, 0x03, 0x00));
-     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0xF0F0F0F0u));
+     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xF0F0F0F0u));
      bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
      emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
-     bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
+     bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
      emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x10, 0x07, 0x00));
-     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand(0xFF00FF00u));
+     bld.sop1(aco_opcode::s_mov_b32, Definition(exec_lo, s1), Operand::c32(0xFF00FF00u));
      bld.sop1(aco_opcode::s_mov_b32, Definition(exec_hi, s1), Operand(exec_lo, s1));
      emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
-     bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand(UINT64_MAX));
+     bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(UINT64_MAX));
      emit_ds_swizzle(bld, vtmp, tmp, src.size(), ds_pattern_bitmode(0x00, 0x0f, 0x00));
-     bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand(16u), Operand(16u));
-     bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand(16u), Operand(16u));
+     bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
+              Operand::c32(16u));
+     bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
+              Operand::c32(16u));
      emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
      for (unsigned i = 0; i < src.size(); i++)
         bld.readlane(Definition(PhysReg{sitmp + i}, s1), Operand(PhysReg{tmp + i}, v1),
-                     Operand(31u));
-     bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand(32u), Operand(32u));
+                     Operand::c32(31u));
+     bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand::c32(32u),
+              Operand::c32(32u));
      emit_op(ctx, tmp, sitmp, tmp, vtmp, reduce_op, src.size());
      break;
   }
@@ -776,22 +782,26 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
      emit_dpp_op(ctx, tmp, tmp, tmp, vtmp, reduce_op, src.size(), dpp_row_sr(8), 0xf, 0xf,
                  false, identity);
      if (ctx->program->chip_class >= GFX10) {
-        bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand(16u), Operand(16u));
-        bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand(16u), Operand(16u));
+        bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_lo, s1), Operand::c32(16u),
+                 Operand::c32(16u));
+        bld.sop2(aco_opcode::s_bfm_b32, Definition(exec_hi, s1), Operand::c32(16u),
+                 Operand::c32(16u));
        for (unsigned i = 0; i < src.size(); i++) {
          Instruction* perm =
            bld.vop3(aco_opcode::v_permlanex16_b32, Definition(PhysReg{vtmp + i}, v1),
-                    Operand(PhysReg{tmp + i}, v1), Operand(0xffffffffu), Operand(0xffffffffu))
+                    Operand(PhysReg{tmp + i}, v1), Operand::c32(0xffffffffu),
+                    Operand::c32(0xffffffffu))
              .instr;
         perm->vop3().opsel = 1; /* FI (Fetch Inactive) */
      }
      emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
      if (ctx->program->wave_size == 64) {
-        bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand(32u), Operand(32u));
+        bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand::c32(32u),
+                 Operand::c32(32u));
        for (unsigned i = 0; i < src.size(); i++)
          bld.readlane(Definition(PhysReg{sitmp + i}, s1), Operand(PhysReg{tmp + i}, v1),
-                      Operand(31u));
+                      Operand::c32(31u));
        emit_op(ctx, tmp, sitmp, tmp, vtmp, reduce_op, src.size());
      }
   } else {
@@ -821,7 +831,7 @@ emit_reduction(lower_context* ctx, aco_opcode op, ReduceOp reduce_op, unsigned c
   if (dst.regClass().type() == RegType::sgpr) {
      for (unsigned k = 0; k < src.size(); k++) {
         bld.readlane(Definition(PhysReg{dst.physReg() + k}, s1), Operand(PhysReg{tmp + k}, v1),
-                     Operand(ctx->program->wave_size - 1));
+                     Operand::c32(ctx->program->wave_size - 1));
      }
   } else if (dst.physReg() != tmp) {
      for (unsigned k = 0; k < src.size(); k++) {
@@ -875,14 +885,14 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
    /* Save EXEC */
    bld.sop1(aco_opcode::s_mov_b64, tmp_exec, Operand(exec, s2));
    /* Set EXEC to enable LO lanes only */
-   bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand(32u), Operand(0u));
+   bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand::c32(32u), Operand::zero());
    /* LO: Copy data from low lanes 0-31 to shared vgpr */
    bld.vop1(aco_opcode::v_mov_b32, Definition(shared_vgpr_lo, v1), input_data);
    /* LO: bpermute shared vgpr (high lanes' data) */
    bld.ds(aco_opcode::ds_bpermute_b32, Definition(shared_vgpr_hi, v1), index_x4,
           Operand(shared_vgpr_hi, v1));
    /* Set EXEC to enable HI lanes only */
-   bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand(32u), Operand(32u));
+   bld.sop2(aco_opcode::s_bfm_b64, Definition(exec, s2), Operand::c32(32u), Operand::c32(32u));
    /* HI: bpermute shared vgpr (low lanes' data) */
    bld.ds(aco_opcode::ds_bpermute_b32, Definition(shared_vgpr_lo, v1), index_x4,
           Operand(shared_vgpr_lo, v1));
@@ -904,7 +914,8 @@ emit_gfx10_wave64_bpermute(Program* program, aco_ptr<Instruction>& instr, Builde
     * it's not there already */
    if (input_data.physReg().byte()) {
       unsigned right_shift = input_data.physReg().byte() * 8;
-      bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand(right_shift), Operand(dst.physReg(), v1));
+      bld.vop2(aco_opcode::v_lshrrev_b32, dst, Operand::c32(right_shift),
+               Operand(dst.physReg(), v1));
    }
 }
 
@@ -938,9 +949,10 @@ emit_gfx6_bpermute(Program* program, aco_ptr<Instruction>& instr, Builder& bld)
    */
   for (unsigned n = 0; n < program->wave_size; ++n) {
      /* Activate the lane which has N for its source index */
-     bld.vopc(aco_opcode::v_cmpx_eq_u32, Definition(exec, bld.lm), clobber_vcc, Operand(n), index);
+     bld.vopc(aco_opcode::v_cmpx_eq_u32, Definition(exec, bld.lm), clobber_vcc, Operand::c32(n),
+              index);
      /* Read the data from lane N */
-     bld.readlane(Definition(vcc, s1), input, Operand(n));
+     bld.readlane(Definition(vcc, s1), input, Operand::c32(n));
      /* On the active lane, move the data we read from lane N to the destination VGPR */
      bld.vop1(aco_opcode::v_mov_b32, dst, Operand(vcc, s1));
      /* Restore original EXEC */
@@ -1026,15 +1038,15 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
   } else if (util_bitreverse(imm) <= 64 || util_bitreverse(imm) >= 0xFFFFFFF0) {
      uint32_t rev = util_bitreverse(imm);
      if (dst.regClass() == s1)
-        bld.sop1(aco_opcode::s_brev_b32, dst, Operand(rev));
+        bld.sop1(aco_opcode::s_brev_b32, dst, Operand::c32(rev));
      else
-        bld.vop1(aco_opcode::v_bfrev_b32, dst, Operand(rev));
+        bld.vop1(aco_opcode::v_bfrev_b32, dst, Operand::c32(rev));
      return;
   } else if (dst.regClass() == s1 && imm != 0) {
      unsigned start = (ffs(imm) - 1) & 0x1f;
      unsigned size = util_bitcount(imm) & 0x1f;
      if ((((1u << size) - 1u) << start) == imm) {
-        bld.sop2(aco_opcode::s_bfm_b32, dst, Operand(size), Operand(start));
+        bld.sop2(aco_opcode::s_bfm_b32, dst, Operand::c32(size), Operand::c32(start));
        return;
      }
   }
@@ -1051,10 +1063,10 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
      bld.sop1(aco_opcode::s_mov_b64, dst, op);
   } else if (dst.regClass() == v2) {
      if (Operand::is_constant_representable(op.constantValue64(), 8, true, false)) {
-        bld.vop3(aco_opcode::v_lshrrev_b64, dst, Operand(0u), op);
+        bld.vop3(aco_opcode::v_lshrrev_b64, dst, Operand::zero(), op);
      } else {
        assert(Operand::is_constant_representable(op.constantValue64(), 8, false, true));
-       bld.vop3(aco_opcode::v_ashrrev_i64, dst, Operand(0u), op);
+       bld.vop3(aco_opcode::v_ashrrev_i64, dst, Operand::zero(), op);
      }
   } else if (dst.regClass() == v1) {
      bld.vop1(aco_opcode::v_mov_b32, dst, op);
@@ -1063,13 +1075,13 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
 
   if (dst.regClass() == v1b && ctx->program->chip_class >= GFX9) {
      uint8_t val = op.constantValue();
-     Operand op32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
+     Operand op32 = Operand::c32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
      if (op32.isLiteral()) {
        uint32_t a = (uint32_t)int8_mul_table[val * 2];
        uint32_t b = (uint32_t)int8_mul_table[val * 2 + 1];
        bld.vop2_sdwa(aco_opcode::v_mul_u32_u24, dst,
-                     Operand(a | (a & 0x80u ? 0xffffff00u : 0x0u)),
-                     Operand(b | (b & 0x80u ? 0xffffff00u : 0x0u)));
+                     Operand::c32(a | (a & 0x80u ? 0xffffff00u : 0x0u)),
+                     Operand::c32(b | (b & 0x80u ? 0xffffff00u : 0x0u)));
      } else {
        bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op32);
      }
@@ -1078,9 +1090,9 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
        /* use v_mov_b32 to avoid possible issues with denormal flushing or
         * NaN. v_add_f16 is still needed for float constants. */
        uint32_t val32 = (int32_t)(int16_t)op.constantValue();
-       bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, Operand(val32));
+       bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, Operand::c32(val32));
      } else {
-       bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand(0u));
+       bld.vop2_sdwa(aco_opcode::v_add_f16, dst, op, Operand::zero());
      }
   } else if (dst.regClass() == v2b && ctx->program->chip_class >= GFX10 &&
              (ctx->block->fp_mode.denorm16_64 & fp_denorm_keep_in)) {
@@ -1101,9 +1113,9 @@ copy_constant(lower_context* ctx, Builder& bld, Definition dst, Operand op)
      dst = Definition(PhysReg(dst.physReg().reg()), v1);
      Operand def_op(dst.physReg(), v1);
      if (val != mask)
-        bld.vop2(aco_opcode::v_and_b32, dst, Operand(~mask), def_op);
+        bld.vop2(aco_opcode::v_and_b32, dst, Operand::c32(~mask), def_op);
      if (val != 0)
-        bld.vop2(aco_opcode::v_or_b32, dst, Operand(val), def_op);
+        bld.vop2(aco_opcode::v_or_b32, dst, Operand::c32(val), def_op);
    }
  }
 }
@@ -1124,14 +1136,14 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres
      split_copy(ctx, offset, &def, &op, copy, false, 8);
 
      if (def.physReg() == scc) {
-        bld.sopc(aco_opcode::s_cmp_lg_i32, def, op, Operand(0u));
+        bld.sopc(aco_opcode::s_cmp_lg_i32, def, op, Operand::zero());
        *preserve_scc = true;
      } else if (op.isConstant()) {
        copy_constant(ctx, bld, def, op);
      } else if (def.regClass() == v1) {
        bld.vop1(aco_opcode::v_mov_b32, def, op);
      } else if (def.regClass() == v2) {
-       bld.vop3(aco_opcode::v_lshrrev_b64, def, Operand(0u), op);
+       bld.vop3(aco_opcode::v_lshrrev_b64, def, Operand::zero(), op);
      } else if (def.regClass() == s1) {
        bld.sop1(aco_opcode::s_mov_b32, def, op);
      } else if (def.regClass() == s2) {
@@ -1139,7 +1151,7 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres
     } else if (def.regClass().is_subdword() && ctx->program->chip_class < GFX8) {
        if (op.physReg().byte()) {
           assert(def.physReg().byte() == 0);
-          bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand(op.physReg().byte() * 8), op);
+          bld.vop2(aco_opcode::v_lshrrev_b32, def, Operand::c32(op.physReg().byte() * 8), op);
        } else if (def.physReg().byte()) {
          assert(op.physReg().byte() == 0);
          /* preserve the target's lower half */
@@ -1151,24 +1163,24 @@ do_copy(lower_context* ctx, Builder& bld, const copy_operation& copy, bool* pres
             Definition(lo_reg, RegClass::get(RegType::vgpr, lo_half.bytes() + op.bytes()));
 
          if (def.physReg().reg() == op.physReg().reg()) {
-            bld.vop2(aco_opcode::v_and_b32, lo_half, Operand((1 << bits) - 1u),
+            bld.vop2(aco_opcode::v_and_b32, lo_half, Operand::c32((1 << bits) - 1u),
                      Operand(lo_reg, lo_half.regClass()));
             if (def.physReg().byte() == 1) {
-               bld.vop2(aco_opcode::v_mul_u32_u24, dst, Operand((1 << bits) + 1u), op);
+               bld.vop2(aco_opcode::v_mul_u32_u24, dst, Operand::c32((1 << bits) + 1u), op);
             } else if (def.physReg().byte() == 2) {
               bld.vop2(aco_opcode::v_cvt_pk_u16_u32, dst, Operand(lo_reg, v2b), op);
             } else if (def.physReg().byte() == 3) {
              bld.sop1(aco_opcode::s_mov_b32, Definition(scratch_sgpr, s1),
-                      Operand((1 << bits) + 1u));
+                      Operand::c32((1 << bits) + 1u));
              bld.vop3(aco_opcode::v_mul_lo_u32, dst, Operand(scratch_sgpr, s1), op);
             }
          } else {
            lo_half.setFixed(lo_half.physReg().advance(4 - def.physReg().byte()));
-           bld.vop2(aco_opcode::v_lshlrev_b32, lo_half, Operand(32 - bits),
+           bld.vop2(aco_opcode::v_lshlrev_b32, lo_half, Operand::c32(32 - bits),
                     Operand(lo_reg, lo_half.regClass()));
            bld.vop3(aco_opcode::v_alignbyte_b32, dst, op,
                     Operand(lo_half.physReg(), lo_half.regClass()),
-                    Operand(4 - def.physReg().byte()));
+                    Operand::c32(4 - def.physReg().byte()));
          }
       } else {
         bld.vop1(aco_opcode::v_mov_b32, def, op);
@@ -1241,7 +1253,8 @@ do_swap(lower_context* ctx, Builder& bld, const copy_operation& copy, bool prese
      PhysReg other = op.physReg() == scc ? def.physReg() : op.physReg();
 
      bld.sop1(aco_opcode::s_mov_b32, Definition(pi->scratch_sgpr, s1), Operand(scc, s1));
-     bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(other, s1), Operand(0u));
+     bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(other, s1),
+              Operand::zero());
      bld.sop1(aco_opcode::s_mov_b32, Definition(other, s1), Operand(pi->scratch_sgpr, s1));
   } else if (def.regClass() == s1) {
      if (preserve_scc) {
@@ -1261,10 +1274,10 @@ do_swap(lower_context* ctx, Builder& bld, const copy_operation& copy, bool prese
      bld.sop2(aco_opcode::s_xor_b64, op_as_def, Definition(scc, s1), op, def_as_op);
      if (preserve_scc)
         bld.sopc(aco_opcode::s_cmp_lg_i32, Definition(scc, s1), Operand(pi->scratch_sgpr, s1),
-                 Operand(0u));
+                 Operand::zero());
   } else if (def.bytes() == 2 && def.physReg().reg() == op.physReg().reg()) {
      bld.vop3(aco_opcode::v_alignbyte_b32, Definition(def.physReg(), v1), def_as_op, op,
-              Operand(2u));
+              Operand::c32(2u));
   } else {
      assert(def.regClass().is_subdword());
      bld.vop2_sdwa(aco_opcode::v_xor_b32, op_as_def, op, def_as_op);
@@ -1289,7 +1302,7 @@ void
 do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Operand hi)
 {
    if (lo.isConstant() && hi.isConstant()) {
-      copy_constant(ctx, bld, def, Operand(lo.constantValue() | (hi.constantValue() << 16)));
+      copy_constant(ctx, bld, def, Operand::c32(lo.constantValue() | (hi.constantValue() << 16)));
      return;
   }
 
@@ -1306,9 +1319,9 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
 
   /* a single alignbyte can be sufficient: hi can be a 32-bit integer constant */
   if (lo.physReg().byte() == 2 && hi.physReg().byte() == 0 &&
-      (!hi.isConstant() || !Operand(hi.constantValue()).isLiteral() ||
+      (!hi.isConstant() || !Operand::c32(hi.constantValue()).isLiteral() ||
        ctx->program->chip_class >= GFX10)) {
-     bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand(2u));
+     bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand::c32(2u));
      return;
   }
 
@@ -1318,19 +1331,20 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
   if (lo.isConstant()) {
      /* move hi and zero low bits */
      if (hi.physReg().byte() == 0)
-        bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand(16u), hi);
+        bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand::c32(16u), hi);
      else
-        bld.vop2(aco_opcode::v_and_b32, def_hi, Operand(~0xFFFFu), hi);
-     bld.vop2(aco_opcode::v_or_b32, def, Operand(lo.constantValue()), Operand(def.physReg(), v1));
+        bld.vop2(aco_opcode::v_and_b32, def_hi, Operand::c32(~0xFFFFu), hi);
+     bld.vop2(aco_opcode::v_or_b32, def, Operand::c32(lo.constantValue()),
+              Operand(def.physReg(), v1));
      return;
   }
   if (hi.isConstant()) {
      /* move lo and zero high bits */
      if (lo.physReg().byte() == 2)
-        bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand(16u), lo);
+        bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), lo);
      else
-        bld.vop2(aco_opcode::v_and_b32, def_lo, Operand(0xFFFFu), lo);
-     bld.vop2(aco_opcode::v_or_b32, def, Operand(hi.constantValue() << 16u),
+        bld.vop2(aco_opcode::v_and_b32, def_lo, Operand::c32(0xFFFFu), lo);
+     bld.vop2(aco_opcode::v_or_b32, def, Operand::c32(hi.constantValue() << 16u),
               Operand(def.physReg(), v1));
      return;
   }
@@ -1338,12 +1352,12 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
   if (lo.physReg().reg() == def.physReg().reg()) {
      /* lo is in the high bits of def */
      assert(lo.physReg().byte() == 2);
-     bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand(16u), lo);
+     bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), lo);
      lo.setFixed(def.physReg());
   } else if (hi.physReg() == def.physReg()) {
      /* hi is in the low bits of def */
      assert(hi.physReg().byte() == 0);
-     bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand(16u), hi);
+     bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand::c32(16u), hi);
      hi.setFixed(def.physReg().advance(2));
   } else if (ctx->program->chip_class >= GFX8) {
      /* either lo or hi can be placed with just a v_mov */
@@ -1368,21 +1382,21 @@ do_pack_2x16(lower_context* ctx, Builder& bld, Definition def, Operand lo, Opera
   if (lo.physReg().byte() != hi.physReg().byte()) {
      /* | xx lo | hi xx | => | lo hi | lo hi | */
      assert(lo.physReg().byte() == 0 && hi.physReg().byte() == 2);
-     bld.vop3(aco_opcode::v_alignbyte_b32, def, lo, hi, Operand(2u));
+     bld.vop3(aco_opcode::v_alignbyte_b32, def, lo, hi, Operand::c32(2u));
      lo = Operand(def_hi.physReg(), v2b);
      hi = Operand(def_lo.physReg(), v2b);
   } else if (lo.physReg().byte() == 0) {
      /* | xx hi | xx lo | => | xx hi | lo 00 | */
-     bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand(16u), lo);
+     bld.vop2(aco_opcode::v_lshlrev_b32, def_hi, Operand::c32(16u), lo);
      lo = Operand(def_hi.physReg(), v2b);
   } else {
      /* | hi xx | lo xx | => | 00 hi | lo xx | */
      assert(hi.physReg().byte() == 2);
-     bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand(16u), hi);
+     bld.vop2(aco_opcode::v_lshrrev_b32, def_lo, Operand::c32(16u), hi);
      hi = Operand(def_lo.physReg(), v2b);
   }
 
   /* perform the alignbyte */
-  bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand(2u));
+  bld.vop3(aco_opcode::v_alignbyte_b32, def, hi, lo, Operand::c32(2u));
 }
 
 void
@@ -1816,7 +1830,7 @@ emit_set_mode(Builder& bld, float_mode new_mode, bool set_round, bool set_denorm
   } else if (set_round || set_denorm) {
      /* "((size - 1) << 11) | register" (MODE is encoded as register 1) */
      Instruction* instr =
-        bld.sopk(aco_opcode::s_setreg_imm32_b32, Operand(new_mode.val), (7 << 11) | 1).instr;
+        bld.sopk(aco_opcode::s_setreg_imm32_b32, Operand::c8(new_mode.val), (7 << 11) | 1).instr;
      /* has to be a literal */
      instr->operands[0].setFixed(PhysReg{255});
   }
@@ -1991,10 +2005,10 @@ lower_to_hw_instr(Program* program)
               for (unsigned i = 0; i < instr->operands[2].size(); i++) {
                  Operand src = instr->operands[2].isConstant()
-                                  ? Operand(uint32_t(instr->operands[2].constantValue64() >> (32 * i)))
+                                  ? Operand::c32(uint32_t(instr->operands[2].constantValue64() >> (32 * i)))
                                   : Operand(PhysReg{instr->operands[2].physReg() + i}, s1);
                  bld.writelane(bld.def(v1, instr->operands[0].physReg()), src,
-                               Operand(instr->operands[1].constantValue() + i),
+                               Operand::c32(instr->operands[1].constantValue() + i),
                                instr->operands[0]);
               }
               break;
@@ -2003,7 +2017,8 @@ lower_to_hw_instr(Program* program)
               assert(instr->operands[0].regClass() == v1.as_linear());
               for (unsigned i = 0; i < instr->definitions[0].size(); i++)
                  bld.readlane(bld.def(s1, PhysReg{instr->definitions[0].physReg() + i}),
-                              instr->operands[0], Operand(instr->operands[1].constantValue() + i));
+                              instr->operands[0],
+                              Operand::c32(instr->operands[1].constantValue() + i));
               break;
            }
            case aco_opcode::p_as_uniform: {
@@ -2037,11 +2052,11 @@ lower_to_hw_instr(Program* program)
            case aco_opcode::p_constaddr: {
               unsigned id = instr->definitions[0].tempId();
               PhysReg reg = instr->definitions[0].physReg();
-              bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand(id));
+              bld.sop1(aco_opcode::p_constaddr_getpc, instr->definitions[0], Operand::c32(id));
               bld.sop2(aco_opcode::p_constaddr_addlo, Definition(reg, s1), bld.def(s1, scc),
-                       Operand(reg, s1), Operand(id));
+                       Operand(reg, s1), Operand::c32(id));
               bld.sop2(aco_opcode::s_addc_u32, Definition(reg.advance(4), s1), bld.def(s1, scc),
-                       Operand(reg.advance(4), s1), Operand(0u), Operand(scc, s1));
+                       Operand(reg.advance(4), s1), Operand::zero(), Operand(scc, s1));
               break;
            }
            case aco_opcode::p_extract: {
@@ -2060,22 +2075,22 @@ lower_to_hw_instr(Program* program)
               if (dst.regClass() == s1) {
                  if (offset == (32 - bits)) {
                     bld.sop2(signext ? aco_opcode::s_ashr_i32 : aco_opcode::s_lshr_b32, dst,
-                             bld.def(s1, scc), op, Operand(offset));
+                             bld.def(s1, scc), op, Operand::c32(offset));
                  } else if (offset == 0 && signext && (bits == 8 || bits == 16)) {
                    bld.sop1(bits == 8 ? aco_opcode::s_sext_i32_i8 : aco_opcode::s_sext_i32_i16,
                             dst, op);
                  } else {
                    bld.sop2(signext ? aco_opcode::s_bfe_i32 : aco_opcode::s_bfe_u32, dst,
-                            bld.def(s1, scc), op, Operand((bits << 16) | offset));
+                            bld.def(s1, scc), op, Operand::c32((bits << 16) | offset));
                  }
               } else if (dst.regClass() == v1 || ctx.program->chip_class <= GFX7) {
                  assert(op.physReg().byte() == 0 && dst.physReg().byte() == 0);
                  if (offset == (32 - bits) && op.regClass() != s1) {
                     bld.vop2(signext ? aco_opcode::v_ashrrev_i32 : aco_opcode::v_lshrrev_b32, dst,
-                             Operand(offset), op);
+                             Operand::c32(offset), op);
                  } else {
                    bld.vop3(signext ? aco_opcode::v_bfe_i32 : aco_opcode::v_bfe_u32, dst, op,
-                            Operand(offset), Operand(bits));
+                            Operand::c32(offset), Operand::c32(bits));
                  }
               } else if (dst.regClass() == v2b) {
                  aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
@@ -2105,21 +2120,22 @@ lower_to_hw_instr(Program* program)
               if (dst.regClass() == s1) {
                  if (offset == (32 - bits)) {
-                    bld.sop2(aco_opcode::s_lshl_b32, dst, bld.def(s1, scc), op, Operand(offset));
+                    bld.sop2(aco_opcode::s_lshl_b32, dst, bld.def(s1, scc), op,
+                             Operand::c32(offset));
                  } else if (offset == 0) {
                    bld.sop2(aco_opcode::s_bfe_u32, dst, bld.def(s1, scc), op,
-                            Operand(bits << 16));
+                            Operand::c32(bits << 16));
                  } else {
                    bld.sop2(aco_opcode::s_bfe_u32, dst, bld.def(s1, scc), op,
-                            Operand(bits << 16));
+                            Operand::c32(bits << 16));
                    bld.sop2(aco_opcode::s_lshl_b32, dst, bld.def(s1, scc),
-                            Operand(dst.physReg(), s1), Operand(offset));
+                            Operand(dst.physReg(), s1), Operand::c32(offset));
                  }
               } else if (dst.regClass() == v1 || ctx.program->chip_class <= GFX7) {
                  if (offset == (dst.bytes() * 8u - bits)) {
-                    bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand(offset), op);
+                    bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op);
                  } else if (offset == 0) {
-                    bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand(0u), Operand(bits));
+                    bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
                  } else if (program->chip_class >= GFX9 ||
                             (op.regClass() != s1 && program->chip_class >= GFX8)) {
                    aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(
@@ -2131,8 +2147,8 @@ lower_to_hw_instr(Program* program)
                    sdwa->dst_sel = (bits == 8 ? sdwa_ubyte0 : sdwa_uword0) + (offset / bits);
                    bld.insert(std::move(sdwa));
                  } else {
-                    bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand(0u), Operand(bits));
-                    bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand(offset),
+                    bld.vop3(aco_opcode::v_bfe_u32, dst, op, Operand::zero(), Operand::c32(bits));
+                    bld.vop2(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset),
                              Operand(dst.physReg(), v1));
                  }
               } else {
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index da0769e..af7dc3a 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -189,7 +189,7 @@ struct ssa_info {
    void set_constant(chip_class chip, uint64_t constant)
    {
-      Operand op16((uint16_t)constant);
+      Operand op16 = Operand::c16(constant);
      Operand op32 = Operand::get_const(chip, constant, 4);
      add_label(label_literal);
      val = constant;
@@ -204,7 +204,7 @@ struct ssa_info {
         add_label(label_constant_64bit);
 
      if (label & label_constant_64bit) {
-        val = Operand(constant).constantValue();
+        val = Operand::c64(constant).constantValue();
        if (val != constant)
           label &= ~(label_literal | label_constant_16bit | label_constant_32bit);
      }
@@ -831,7 +831,7 @@ Operand
 get_constant_op(opt_ctx& ctx, ssa_info info, uint32_t bits)
 {
    if (bits == 64)
-      return Operand(info.val, true);
+      return Operand::c32_or_c64(info.val, true);
   return Operand::get_const(ctx.program->chip_class, info.val, bits / 8u);
 }
 
@@ -1161,7 +1161,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
            mubuf.offen = false;
            continue;
         } else if (i == 2 && info.is_constant_or_literal(32) && mubuf.offset + info.val < 4096) {
-           instr->operands[2] = Operand((uint32_t)0);
+           instr->operands[2] = Operand::c32(0);
           mubuf.offset += info.val;
           continue;
         } else if (mubuf.offen && i == 1 &&
@@ -1232,7 +1232,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
             ((ctx.program->chip_class == GFX6 && info.val <= 0x3FF) ||
              (ctx.program->chip_class == GFX7 && info.val <= 0xFFFFFFFF) ||
              (ctx.program->chip_class >= GFX8 && info.val <= 0xFFFFF))) {
-           instr->operands[i] = Operand(info.val);
+           instr->operands[i] = Operand::c32(info.val);
           continue;
         } else if (i == 1 &&
                    parse_base_offset(ctx, instr.get(), i, &base, &offset, prevent_overflow) &&
@@ -1243,13 +1243,13 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
              continue;
           }
           if (soe) {
-             smem.operands[1] = Operand(offset);
+             smem.operands[1] = Operand::c32(offset);
             smem.operands.back() = Operand(base);
           } else {
             SMEM_instruction* new_instr = create_instruction<SMEM_instruction>(
                smem.opcode, Format::SMEM, smem.operands.size() + 1, smem.definitions.size());
             new_instr->operands[0] = smem.operands[0];
-            new_instr->operands[1] = Operand(offset);
+            new_instr->operands[1] = Operand::c32(offset);
             if (smem.definitions.empty())
                new_instr->operands[2] = smem.operands[2];
             new_instr->operands.back() = Operand(base);
@@ -2392,14 +2392,14 @@ combine_add_or_then_and_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
          (extins->operands[1].constantValue() + 1) * extins->operands[2].constantValue() == 32) {
         op = new_op_lshl;
         operands[1] =
-           Operand(extins->operands[1].constantValue() * extins->operands[2].constantValue());
+           Operand::c32(extins->operands[1].constantValue() * extins->operands[2].constantValue());
      } else if (is_or &&
                 (extins->opcode == aco_opcode::p_insert ||
                  (extins->opcode == aco_opcode::p_extract &&
                   extins->operands[3].constantEquals(0))) &&
                 extins->operands[1].constantEquals(0)) {
        op = aco_opcode::v_and_or_b32;
-       operands[1] = Operand(extins->operands[2].constantEquals(8) ? 0xffu : 0xffffu);
+       operands[1] = Operand::c32(extins->operands[2].constantEquals(8) ? 0xffu : 0xffffu);
      } else {
        continue;
      }
@@ -2611,7 +2611,7 @@ combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode new_op
            ctx.uses.push_back(0);
         }
         new_instr->definitions[1].setHint(vcc);
-        new_instr->operands[0] = Operand(0u);
+        new_instr->operands[0] = Operand::zero();
        new_instr->operands[1] = instr->operands[!i];
        new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
        instr = std::move(new_instr);
@@ -3045,7 +3045,7 @@ combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
         if (ctx.uses[instr->operands[i].tempId()])
            ctx.uses[op_instr->operands[2].tempId()]++;
 
-        new_instr->operands[0] = Operand(0u);
+        new_instr->operands[0] = Operand::zero();
        new_instr->operands[1] = instr->operands[!i];
        new_instr->operands[2] = Operand(op_instr->operands[2]);
        new_instr->definitions[0] = instr->definitions[0];
@@ -3092,7 +3092,7 @@ combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
         aco_ptr<VOP3_instruction> new_instr{
            create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3, 3, 1)};
         new_instr->operands[0] = op_instr->operands[!shift_op_idx];
-        new_instr->operands[1] = Operand(multiplier);
+        new_instr->operands[1] = Operand::c32(multiplier);
        new_instr->operands[2] = instr->operands[!i];
        new_instr->definitions[0] = instr->definitions[0];
        instr = std::move(new_instr);
@@ -3479,7 +3479,7 @@ combine_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
            aco_ptr<VOP2_instruction> new_instr{
               create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1)};
-           new_instr->operands[0] = Operand(0u);
+           new_instr->operands[0] = Operand::zero();
           new_instr->operands[1] = instr->operands[!i];
           new_instr->operands[2] = Operand(ctx.info[instr->operands[i].tempId()].temp);
           new_instr->definitions[0] = instr->definitions[0];
@@ -3726,7 +3726,8 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
         aco_ptr<Pseudo_instruction> extract{create_instruction<Pseudo_instruction>(
            aco_opcode::p_extract_vector, Format::PSEUDO, 2, 1)};
         extract->operands[0] = instr->operands[0];
-        extract->operands[1] = Operand((uint32_t)split_offset / instr->definitions[idx].bytes());
+        extract->operands[1] =
+           Operand::c32((uint32_t)split_offset / instr->definitions[idx].bytes());
        extract->definitions[0] = instr->definitions[idx];
        instr.reset(extract.release());
      }
@@ -3885,7 +3886,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
         if (ctx.uses[op.tempId()] < literal_uses) {
            is_literal_sgpr = op.getTemp().type() == RegType::sgpr;
            mask = 0;
-           literal = Operand(ctx.info[op.tempId()].val);
+           literal = Operand::c32(ctx.info[op.tempId()].val);
           literal_uses = ctx.uses[op.tempId()];
           literal_id = op.tempId();
         }
@@ -3950,7 +3951,8 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
         new_mad->operands[0] = instr->operands[1 - info->literal_idx];
         new_mad->operands[1] = instr->operands[2];
      }
-     new_mad->operands[2] = Operand(ctx.info[instr->operands[info->literal_idx].tempId()].val);
+     new_mad->operands[2] =
+        Operand::c32(ctx.info[instr->operands[info->literal_idx].tempId()].val);
      new_mad->definitions[0] = instr->definitions[0];
      ctx.instructions.emplace_back(std::move(new_mad));
      return;
@@ -3963,7 +3965,7 @@
      Operand op = instr->operands[i];
      unsigned bits = get_operand_size(instr, i);
      if (op.isTemp() && ctx.info[op.tempId()].is_literal(bits) && ctx.uses[op.tempId()] == 0) {
-        Operand literal(ctx.info[op.tempId()].val);
+        Operand literal = Operand::c32(ctx.info[op.tempId()].val);
        if (instr->isVALU() && i > 0 && instr->format != Format::VOP3P)
           to_VOP3(ctx, instr);
        instr->operands[i] = literal;
diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index 2e426cf..d086eff 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -243,7 +243,7 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
         ctx.uses[instr->operands[0].tempId()]++;
 
      /* Set the opcode and operand to 32-bit */
-     instr->operands[1] = Operand(0u);
+     instr->operands[1] = Operand::zero();
      instr->opcode = (instr->opcode == aco_opcode::s_cmp_eq_u32 ||
                       instr->opcode == aco_opcode::s_cmp_eq_i32 ||
                       instr->opcode == aco_opcode::s_cmp_eq_u64)
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp
index 8996fae..40a4d8c 100644
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -325,7 +325,7 @@ do_reload(spill_ctx& ctx, Temp tmp, Temp new_name, uint32_t spill_id)
   } else {
      aco_ptr<Pseudo_instruction> reload{
         create_instruction<Pseudo_instruction>(aco_opcode::p_reload, Format::PSEUDO, 1, 1)};
-     reload->operands[0] = Operand(spill_id);
+     reload->operands[0] = Operand::c32(spill_id);
      reload->definitions[0] = Definition(new_name);
      ctx.is_reloaded[spill_id] = true;
      return reload;
@@ -863,7 +863,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
         aco_ptr<Pseudo_instruction> spill{
            create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
         spill->operands[0] = spill_op;
-        spill->operands[1] = Operand(spill_id);
+        spill->operands[1] = Operand::c32(spill_id);
        Block& pred = ctx.program->blocks[pred_idx];
        unsigned idx = pred.instructions.size();
        do {
@@ -920,7 +920,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
         aco_ptr<Pseudo_instruction> spill{
            create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
         spill->operands[0] = Operand(var);
-        spill->operands[1] = Operand(pair.second);
+        spill->operands[1] = Operand::c32(pair.second);
        Block& pred = ctx.program->blocks[pred_idx];
        unsigned idx = pred.instructions.size();
        do {
@@ -1204,7 +1204,7 @@ process_block(spill_ctx& ctx, unsigned block_idx, Block* block,
            aco_ptr<Pseudo_instruction> spill{
               create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 2, 0)};
            spill->operands[0] = Operand(to_spill);
-           spill->operands[1] = Operand(spill_id);
+           spill->operands[1] = Operand::c32(spill_id);
           instructions.emplace_back(std::move(spill));
        }
     }
@@ -1353,11 +1353,11 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
    Temp private_segment_buffer = ctx.program->private_segment_buffer;
    if (ctx.program->stage != compute_cs)
       private_segment_buffer =
-         bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand(0u));
+         bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
 
    if (offset)
       scratch_offset = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc),
-                                scratch_offset, Operand(offset));
+                                scratch_offset, Operand::c32(offset));
 
    uint32_t rsrc_conf =
       S_008F0C_ADD_TID_ENABLE(1) | S_008F0C_INDEX_STRIDE(ctx.program->wave_size == 64 ? 3 : 2);
@@ -1374,8 +1374,8 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
    if (ctx.program->chip_class <= GFX8)
       rsrc_conf |= S_008F0C_ELEMENT_SIZE(1);
 
-   return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer, Operand(-1u),
-                     Operand(rsrc_conf));
+   return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4), private_segment_buffer,
+                     Operand::c32(-1u), Operand::c32(rsrc_conf));
 }
 
 void
@@ -1666,7 +1666,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
               Pseudo_instruction* spill =
                  create_instruction<Pseudo_instruction>(aco_opcode::p_spill, Format::PSEUDO, 3, 0);
               spill->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
-              spill->operands[1] = Operand(spill_slot % ctx.wave_size);
+              spill->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
              spill->operands[2] = (*it)->operands[0];
              instructions.emplace_back(aco_ptr<Instruction>(spill));
           }
@@ -1750,7 +1750,7 @@ assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr)
              Pseudo_instruction* reload = create_instruction<Pseudo_instruction>(
                 aco_opcode::p_reload, Format::PSEUDO, 2, 1);
              reload->operands[0] = Operand(vgpr_spill_temps[spill_slot / ctx.wave_size]);
-             reload->operands[1] = Operand(spill_slot % ctx.wave_size);
+             reload->operands[1] = Operand::c32(spill_slot % ctx.wave_size);
             reload->definitions[0] = (*it)->definitions[0];
             instructions.emplace_back(aco_ptr<Instruction>(reload));
          }
diff --git a/src/amd/compiler/tests/helpers.cpp b/src/amd/compiler/tests/helpers.cpp
index d7dd3a9..7b6859c 100644
--- a/src/amd/compiler/tests/helpers.cpp
+++ b/src/amd/compiler/tests/helpers.cpp
@@ -238,34 +238,35 @@ void finish_assembler_test()
 void writeout(unsigned i, Temp tmp)
 {
    if (tmp.id())
-      bld.pseudo(aco_opcode::p_unit_test, Operand(i), tmp);
+      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
   else
-      bld.pseudo(aco_opcode::p_unit_test, Operand(i));
+      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
 }
 
 void writeout(unsigned i, aco::Builder::Result res)
 {
-   bld.pseudo(aco_opcode::p_unit_test, Operand(i), res);
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
 }
 
 void writeout(unsigned i, Operand op)
 {
-   bld.pseudo(aco_opcode::p_unit_test, Operand(i), op);
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
 }
 
 void writeout(unsigned i, Operand op0, Operand op1)
 {
-   bld.pseudo(aco_opcode::p_unit_test, Operand(i), op0, op1);
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
 }
 
 Temp fneg(Temp src)
 {
-   return bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0xbf800000u), src);
+   return bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), src);
 }
 
 Temp fabs(Temp src)
 {
-   Builder::Result res = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f800000u), src);
+   Builder::Result res =
+      bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), src);
    res.instr->vop3().abs[1] = true;
    return res;
 }
diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp
index 4769869..e970eba 100644
--- a/src/amd/compiler/tests/test_assembler.cpp
+++ b/src/amd/compiler/tests/test_assembler.cpp
@@ -220,9 +220,9 @@ BEGIN_TEST(assembler.long_jump.constaddr)
    //>> s_getpc_b64 s[0:1]                                          ; be801f00
    //! s_add_u32 s0, s0, 0xe0                                       ; 8000ff00 000000e0
-   bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand(0u));
+   bld.sop1(aco_opcode::p_constaddr_getpc, Definition(PhysReg(0), s2), Operand::zero());
    bld.sop2(aco_opcode::p_constaddr_addlo, Definition(PhysReg(0), s1), bld.def(s1, scc),
-            Operand(PhysReg(0), s1), Operand(0u));
+            Operand(PhysReg(0), s1), Operand::zero());
 
    program->blocks[2].linear_preds.push_back(0u);
    program->blocks[2].linear_preds.push_back(1u);
@@ -238,9 +238,9 @@ BEGIN_TEST(assembler.v_add3)
    //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
    //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
    aco_ptr<VOP3_instruction> add3{
      create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
-   add3->operands[0] = Operand(0u);
-   add3->operands[1] = Operand(0u);
-   add3->operands[2] = Operand(0u);
+   add3->operands[0] = Operand::zero();
+   add3->operands[1] = Operand::zero();
+   add3->operands[2] = Operand::zero();
    add3->definitions[0] = Definition(PhysReg(0), v1);
    bld.insert(std::move(add3));
 
@@ -256,9 +256,9 @@ BEGIN_TEST(assembler.v_add3_clamp)
    //~gfx9>> integer addition + clamp ; d1ff8000 02010080
    //~gfx10>> integer addition + clamp ; d76d8000 02010080
    aco_ptr<VOP3_instruction> add3{
      create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
-   add3->operands[0] = Operand(0u);
-   add3->operands[1] = Operand(0u);
-   add3->operands[2] = Operand(0u);
+   add3->operands[0] = Operand::zero();
+   add3->operands[1] = Operand::zero();
+   add3->operands[2] = Operand::zero();
    add3->definitions[0] = Definition(PhysReg(0), v1);
    add3->clamp = 1;
    bld.insert(std::move(add3));
diff --git a/src/amd/compiler/tests/test_hard_clause.cpp b/src/amd/compiler/tests/test_hard_clause.cpp
index 236b5d8..9def23f 100644
--- a/src/amd/compiler/tests/test_hard_clause.cpp
+++ b/src/amd/compiler/tests/test_hard_clause.cpp
@@ -30,25 +30,27 @@ static void create_mubuf(Temp desc=Temp(0, s8), unsigned vtx_binding=0)
 {
    Operand desc_op(desc);
    desc_op.setFixed(PhysReg(0));
-   bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1),
-             desc_op, Operand(PhysReg(256), v1),
-             Operand(0u), 0, false).instr->mubuf().vtx_binding = vtx_binding;
+   bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1), desc_op,
+             Operand(PhysReg(256), v1), Operand::zero(), 0, false)
+      .instr->mubuf()
+      .vtx_binding = vtx_binding;
 }
 
 static void create_mubuf_store()
 {
-   bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4),
-             Operand(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(0u), 0, false);
+   bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
+             Operand(PhysReg(256), v1), Operand::zero(), 0, false);
 }
 
 static void create_mtbuf(Temp desc=Temp(0, s8), unsigned vtx_binding=0)
 {
    Operand desc_op(desc);
    desc_op.setFixed(PhysReg(0));
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(PhysReg(256), v1),
-             desc_op, Operand(PhysReg(256), v1), Operand(0u),
-             V_008F0C_BUF_DATA_FORMAT_32, V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false)
-           .instr->mtbuf().vtx_binding = vtx_binding;
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, Definition(PhysReg(256), v1), desc_op,
+             Operand(PhysReg(256), v1), Operand::zero(), V_008F0C_BUF_DATA_FORMAT_32,
+             V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false)
+      .instr->mtbuf()
+      .vtx_binding = vtx_binding;
 }
 
 static void create_flat()
@@ -82,16 +84,15 @@ static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
 static void create_smem()
 {
-   bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1),
-            Operand(PhysReg(0), s2), Operand(0u));
+   bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1), Operand(PhysReg(0), s2),
+            Operand::zero());
 }
 
 static void create_smem_buffer(Temp desc=Temp(0, s4))
 {
    Operand desc_op(desc);
    desc_op.setFixed(PhysReg(0));
-   bld.smem(aco_opcode::s_buffer_load_dword, Definition(PhysReg(0), s1),
-            desc_op, Operand(0u));
+   bld.smem(aco_opcode::s_buffer_load_dword, Definition(PhysReg(0), s1), desc_op, Operand::zero());
 }
 
 BEGIN_TEST(form_hard_clauses.type_restrictions)
@@ -102,7 +103,7 @@ BEGIN_TEST(form_hard_clauses.type_restrictions)
    //! s_clause imm:1
    //; search_re('image_sample')
    //; search_re('image_sample')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
    create_mimg(false);
    create_mimg(false);
 
    //>> p_unit_test 1
    //! s_clause imm:1
    //; search_re('buffer_load_dword')
    //; search_re('buffer_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
    create_mubuf();
    create_mubuf();
 
@@ -110,7 +111,7 @@
    //! s_clause imm:1
    //; search_re('global_load_dword')
    //; search_re('global_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
    create_global();
    create_global();
 
@@ -118,7 +119,7 @@
    //! s_clause imm:1
    //; search_re('flat_load_dword')
    //; search_re('flat_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
    create_flat();
    create_flat();
 
@@ -134,28 +135,28 @@
    //! s_clause imm:1
    //; search_re('s_load_dword')
    //; search_re('s_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
    create_smem();
    create_smem();
 
    //>> p_unit_test 5
    //; search_re('buffer_load_dword')
    //; search_re('flat_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
    create_mubuf();
    create_flat();
 
    //>> p_unit_test 6
    //; search_re('buffer_load_dword')
    //; search_re('s_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
    create_mubuf();
    create_smem();
 
    //>> p_unit_test 7
    //; search_re('flat_load_dword')
    //; search_re('s_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
    create_flat();
    create_smem();
 
@@ -168,14 +169,14 @@ BEGIN_TEST(form_hard_clauses.size)
 
    //>> p_unit_test 0
    //; search_re('s_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
    create_smem();
 
    //>> p_unit_test 1
    //! s_clause imm:63
    //; for i in range(64):
    //;    search_re('s_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
    for (unsigned i = 0; i < 64; i++)
       create_smem();
 
@@ -183,7 +184,7 @@ BEGIN_TEST(form_hard_clauses.size)
    //! s_clause imm:63
    //; for i in range(65):
    //;    search_re('s_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
    for (unsigned i = 0; i < 65; i++)
       create_smem();
 
@@ -194,7 +195,7 @@ BEGIN_TEST(form_hard_clauses.size)
    //! s_clause imm:1
    //; search_re('s_load_dword')
    //; search_re('s_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
    for (unsigned i = 0; i < 66; i++)
       create_smem();
 
@@ -210,7 +211,7 @@ BEGIN_TEST(form_hard_clauses.nsa)
    //! s_clause imm:1
    //; search_re('image_sample .* %0:v\[0\], %0:v\[1\]')
    //; search_re('image_sample .* %0:v\[0\], %0:v\[1\]')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
    create_mimg(false);
    create_mimg(false);
 
@@ -218,7 +219,7 @@ BEGIN_TEST(form_hard_clauses.nsa)
    //~gfx10_3! s_clause imm:1
    //; search_re('image_sample .* %0:v\[0\], %0:v\[1\]')
    //; search_re('image_sample .* %0:v\[0\], %0:v\[2\]')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
    create_mimg(false);
    create_mimg(true);
 
@@ -226,7 +227,7 @@ BEGIN_TEST(form_hard_clauses.nsa)
    //~gfx10_3! s_clause imm:1
    //; search_re('image_sample .* %0:v\[0\], %0:v\[2\]')
    //; search_re('image_sample .* %0:v\[0\], %0:v\[2\]')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
    create_mimg(true);
    create_mimg(true);
 
@@ -248,14 +249,14 @@ BEGIN_TEST(form_hard_clauses.heuristic)
    //! s_clause imm:1
    //; search_re('image_sample')
    //; search_re('image_sample')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
    create_mimg(false, img_desc0);
    create_mimg(false, img_desc0);
 
    //>> p_unit_test 1
    //; search_re('image_sample')
    //; search_re('image_sample')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
    create_mimg(false, img_desc0);
    create_mimg(false, img_desc1);
 
@@ -263,14 +264,14 @@ BEGIN_TEST(form_hard_clauses.heuristic)
    //! s_clause imm:1
    //; search_re('buffer_load_dword')
    //; search_re('buffer_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
    create_mubuf(buf_desc0);
    create_mubuf(buf_desc0);
 
    //>> p_unit_test 3
    //; search_re('buffer_load_dword')
    //; search_re('buffer_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
    create_mubuf(buf_desc0);
    create_mubuf(buf_desc1);
 
@@ -278,21 +279,21 @@ BEGIN_TEST(form_hard_clauses.heuristic)
    //! s_clause imm:1
    //; search_re('s_buffer_load_dword')
    //; search_re('s_buffer_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
    create_smem_buffer(buf_desc0);
    create_smem_buffer(buf_desc0);
 
    //>> p_unit_test 5
    //; search_re('s_buffer_load_dword')
    //; search_re('s_buffer_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
    create_smem_buffer(buf_desc0);
    create_smem_buffer(buf_desc1);
 
    //>> p_unit_test 6
    //; search_re('s_buffer_load_dword')
    //; search_re('s_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(6u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
    create_smem_buffer(buf_desc0);
    create_smem();
 
@@ -302,7 +303,7 @@ BEGIN_TEST(form_hard_clauses.heuristic)
    //>> p_unit_test 7
    //; search_re('buffer_load_dword')
    //; search_re('tbuffer_load_format_x')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(7u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
    create_mubuf(buf_desc0);
    create_mtbuf(buf_desc0);
 
@@ -310,7 +311,7 @@ BEGIN_TEST(form_hard_clauses.heuristic)
    //! s_clause imm:1
    //; search_re('buffer_load_dword')
    //; search_re('tbuffer_load_format_x')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(8u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
    create_mubuf(buf_desc0, 1);
    create_mtbuf(buf_desc0, 1);
 
@@ -318,7 +319,7 @@ BEGIN_TEST(form_hard_clauses.heuristic)
    //! s_clause imm:1
    //; search_re('buffer_load_dword')
    //; search_re('tbuffer_load_format_x')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(9u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
    create_mubuf(buf_desc0, 1);
    create_mtbuf(buf_desc1, 1);
 
@@ -332,7 +333,7 @@ BEGIN_TEST(form_hard_clauses.stores)
    //>> p_unit_test 0
    //; search_re('buffer_store_dword')
    //; search_re('buffer_store_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
    create_mubuf_store();
    create_mubuf_store();
 
@@ -341,7 +342,7 @@ BEGIN_TEST(form_hard_clauses.stores)
    //; search_re('buffer_load_dword')
    //; search_re('buffer_load_dword')
    //; search_re('buffer_store_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
    create_mubuf();
    create_mubuf();
    create_mubuf_store();
 
@@ -351,7 +352,7 @@ BEGIN_TEST(form_hard_clauses.stores)
    //! s_clause imm:1
    //; search_re('buffer_load_dword')
    //; search_re('buffer_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
    create_mubuf_store();
    create_mubuf();
    create_mubuf();
 
@@ -361,7 +362,7 @@ BEGIN_TEST(form_hard_clauses.stores)
    //; search_re('buffer_load_dword')
    //; search_re('buffer_store_dword')
    //; search_re('buffer_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
    create_mubuf();
    create_mubuf_store();
    create_mubuf();
 
@@ -373,7 +374,7 @@ BEGIN_TEST(form_hard_clauses.stores)
    //; for i in range(63):
    //;    search_re('buffer_load_dword')
    //; search_re('buffer_load_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
    create_mubuf_store();
    for (unsigned i = 0; i < 64; i++)
       create_mubuf();
 
@@ -383,7 +384,7 @@ BEGIN_TEST(form_hard_clauses.stores)
    //; for i in range(64):
    //;    search_re('buffer_load_dword')
    //; search_re('buffer_store_dword')
-   bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
    for (unsigned i = 0; i < 64; i++)
       create_mubuf();
    create_mubuf_store();
diff --git a/src/amd/compiler/tests/test_insert_nops.cpp b/src/amd/compiler/tests/test_insert_nops.cpp
index 94ea76b..7587246 100644
--- a/src/amd/compiler/tests/test_insert_nops.cpp
+++ b/src/amd/compiler/tests/test_insert_nops.cpp
@@ -27,9 +27,8 @@ using namespace aco;
 
 void create_mubuf(unsigned offset)
 {
-   bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1),
-             Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
-             Operand(0u), offset, true);
+   bld.mubuf(aco_opcode::buffer_load_dword, Definition(PhysReg(256), v1), Operand(PhysReg(0), s4),
+             Operand(PhysReg(256), v1), Operand::zero(), offset, true);
 }
 
 void create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
@@ -58,7 +57,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
    //>> p_unit_test 0
    //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation
    //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:8 offen storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
    create_mimg(true, 6, 4);
    create_mubuf(8);
 
@@ -67,7 +66,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
    //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation
    //! s_nop
    //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
    create_mimg(true, 6, 4);
    create_mubuf(4);
 
   //! p_unit_test 2
   //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[1], %0:v[2], %0:v[3], %0:v[4], %0:v[5] 2d storage: semantics: scope:invocation
   //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
   create_mimg(false, 6, 2);
   create_mubuf(4);
 
@@ -84,7 +83,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
   //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[2], %0:v[4], %0:v[6], %0:v[8], %0:v[10] 2d storage: semantics: scope:invocation
   //! v_nop
   //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
   create_mimg(true, 6, 4);
   bld.vop1(aco_opcode::v_nop);
   create_mubuf(4);
 
@@ -93,7 +92,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
   //! p_unit_test 4
   //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation
   //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(4u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
   create_mimg(true, 2, 3);
   create_mubuf(4);
 
@@ -104,7 +103,7 @@ BEGIN_TEST(insert_nops.nsa_to_vmem_bug)
   //! /* logical preds: / linear preds: BB0, / kind: uniform, */
   //! s_nop
   //! v1: %0:v[0] = buffer_load_dword %0:s[0-3], %0:v[0], 0 offset:4 offen storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(5u));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
   create_mimg(true, 6, 4);
   bld.reset(program->create_and_insert_block());
   create_mubuf(4);
 
@@ -123,16 +122,18 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug)
   //! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255]
   //! s_nop
   //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(0u));
-   bld.writelane(Definition(PhysReg(511), v1), Operand(0u), Operand(0u), Operand(PhysReg(511), v1));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
+   bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(),
+                 Operand(PhysReg(511), v1));
   create_mimg(true, 2, 3);
 
   /* no nop needed because the MIMG is not NSA */
   //! p_unit_test 1
   //! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255]
   //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[1] 2d storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(1u));
-   bld.writelane(Definition(PhysReg(511), v1), Operand(0u), Operand(0u), Operand(PhysReg(511), v1));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
+   bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(),
+                 Operand(PhysReg(511), v1));
   create_mimg(false, 2, 2);
 
   /* no nop needed because there's already an instruction in-between */
@@ -140,8 +141,9 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug)
   //! v1: %0:v[255] = v_writelane_b32_e64 0, 0, %0:v[255]
   //! v_nop
   //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(2u));
-   bld.writelane(Definition(PhysReg(511), v1), Operand(0u), Operand(0u), Operand(PhysReg(511), v1));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
+   bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(),
+                 Operand(PhysReg(511), v1));
   bld.vop1(aco_opcode::v_nop);
   create_mimg(true, 2, 3);
 
@@ -152,8 +154,9 @@ BEGIN_TEST(insert_nops.writelane_to_nsa_bug)
   //! /* logical preds: / linear preds: BB0, / kind: uniform, */
   //! s_nop
   //! v1: %0:v[0] = image_sample %0:s[0-7], %0:s[0-3],  v1: undef, %0:v[0], %0:v[2] 2d storage: semantics: scope:invocation
-   bld.pseudo(aco_opcode::p_unit_test, Operand(3u));
-   bld.writelane(Definition(PhysReg(511), v1), Operand(0u), Operand(0u), Operand(PhysReg(511), v1));
+   bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
+   bld.writelane(Definition(PhysReg(511), v1), Operand::zero(), Operand::zero(),
+                 Operand(PhysReg(511), v1));
   bld.reset(program->create_and_insert_block());
   create_mimg(true, 2, 3);
   program->blocks[0].linear_succs.push_back(1);
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 256fd17..54a11d3 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -41,7 +41,7 @@ BEGIN_TEST(optimize.neg)
    //~gfx10! v1: %res1 = v_mul_f32 0x123456, -%a
    //! p_unit_test 1, %res1
    Temp neg_a = fneg(inputs[0]);
-   writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x123456u), neg_a));
+   writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x123456u), neg_a));
 
    //! v1: %res2 = v_mul_f32 %a, %b
    //! p_unit_test 2, %res2
@@ -97,56 +97,60 @@ BEGIN_TEST(optimize.output_modifiers)
    //! v1: %res0 = v_add_f32 %a, %b *0.5
    //! p_unit_test 0, %res0
    Temp tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
-   writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f000000u), tmp));
+   writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f000000u), tmp));
 
    //! v1: %res1 = v_add_f32 %a, %b *2
    //! p_unit_test 1, %res1
    tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
-   writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+   writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
 
    //! v1: %res2 = v_add_f32 %a, %b *4
    //! p_unit_test 2, %res2
    tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
-   writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40800000u), tmp));
+   writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40800000u), tmp));
 
    //! v1: %res3 = v_add_f32 %a, %b clamp
    //! p_unit_test 3, %res3
    tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
-   writeout(3, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
+   writeout(3, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
+                        Operand::c32(0x3f800000u), tmp));
 
    //! v1: %res4 = v_add_f32 %a, %b *2 clamp
    //! p_unit_test 4, %res4
    tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
-   tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp);
-   writeout(4, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp));
+   tmp = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp);
+   writeout(4, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(),
+                        Operand::c32(0x3f800000u), tmp));
 
    /* 16-bit modifiers */
 
    //! v2b: %res5 = v_add_f16 %a, %b *0.5
    //! p_unit_test 5, %res5
    tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
-   writeout(5, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x3800u), tmp));
+   writeout(5, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x3800u), tmp));
 
   //! v2b: %res6 = v_add_f16 %a, %b *2
   //! p_unit_test 6, %res6
   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
-   writeout(6, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp));
+   writeout(6, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp));
 
   //! v2b: %res7 = v_add_f16 %a, %b *4
   //! p_unit_test 7, %res7
   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
-   writeout(7, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4400u), tmp));
+   writeout(7, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4400u), tmp));
 
   //! v2b: %res8 = v_add_f16 %a, %b clamp
   //! p_unit_test 8, %res8
   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
-   writeout(8, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
+   writeout(8, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
+                        Operand::c16(0x3c00u), tmp));
 
   //! v2b: %res9 = v_add_f16 %a, %b *2 clamp
   //! p_unit_test 9, %res9
   tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]);
-   tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000), tmp);
-   writeout(9, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp));
+   tmp = bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000), tmp);
+   writeout(9, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u),
+                        Operand::c16(0x3c00u), tmp));
 
   /* clamping is done after omod */
 
@@ -154,8 +158,9 @@ BEGIN_TEST(optimize.output_modifiers)
   //! v1: %res10 = v_mul_f32 2.0, %res10_tmp
   //! p_unit_test 10, %res10
   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
-   tmp = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp);
-   writeout(10, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+   tmp = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
+                  tmp);
+   writeout(10, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
 
   /* unsupported instructions */
 
@@ -163,7 +168,7 @@ BEGIN_TEST(optimize.output_modifiers)
   //! v1: %res11 = v_mul_f32 2.0, %res11_tmp
   //! p_unit_test 11, %res11
   tmp = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], inputs[1]);
-   writeout(11, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+   writeout(11, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
 
   /* several users */
 
@@ -173,12 +178,12 @@ BEGIN_TEST(optimize.output_modifiers)
   //! p_unit_test 12, %res12
   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
   bld.pseudo(aco_opcode::p_unit_test, tmp);
-   writeout(12, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp));
+   writeout(12, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp));
 
   //! v1: %res13 = v_add_f32 %a, %b
   //! p_unit_test 13, %res13
   tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]);
-   bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp);
+   bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp);
   writeout(13, tmp);
 
   /* omod has no effect if denormals are enabled but clamp is fine */
@@ -193,12 +198,13 @@
   //! v1: %res14 = v_mul_f32 2.0, %res13_tmp
   //!
p_unit_test 14, %res14 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(14, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp)); + writeout(14, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp)); //! v1: %res15 = v_add_f32 %a, %b clamp //! p_unit_test 15, %res15 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(15, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp)); + writeout(15, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(), + Operand::c32(0x3f800000u), tmp)); //>> BB2 //! /* logical preds: / linear preds: / kind: uniform, */ @@ -210,12 +216,13 @@ BEGIN_TEST(optimize.output_modifiers) //! v2b: %res16 = v_mul_f16 2.0, %res15_tmp //! p_unit_test 16, %res16 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]); - writeout(16, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp)); + writeout(16, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp)); //! v2b: %res17 = v_add_f16 %a, %b clamp //! p_unit_test 17, %res17 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]); - writeout(17, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp)); + writeout(17, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u), + Operand::c16(0x3c00u), tmp)); /* omod flushes -0.0 to +0.0 */ @@ -231,11 +238,12 @@ BEGIN_TEST(optimize.output_modifiers) //! v1: %res18 = v_mul_f32 2.0, %res18_tmp //! p_unit_test 18, %res18 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(18, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x40000000u), tmp)); + writeout(18, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000u), tmp)); //! v1: %res19 = v_add_f32 %a, %b clamp //! p_unit_test 19, %res19 tmp = bld.vop2(aco_opcode::v_add_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(19, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand(0u), Operand(0x3f800000u), tmp)); + writeout(19, bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), Operand::zero(), + Operand::c32(0x3f800000u), tmp)); //>> BB4 //! /* logical preds: / linear preds: / kind: uniform, */ @@ -246,11 +254,12 @@ BEGIN_TEST(optimize.output_modifiers) //! v2b: %res20 = v_mul_f16 2.0, %res20_tmp //! p_unit_test 20, %res20 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]); - writeout(20, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand((uint16_t)0x4000u), tmp)); + writeout(20, bld.vop2(aco_opcode::v_mul_f16, bld.def(v2b), Operand::c16(0x4000u), tmp)); //! v2b: %res21 = v_add_f16 %a, %b clamp //! p_unit_test 21, %res21 tmp = bld.vop2(aco_opcode::v_add_f16, bld.def(v2b), inputs[0], inputs[1]); - writeout(21, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand((uint16_t)0u), Operand((uint16_t)0x3c00u), tmp)); + writeout(21, bld.vop3(aco_opcode::v_med3_f16, bld.def(v2b), Operand::c16(0u), + Operand::c16(0x3c00u), tmp)); finish_opt_test(); END_TEST @@ -270,33 +279,34 @@ BEGIN_TEST(optimize.cndmask) //! v1: %res0 = v_cndmask_b32 0, %a, %c //! p_unit_test 0, %res0 - subbrev = create_subbrev_co(Operand(0u), Operand(0u), Operand(inputs[2])); + subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2])); writeout(0, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), inputs[0], subbrev)); //! v1: %res1 = v_cndmask_b32 0, 42, %c //! 
p_unit_test 1, %res1 - subbrev = create_subbrev_co(Operand(0u), Operand(0u), Operand(inputs[2])); - writeout(1, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(42u), subbrev)); + subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2])); + writeout(1, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand::c32(42u), subbrev)); //~gfx9! v1: %subbrev, s2: %_ = v_subbrev_co_u32 0, 0, %c //~gfx9! v1: %res2 = v_and_b32 %b, %subbrev //~gfx10! v1: %res2 = v_cndmask_b32 0, %b, %c //! p_unit_test 2, %res2 - subbrev = create_subbrev_co(Operand(0u), Operand(0u), Operand(inputs[2])); + subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2])); writeout(2, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), inputs[1], subbrev)); //! v1: %subbrev1, s2: %_ = v_subbrev_co_u32 0, 0, %c //! v1: %xor = v_xor_b32 %a, %subbrev1 //! v1: %res3 = v_cndmask_b32 0, %xor, %c //! p_unit_test 3, %res3 - subbrev = create_subbrev_co(Operand(0u), Operand(0u), Operand(inputs[2])); + subbrev = create_subbrev_co(Operand::zero(), Operand::zero(), Operand(inputs[2])); Temp xor_a = bld.vop2(aco_opcode::v_xor_b32, bld.def(v1), inputs[0], subbrev); writeout(3, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), xor_a, subbrev)); //! v1: %res4 = v_cndmask_b32 0, %a, %c //! p_unit_test 4, %res4 - Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), Operand(1u), Operand(inputs[2])); - Temp sub = bld.vsub32(bld.def(v1), Operand(0u), cndmask); + Temp cndmask = bld.vop2_e64(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), + Operand::c32(1u), Operand(inputs[2])); + Temp sub = bld.vsub32(bld.def(v1), Operand::zero(), cndmask); writeout(4, bld.vop2(aco_opcode::v_and_b32, bld.def(v1), Operand(inputs[0]), sub)); finish_opt_test(); @@ -315,9 +325,10 @@ BEGIN_TEST(optimize.add_lshl) //~gfx8! s1: %res0, s1: %_:scc = s_add_u32 %lshl0, 4 //~gfx(9|10)! s1: %res0, s1: %_:scc = s_lshl3_add_u32 %a, 4 //! p_unit_test 0, %res0 - shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), - Operand(inputs[0]), Operand(3u)); - writeout(0, bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), shift, Operand(4u))); + shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand(inputs[0]), + Operand::c32(3u)); + writeout(0, bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), shift, + Operand::c32(4u))); //~gfx8! s1: %lshl1, s1: %_:scc = s_lshl_b32 %a, 3 //~gfx8! s1: %add1, s1: %_:scc = s_add_u32 %lshl1, 4 @@ -327,9 +338,10 @@ BEGIN_TEST(optimize.add_lshl) //~gfx(9|10)! v1: %lshl_add = v_lshl_add_u32 %a, 3, %b //~gfx(9|10)! v1: %res1 = v_add_u32 %lshl1, %lshl_add //! p_unit_test 1, %res1 - shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), - Operand(inputs[0]), Operand(3u)); - Temp sadd = bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), shift, Operand(4u)); + shift = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc), Operand(inputs[0]), + Operand::c32(3u)); + Temp sadd = + bld.sop2(aco_opcode::s_add_u32, bld.def(s1), bld.def(s1, scc), shift, Operand::c32(4u)); Temp vadd = bld.vadd32(bld.def(v1), shift, Operand(inputs[1])); writeout(1, bld.vadd32(bld.def(v1), sadd, vadd)); @@ -337,7 +349,8 @@ BEGIN_TEST(optimize.add_lshl) //~gfx8! v1: %res2, s2: %_ = v_add_co_u32 %lshl2, %b //~gfx(9|10)! v1: %res2 = v_lshl_add_u32 %a, 3, %b //! 
p_unit_test 2, %res2 - Temp lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), Operand(inputs[0]), Operand(3u)); + Temp lshl = + bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), Operand(inputs[0]), Operand::c32(3u)); writeout(2, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]))); //~gfx8! s1: %lshl3 = s_lshl_b32 (is24bit)%a, 7 @@ -346,14 +359,14 @@ BEGIN_TEST(optimize.add_lshl) //! p_unit_test 3, %res3 Operand a_24bit = Operand(inputs[0]); a_24bit.set24bit(true); - lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand(7u)); + lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand::c32(7u)); writeout(3, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]))); //! s1: %lshl4 = s_lshl_b32 (is24bit)%a, 3 //~gfx(8|9)! v1: %res4, s2: %carry = v_add_co_u32 %lshl4, %b //~gfx10! v1: %res4, s2: %carry = v_add_co_u32_e64 %lshl4, %b //! p_unit_test 4, %carry - lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand(3u)); + lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand::c32(3u)); Temp carry = bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]), true).def(1).getTemp(); writeout(4, carry); @@ -367,7 +380,7 @@ BEGIN_TEST(optimize.add_lshl) //~gfx8! v1: %res6 = v_mad_u32_u24 (is24bit)%a, 8, %b //~gfx(9|10)! v1: %res6 = v_lshl_add_u32 (is24bit)%a, 3, %b //! p_unit_test 6, %res6 - lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand(3u)); + lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_24bit, Operand::c32(3u)); writeout(6, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]))); //~gfx8! v1: %res7 = v_mad_u32_u24 (is16bit)%a, 16, %b @@ -375,7 +388,7 @@ BEGIN_TEST(optimize.add_lshl) //! p_unit_test 7, %res7 Operand a_16bit = Operand(inputs[0]); a_16bit.set16bit(true); - lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_16bit, Operand(4u)); + lshl = bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), a_16bit, Operand::c32(4u)); writeout(7, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]))); finish_opt_test(); @@ -398,27 +411,28 @@ BEGIN_TEST(optimize.mad_u32_u16) //! v1: %res0 = v_mul_u32_u24 (is16bit)%a, (is16bit)%b //! p_unit_test 0, %res0 - writeout(0, create_mad_u32_u16(Operand(inputs[0]), Operand(inputs[1]), Operand(0u))); + writeout(0, create_mad_u32_u16(Operand(inputs[0]), Operand(inputs[1]), Operand::zero())); //! v1: %res1 = v_mul_u32_u24 42, (is16bit)%a //! p_unit_test 1, %res1 - writeout(1, create_mad_u32_u16(Operand(42u), Operand(inputs[0]), Operand(0u))); + writeout(1, create_mad_u32_u16(Operand::c32(42u), Operand(inputs[0]), Operand::zero())); //! v1: %res2 = v_mul_u32_u24 42, (is16bit)%a //! p_unit_test 2, %res2 - writeout(2, create_mad_u32_u16(Operand(inputs[0]), Operand(42u), Operand(0u))); + writeout(2, create_mad_u32_u16(Operand(inputs[0]), Operand::c32(42u), Operand::zero())); //! v1: %res3 = v_mul_u32_u24 (is16bit)%c, (is16bit)%a //! p_unit_test 3, %res3 - writeout(3, create_mad_u32_u16(Operand(inputs[2]), Operand(inputs[0]), Operand(0u))); + writeout(3, create_mad_u32_u16(Operand(inputs[2]), Operand(inputs[0]), Operand::zero())); //! v1: %res4 = v_mad_u32_u16 42, (is16bit)%c, 0 //! p_unit_test 4, %res4 - writeout(4, create_mad_u32_u16(Operand(42u), Operand(inputs[2]), Operand(0u))); + writeout(4, create_mad_u32_u16(Operand::c32(42u), Operand(inputs[2]), Operand::zero())); //! v1: %res5 = v_mad_u32_u16 42, %a, 0 //! 
p_unit_test 5, %res5 - writeout(5, create_mad_u32_u16(Operand(42u), Operand(inputs[0]), Operand(0u), false)); + writeout(5, + create_mad_u32_u16(Operand::c32(42u), Operand(inputs[0]), Operand::zero(), false)); //~gfx9! v1: %mul6 = v_mul_lo_u16 %a, %b //~gfx9! v1: %res6 = v_add_u32 %mul6, %b @@ -458,31 +472,31 @@ BEGIN_TEST(optimize.bcnt) //! v1: %res0 = v_bcnt_u32_b32 %a, %a //! p_unit_test 0, %res0 - bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand(0u)); + bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero()); writeout(0, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0]))); //! v1: %res1 = v_bcnt_u32_b32 %a, %b //! p_unit_test 1, %res1 - bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand(0u)); + bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero()); writeout(1, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[1]))); //! v1: %res2 = v_bcnt_u32_b32 %a, 42 //! p_unit_test 2, %res2 - bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand(0u)); - writeout(2, bld.vadd32(bld.def(v1), bcnt, Operand(42u))); + bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero()); + writeout(2, bld.vadd32(bld.def(v1), bcnt, Operand::c32(42u))); //! v1: %bnct3 = v_bcnt_u32_b32 %b, 0 //~gfx8! v1: %res3, s2: %_ = v_add_co_u32 %bcnt3, %a //~gfx(9|10)! v1: %res3 = v_add_u32 %bcnt3, %a //! p_unit_test 3, %res3 - bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[1]), Operand(0u)); + bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[1]), Operand::zero()); writeout(3, bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0]))); //! v1: %bnct4 = v_bcnt_u32_b32 %a, 0 //~gfx(8|9)! v1: %add4, s2: %carry = v_add_co_u32 %bcnt4, %a //~gfx10! v1: %add4, s2: %carry = v_add_co_u32_e64 %bcnt4, %a //! 
p_unit_test 4, %carry - bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand(0u)); + bcnt = bld.vop3(aco_opcode::v_bcnt_u32_b32, bld.def(v1), Operand(inputs[0]), Operand::zero()); Temp carry = bld.vadd32(bld.def(v1), bcnt, Operand(inputs[0]), true).def(1).getTemp(); writeout(4, carry); @@ -499,28 +513,28 @@ struct clamp_config { static const clamp_config clamp_configs[] = { /* 0.0, 4.0 */ {"_0,4f32", aco_opcode::v_min_f32, aco_opcode::v_max_f32, aco_opcode::v_med3_f32, - Operand(0u), Operand(0x40800000u)}, + Operand::zero(), Operand::c32(0x40800000u)}, {"_0,4f16", aco_opcode::v_min_f16, aco_opcode::v_max_f16, aco_opcode::v_med3_f16, - Operand((uint16_t)0u), Operand((uint16_t)0x4400)}, + Operand::c16(0u), Operand::c16(0x4400)}, /* -1.0, 0.0 */ {"_-1,0f32", aco_opcode::v_min_f32, aco_opcode::v_max_f32, aco_opcode::v_med3_f32, - Operand(0xbf800000u), Operand(0u)}, + Operand::c32(0xbf800000u), Operand::zero()}, {"_-1,0f16", aco_opcode::v_min_f16, aco_opcode::v_max_f16, aco_opcode::v_med3_f16, - Operand((uint16_t)0xBC00), Operand((uint16_t)0u)}, + Operand::c16(0xBC00), Operand::c16(0u)}, /* 0, 3 */ {"_0,3u32", aco_opcode::v_min_u32, aco_opcode::v_max_u32, aco_opcode::v_med3_u32, - Operand(0u), Operand(3u)}, + Operand::zero(), Operand::c32(3u)}, {"_0,3u16", aco_opcode::v_min_u16, aco_opcode::v_max_u16, aco_opcode::v_med3_u16, - Operand((uint16_t)0u), Operand((uint16_t)3u)}, + Operand::c16(0u), Operand::c16(3u)}, {"_0,3i32", aco_opcode::v_min_i32, aco_opcode::v_max_i32, aco_opcode::v_med3_i32, - Operand(0u), Operand(3u)}, + Operand::zero(), Operand::c32(3u)}, {"_0,3i16", aco_opcode::v_min_i16, aco_opcode::v_max_i16, aco_opcode::v_med3_i16, - Operand((uint16_t)0u), Operand((uint16_t)3u)}, + Operand::c16(0u), Operand::c16(3u)}, /* -5, 0 */ {"_-5,0i32", aco_opcode::v_min_i32, aco_opcode::v_max_i32, aco_opcode::v_med3_i32, - Operand(0xfffffffbu), Operand(0u)}, + Operand::c32(0xfffffffbu), Operand::zero()}, {"_-5,0i16", aco_opcode::v_min_i16, aco_opcode::v_max_i16, aco_opcode::v_med3_i16, - Operand((uint16_t)0xfffbu), Operand((uint16_t)0u)}, + Operand::c16(0xfffbu), Operand::c16(0u)}, }; BEGIN_TEST(optimize.clamp) @@ -613,45 +627,52 @@ BEGIN_TEST(optimize.const_comparison_ordering) //! p_unit_test 0, %res0 writeout(0, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), - bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), + Operand::c32(0x40800000u), inputs[0]))); //! s2: %res1 = v_cmp_nge_f32 4.0, %a //! p_unit_test 1, %res1 writeout(1, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), - bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), + Operand::c32(0x40800000u), inputs[0]))); //! s2: %res2 = v_cmp_nge_f32 0x40a00000, %a //! p_unit_test 2, %res2 writeout(2, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]), - bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0]))); + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), + bld.copy(bld.def(v1), Operand::c32(0x40a00000u)), inputs[0]))); /* optimize to ordered comparison */ //! s2: %res3 = v_cmp_lt_f32 4.0, %a //! 
p_unit_test 3, %res3 writeout(3, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]), - bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), + Operand::c32(0x40800000u), inputs[0]))); //! s2: %res4 = v_cmp_lt_f32 4.0, %a //! p_unit_test 4, %res4 writeout(4, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]), - bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]))); + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), + Operand::c32(0x40800000u), inputs[0]))); //! s2: %res5 = v_cmp_lt_f32 0x40a00000, %a //! p_unit_test 5, %res5 writeout(5, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]), - bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), bld.copy(bld.def(v1), Operand(0x40a00000u)), inputs[0]))); + bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), + bld.copy(bld.def(v1), Operand::c32(0x40a00000u)), inputs[0]))); /* similar but unoptimizable expressions */ //! s2: %tmp6_0 = v_cmp_lt_f32 4.0, %a //! s2: %tmp6_1 = v_cmp_neq_f32 %a, %a //! s2: %res6, s1: %_:scc = s_and_b64 %tmp6_1, %tmp6_0 //! p_unit_test 6, %res6 - Temp src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]); + Temp src1 = + bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::c32(0x40800000u), inputs[0]); Temp src0 = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]); writeout(6, bld.sop2(aco_opcode::s_and_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1)); @@ -659,7 +680,8 @@ BEGIN_TEST(optimize.const_comparison_ordering) //! s2: %tmp7_1 = v_cmp_eq_f32 %a, %a //! s2: %res7, s1: %_:scc = s_or_b64 %tmp7_1, %tmp7_0 //! p_unit_test 7, %res7 - src1 = bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]); + src1 = + bld.vopc(aco_opcode::v_cmp_nge_f32, bld.def(bld.lm), Operand::c32(0x40800000u), inputs[0]); src0 = bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), inputs[0], inputs[0]); writeout(7, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1)); @@ -667,7 +689,7 @@ BEGIN_TEST(optimize.const_comparison_ordering) //! s2: %tmp8_1 = v_cmp_neq_f32 %a, %a //! s2: %res8, s1: %_:scc = s_or_b64 %tmp8_1, %tmp8_0 //! p_unit_test 8, %res8 - src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[3]); + src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::c32(0x40800000u), inputs[3]); src0 = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]); writeout(8, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1)); @@ -675,23 +697,26 @@ BEGIN_TEST(optimize.const_comparison_ordering) //! s2: %tmp9_1 = v_cmp_neq_f32 %a, %d //! s2: %res9, s1: %_:scc = s_or_b64 %tmp9_1, %tmp9_0 //! p_unit_test 9, %res9 - src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(0x40800000u), inputs[0]); + src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::c32(0x40800000u), inputs[0]); src0 = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[3]); writeout(9, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1)); /* bit sizes */ //! s2: %res10 = v_cmp_nge_f16 4.0, %b //! 
p_unit_test 10, %res10 - Temp input1_16 = bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand(0u)); + Temp input1_16 = + bld.pseudo(aco_opcode::p_extract_vector, bld.def(v2b), inputs[1], Operand::zero()); writeout(10, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), input1_16, input1_16), - bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand((uint16_t)0x4400u), input1_16))); + bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand::c16(0x4400u), + input1_16))); //! s2: %res11 = v_cmp_nge_f64 4.0, %c //! p_unit_test 11, %res11 writeout(11, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), bld.vopc(aco_opcode::v_cmp_neq_f64, bld.def(bld.lm), inputs[2], inputs[2]), - bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), Operand(0x4010000000000000u), inputs[2]))); + bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), + Operand::c64(0x4010000000000000u), inputs[2]))); /* NaN */ uint16_t nan16 = 0x7e00; @@ -702,7 +727,7 @@ BEGIN_TEST(optimize.const_comparison_ordering) //! s2: %tmp12_1 = v_cmp_neq_f16 %a, %a //! s2: %res12, s1: %_:scc = s_or_b64 %tmp12_1, %tmp12_0 //! p_unit_test 12, %res12 - src1 = bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand(nan16), inputs[0]); + src1 = bld.vopc(aco_opcode::v_cmp_lt_f16, bld.def(bld.lm), Operand::c16(nan16), inputs[0]); src0 = bld.vopc(aco_opcode::v_cmp_neq_f16, bld.def(bld.lm), inputs[0], inputs[0]); writeout(12, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1)); @@ -710,7 +735,7 @@ BEGIN_TEST(optimize.const_comparison_ordering) //! s2: %tmp13_1 = v_cmp_neq_f32 %a, %a //! s2: %res13, s1: %_:scc = s_or_b64 %tmp13_1, %tmp13_0 //! p_unit_test 13, %res13 - src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand(nan32), inputs[0]); + src1 = bld.vopc(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), Operand::c32(nan32), inputs[0]); src0 = bld.vopc(aco_opcode::v_cmp_neq_f32, bld.def(bld.lm), inputs[0], inputs[0]); writeout(13, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1)); @@ -718,7 +743,7 @@ BEGIN_TEST(optimize.const_comparison_ordering) //! s2: %tmp14_1 = v_cmp_neq_f64 %a, %a //! s2: %res14, s1: %_:scc = s_or_b64 %tmp14_1, %tmp14_0 //! p_unit_test 14, %res14 - src1 = bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), Operand(nan64), inputs[0]); + src1 = bld.vopc(aco_opcode::v_cmp_lt_f64, bld.def(bld.lm), Operand::c64(nan64), inputs[0]); src0 = bld.vopc(aco_opcode::v_cmp_neq_f64, bld.def(bld.lm), inputs[0], inputs[0]); writeout(14, bld.sop2(aco_opcode::s_or_b64, bld.def(bld.lm), bld.def(s1, scc), src0, src1)); @@ -762,15 +787,15 @@ BEGIN_TEST(optimize.minmax) //! v1: %res0 = v_max3_f32 0, -0, %a //! p_unit_test 0, %res0 Temp xor0 = fneg(inputs[0]); - Temp min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), xor0); + Temp min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), xor0); Temp xor1 = fneg(min); - writeout(0, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), xor1)); + writeout(0, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), xor1)); //! v1: %res1 = v_max3_f32 0, -0, -%a //! 
p_unit_test 1, %res1 - min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), Operand(inputs[0])); + min = bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), Operand(inputs[0])); xor1 = fneg(min); - writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand(0u), xor1)); + writeout(1, bld.vop2(aco_opcode::v_max_f32, bld.def(v1), Operand::zero(), xor1)); finish_opt_test(); } @@ -809,7 +834,7 @@ BEGIN_TEST(optimize.add_lshlrev) //~gfx8! v1: %res0, s2: %_ = v_add_co_u32 %lshl0, %b //~gfx(9|10)! v1: %res0 = v_lshl_add_u32 %a, 3, %b //! p_unit_test 0, %res0 - lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), Operand(inputs[0])); + lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), Operand(inputs[0])); writeout(0, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]))); //~gfx8! v1: %lshl1 = v_lshlrev_b32 7, (is24bit)%a @@ -818,7 +843,7 @@ BEGIN_TEST(optimize.add_lshlrev) //! p_unit_test 1, %res1 Operand a_24bit = Operand(inputs[0]); a_24bit.set24bit(true); - lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(7u), a_24bit); + lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(7u), a_24bit); writeout(1, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]))); //~gfx8! v1: %lshl2 = v_lshlrev_b32 (is24bit)%a, (is24bit)%b @@ -833,7 +858,7 @@ BEGIN_TEST(optimize.add_lshlrev) //~gfx8! v1: %res3 = v_mad_u32_u24 (is24bit)%a, 8, %b //~gfx(9|10)! v1: %res3 = v_lshl_add_u32 (is24bit)%a, 3, %b //! p_unit_test 3, %res3 - lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(3u), a_24bit); + lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(3u), a_24bit); writeout(3, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]))); //~gfx8! v1: %res4 = v_mad_u32_u24 (is16bit)%a, 16, %b @@ -841,7 +866,7 @@ BEGIN_TEST(optimize.add_lshlrev) //! p_unit_test 4, %res4 Operand a_16bit = Operand(inputs[0]); a_16bit.set16bit(true); - lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(4u), a_16bit); + lshl = bld.vop2(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(4u), a_16bit); writeout(4, bld.vadd32(bld.def(v1), lshl, Operand(inputs[1]))); //~gfx8! v1: %lshl5 = v_lshlrev_b32 4, (is24bit)%c @@ -850,7 +875,7 @@ BEGIN_TEST(optimize.add_lshlrev) //! 
p_unit_test 5, %res5 Operand c_24bit = Operand(inputs[2]); c_24bit.set24bit(true); - lshl = bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(4u), c_24bit); + lshl = bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand::c32(4u), c_24bit); writeout(5, bld.vadd32(bld.def(v1), lshl, Operand(inputs[2]))); finish_opt_test(); @@ -896,9 +921,9 @@ static Temp emit_denorm_srcdest(aco_opcode op, Temp val) { switch (op) { case aco_opcode::v_cndmask_b32: - return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), val, inputs[1]); + return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]); case aco_opcode::v_min_f32: - return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand(0u), val); + return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), val); case aco_opcode::v_rcp_f32: return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val); default: @@ -975,7 +1000,7 @@ BEGIN_TEST(optimize.denorm_propagation) Temp val = emit_denorm_srcdest(cfg.src, inputs[0]); switch (cfg.op) { case denorm_mul1: - val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x3f800000u), val); + val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), val); break; case denorm_fneg: val = fneg(val); @@ -988,7 +1013,8 @@ BEGIN_TEST(optimize.denorm_propagation) break; } val = emit_denorm_srcdest(cfg.dest, val); - writeout(0, bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand(0u), val, inputs[1])); + writeout( + 0, bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1])); finish_opt_test(); } diff --git a/src/amd/compiler/tests/test_optimizer_postRA.cpp b/src/amd/compiler/tests/test_optimizer_postRA.cpp index 31b70f2..9887ab6 100644 --- a/src/amd/compiler/tests/test_optimizer_postRA.cpp +++ b/src/amd/compiler/tests/test_optimizer_postRA.cpp @@ -48,7 +48,8 @@ BEGIN_TEST(optimizer_postRA.vcmp) //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0] //! s2: %e:s[2-3] = p_cbranch_z %b:vcc //! p_unit_test 0, %e:s[2-3] - auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand(0u), Operand(v_in, reg_v0)); + auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), + Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); writeout(0, Operand(br, reg_s2)); @@ -64,9 +65,10 @@ BEGIN_TEST(optimizer_postRA.vcmp) //! s2: %f:vcc = s_mov_b64 0 //! s2: %e:s[2-3] = p_cbranch_z %d:scc //! p_unit_test 1, %e:s[2-3], %f:vcc - auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand(0u), Operand(v_in, reg_v0)); + auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), + Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); - auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand(0u)); + auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero()); auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc)); } @@ -80,7 +82,8 @@ BEGIN_TEST(optimizer_postRA.vcmp) //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec //! s2: %e:s[2-3] = p_cbranch_z %d:scc //! 
p_unit_test 2, %e:s[2-3] - auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand(0u), Operand(v_in, reg_v0)); + auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(), + Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm)); auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); writeout(2, Operand(br, reg_s2)); @@ -95,7 +98,8 @@ BEGIN_TEST(optimizer_postRA.vcmp) //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec //! s2: %e:s[2-3] = p_cbranch_z %d:scc //! p_unit_test 2, %e:s[2-3] - auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand(1u), Operand(reg_s4, bld.lm)); + auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), + Operand::c32(1u), Operand(reg_s4, bld.lm)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm)); auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); writeout(2, Operand(br, reg_s2)); @@ -111,9 +115,10 @@ BEGIN_TEST(optimizer_postRA.vcmp) //! s2: %f:exec = s_mov_b64 42 //! s2: %e:s[2-3] = p_cbranch_z %d:scc //! p_unit_test 4, %e:s[2-3], %f:exec - auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand(0u), Operand(v_in, reg_v0)); + auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(), + Operand(v_in, reg_v0)); auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm)); - auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand(42u)); + auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u)); auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp())); writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec)); } @@ -149,8 +154,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 //! s2: %f:vcc = p_cbranch_nz %e:scc //! p_unit_test 0, %f:vcc - auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u)); - auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u)); + auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, + Operand::c32(0x40018u)); + auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), + Operand::zero()); auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); writeout(0, Operand(br, vcc)); } @@ -161,8 +168,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 //! s2: %f:vcc = p_cbranch_z %e:scc //! 
p_unit_test 1, %f:vcc - auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u)); - auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u)); + auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, + Operand::c32(0x40018u)); + auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), + Operand::zero()); auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); writeout(1, Operand(br, vcc)); } @@ -173,8 +182,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 //! s2: %f:vcc = p_cbranch_z %e:scc //! p_unit_test 2, %f:vcc - auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u)); - auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u)); + auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, + Operand::c32(0x40018u)); + auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), + Operand::zero()); auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); writeout(2, Operand(br, vcc)); } @@ -185,8 +196,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 //! s2: %f:vcc = p_cbranch_nz %e:scc //! p_unit_test 3, %f:vcc - auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u)); - auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u)); + auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, + Operand::c32(0x40018u)); + auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2), + Operand::zero()); auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); writeout(3, Operand(br, vcc)); } @@ -197,8 +210,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345 //! s2: %f:vcc = p_cbranch_z %e:scc //! p_unit_test 4, %f:vcc - auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1, Operand(0x12345u)); - auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2), Operand(UINT64_C(0))); + auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1, + Operand::c32(0x12345u)); + auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2), + Operand::zero(8)); auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp)); writeout(4, Operand(br, vcc)); } @@ -213,9 +228,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 //! s2: %f:vcc = p_cbranch_z %g:scc //! 
p_unit_test 5, %f:vcc, %h:s[3] - auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u)); - auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, Operand(1u)); - auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u)); + auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, + Operand::c32(0x40018u)); + auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, + Operand::c32(1u)); + auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), + Operand::zero()); auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp)); writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3)); } @@ -226,8 +244,10 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018 //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc //! p_unit_test 6, %f:s[4] - auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u)); - auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u)); + auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, + Operand::c32(0x40018u)); + auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), + Operand::zero()); auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp)); writeout(6, Operand(br, reg_s4)); } @@ -242,9 +262,12 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_opt) //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0 //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc //! p_unit_test 7, %f:s[4], %h:s[3] - auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, Operand(0x40018u)); - auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, Operand(1u)); - auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), Operand(0u)); + auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0, + Operand::c32(0x40018u)); + auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0, + Operand::c32(1u)); + auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2), + Operand::zero()); auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp)); writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3)); } diff --git a/src/amd/compiler/tests/test_regalloc.cpp b/src/amd/compiler/tests/test_regalloc.cpp index bd998b3..52449a4 100644 --- a/src/amd/compiler/tests/test_regalloc.cpp +++ b/src/amd/compiler/tests/test_regalloc.cpp @@ -70,7 +70,7 @@ BEGIN_TEST(regalloc.32bit_partial_write) /* This test checks if this instruction uses SDWA. */ //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_preserve - Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand(0u)); + Temp lo = bld.vop1(aco_opcode::v_not_b32, bld.def(v2b), Operand::zero()); //! 
v1: %_:v[0] = p_create_vector %_:v[0][0:16], %_:v[0][16:32] bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), lo, hi); diff --git a/src/amd/compiler/tests/test_sdwa.cpp b/src/amd/compiler/tests/test_sdwa.cpp index 68306df..6a16700 100644 --- a/src/amd/compiler/tests/test_sdwa.cpp +++ b/src/amd/compiler/tests/test_sdwa.cpp @@ -79,14 +79,14 @@ BEGIN_TEST(validate.sdwa.operands) //~gfx8! Wrong source position for constant argument: v1: %_ = v_mul_f32 4, %vgpr1 //~gfx8! Wrong source position for constant argument: v1: %_ = v_mul_f32 %vgpr0, 4 - bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand(4u), inputs[1]); - bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand(4u)); + bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(4u), inputs[1]); + bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand::c32(4u)); //! Literal applied on wrong instruction format: v1: %_ = v_mul_f32 0x1234, %vgpr1 //! Literal applied on wrong instruction format: v1: %_ = v_mul_f32 %vgpr0, 0x1234 //! Wrong source position for Literal argument: v1: %_ = v_mul_f32 %vgpr0, 0x1234 - bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand(0x1234u), inputs[1]); - bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand(0x1234u)); + bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x1234u), inputs[1]); + bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], Operand::c32(0x1234u)); //! Validation failed @@ -172,35 +172,42 @@ BEGIN_TEST(optimize.sdwa.extract) { //~gfx[^7].*! @standard_test(0, 0, 8) - Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(is_signed)); + Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u), + Operand::c32(is_signed)); writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b)); //~gfx[^7].*! @standard_test(1, 8, 8) - Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(1u), Operand(8u), Operand(is_signed)); + Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u), + Operand::c32(is_signed)); writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b)); //~gfx[^7].*! @standard_test(2, 16, 8) - Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(2u), Operand(8u), Operand(is_signed)); + Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u), + Operand::c32(is_signed)); writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b)); //~gfx[^7].*! @standard_test(3, 24, 8) - Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(3u), Operand(8u), Operand(is_signed)); + Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u), + Operand::c32(is_signed)); writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b)); //~gfx[^7].*! @standard_test(4, 0, 16) - Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(16u), Operand(is_signed)); + Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u), + Operand::c32(is_signed)); writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b)); //~gfx[^7].*! 
@standard_test(5, 16, 16) - Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(1u), Operand(16u), Operand(is_signed)); + Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), + Operand::c32(16u), Operand::c32(is_signed)); writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b)); //~gfx[^7]_unsigned! @standard_test(6, 0, 8) - Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand(0u), Operand(8u)); + Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u)); writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b)); //~gfx[^7]_unsigned! @standard_test(7, 0, 16) - Temp bfi_word0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand(0u), Operand(16u)); + Temp bfi_word0_b = + bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u)); writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b)); } @@ -211,7 +218,8 @@ BEGIN_TEST(optimize.sdwa.extract) //! v1: %tmp8 = p_insert %b, 1, 8 //! v1: %res8 = v_mul_f32 %a, %tmp8 //! p_unit_test 8, %res8 - Temp bfi_byte1_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand(1u), Operand(8u)); + Temp bfi_byte1_b = + bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u)); writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b)); /* v_cvt_f32_ubyte[0-3] can be used instead of v_cvt_f32_u32+sdwa */ @@ -220,7 +228,8 @@ BEGIN_TEST(optimize.sdwa.extract) //~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 @b(0:7) //~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b //! p_unit_test 9, %res9 - Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(is_signed)); + Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u), + Operand::c32(is_signed)); writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b)); //~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1 @@ -228,7 +237,8 @@ BEGIN_TEST(optimize.sdwa.extract) //~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 @b(8:15) //~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b //! p_unit_test 10, %res10 - Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(1u), Operand(8u), Operand(is_signed)); + Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u), + Operand::c32(is_signed)); writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b)); //~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1 @@ -236,7 +246,8 @@ BEGIN_TEST(optimize.sdwa.extract) //~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 @b(16:23) //~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b //! p_unit_test 11, %res11 - Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(2u), Operand(8u), Operand(is_signed)); + Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u), + Operand::c32(is_signed)); writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b)); //~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1 @@ -244,18 +255,21 @@ BEGIN_TEST(optimize.sdwa.extract) //~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 @b(24:31) //~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b //! 
p_unit_test 12, %res12 - Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(3u), Operand(8u), Operand(is_signed)); + Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u), + Operand::c32(is_signed)); writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b)); //! v1: %res13 = v_add_i16 %a, %b //! p_unit_test 13, %res13 - Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(16u), Operand(is_signed)); + Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u), + Operand::c32(is_signed)); writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b)); /* VOP3-only instructions can't use SDWA but they can use opsel instead */ //~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b) //~gfx(9|10).*! p_unit_test 14, %res14 - Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(1u), Operand(16u), Operand(is_signed)); + Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), + Operand::c32(16u), Operand::c32(is_signed)); writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b)); } @@ -274,7 +288,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers) //! v1: %res0 = v_mul_f32 %a, -%b[0:7] //! p_unit_test 0, %res0 - Temp byte0 = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u)); + Temp byte0 = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u), + Operand::zero()); Temp neg_byte0 = fneg(byte0); writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], neg_byte0)); @@ -284,7 +299,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers) //~gfx(9|10)! v1: %res1 = v_mul_f32 %a, %neg_byte0 //! p_unit_test 1, %res1 Temp neg = fneg(inputs[1]); - Temp byte0_neg = bld.pseudo(ext, bld.def(v1), neg, Operand(0u), Operand(8u), Operand(0u)); + Temp byte0_neg = + bld.pseudo(ext, bld.def(v1), neg, Operand::zero(), Operand::c32(8u), Operand::zero()); writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_neg)); //! v1: %res2 = v_mul_f32 %a, |%b[0:7]| @@ -296,7 +312,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers) //! v1: %res3 = v_mul_f32 %a, %abs[0:7] //! p_unit_test 3, %res3 Temp abs = fabs(inputs[1]); - Temp byte0_abs = bld.pseudo(ext, bld.def(v1), abs, Operand(0u), Operand(8u), Operand(0u)); + Temp byte0_abs = + bld.pseudo(ext, bld.def(v1), abs, Operand::zero(), Operand::c32(8u), Operand::zero()); writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_abs)); //! v1: %res4 = v_mul_f32 %1, -|%2[0:7]| @@ -310,7 +327,8 @@ BEGIN_TEST(optimize.sdwa.extract_modifiers) //~gfx(9|10)! v1: %res5 = v_mul_f32 %a, %neg_abs_byte0 //! p_unit_test 5, %res5 Temp neg_abs = fneg(abs); - Temp byte0_neg_abs = bld.pseudo(ext, bld.def(v1), neg_abs, Operand(0u), Operand(8u), Operand(0u)); + Temp byte0_neg_abs = + bld.pseudo(ext, bld.def(v1), neg_abs, Operand::zero(), Operand::c32(8u), Operand::zero()); writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_neg_abs)); finish_opt_test(); @@ -329,28 +347,32 @@ BEGIN_TEST(optimize.sdwa.extract.sgpr) //~gfx8! v1: %res1 = v_mul_f32 %c, %byte0_b //~gfx(9|10)! v1: %res1 = v_mul_f32 %c, %b[0:7] //! 
p_unit_test 1, %res1 - Temp byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u)); + Temp byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u), + Operand::zero()); writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[2], byte0_b)); //~gfx8! v1: %byte0_c = p_extract %c, 0, 8, 0 //~gfx8! v1: %res2 = v_mul_f32 %a, %byte0_c //~gfx(9|10)! v1: %res2 = v_mul_f32 %a, %c[0:7] //! p_unit_test 2, %res2 - Temp byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand(0u), Operand(8u), Operand(0u)); + Temp byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u), + Operand::zero()); writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_c)); //~gfx8! v1: %byte0_c_2 = p_extract %c, 0, 8, 0 //~gfx8! v1: %res3 = v_mul_f32 %c, %byte0_c_2 //~gfx(9|10)! v1: %res3 = v_mul_f32 %c, %c[0:7] //! p_unit_test 3, %res3 - byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand(0u), Operand(8u), Operand(0u)); + byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u), + Operand::zero()); writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[2], byte0_c)); //~gfx(8|9)! v1: %byte0_c_3 = p_extract %c, 0, 8, 0 //~gfx(8|9)! v1: %res4 = v_mul_f32 %d, %byte0_c_3 //~gfx10! v1: %res4 = v_mul_f32 %d, %c[0:7] //! p_unit_test 4, %res4 - byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand(0u), Operand(8u), Operand(0u)); + byte0_c = bld.pseudo(ext, bld.def(v1), inputs[2], Operand::zero(), Operand::c32(8u), + Operand::zero()); writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[3], byte0_c)); finish_opt_test(); @@ -365,7 +387,8 @@ BEGIN_TEST(optimize.sdwa.from_vop3) //! v1: %res0 = v_mul_f32 -|%a|, %b[0:7] //! p_unit_test 0, %res0 - Temp byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u)); + Temp byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(), + Operand::c32(8u), Operand::zero()); VOP3_instruction *mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b).instr->vop3(); mul->neg[0] = true; mul->abs[0] = true; @@ -375,7 +398,8 @@ BEGIN_TEST(optimize.sdwa.from_vop3) //~gfx8! v1: %res1 = v_mul_f32 %a, %byte0_b_0 *4 //~gfx(9|10)! v1: %res1 = v_mul_f32 %a, %b[0:7] *4 //! p_unit_test 1, %res1 - byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u)); + byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(), + Operand::c32(8u), Operand::zero()); mul = &bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], byte0_b).instr->vop3(); mul->omod = 2; writeout(1, mul->definitions[0].getTemp()); @@ -384,15 +408,18 @@ BEGIN_TEST(optimize.sdwa.from_vop3) //~gfx8! v1: %res2 = v_mul_f32 %byte0_b_1, %c //~gfx(9|10)! v1: %res2 = v_mul_f32 %b[0:7], %c //! p_unit_test 2, %res2 - byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u)); + byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(), + Operand::c32(8u), Operand::zero()); writeout(2, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), byte0_b, inputs[2])); if (i >= GFX10) { //~gfx10! v1: %byte0_b_2 = p_extract %b, 0, 8, 0 //~gfx10! v1: %res3 = v_mul_f32 %byte0_b_2, 0x1234 //~gfx10! 
p_unit_test 3, %res3 - byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand(0u), Operand(8u), Operand(0u)); - writeout(3, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), byte0_b, Operand(0x1234u))); + byte0_b = bld.pseudo(aco_opcode::p_extract, bld.def(v1), inputs[1], Operand::zero(), + Operand::c32(8u), Operand::zero()); + writeout(3, + bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), byte0_b, Operand::c32(0x1234u))); } finish_opt_test(); @@ -411,54 +438,58 @@ BEGIN_TEST(optimize.sdwa.insert) //~gfx[^7]! v1: %res0 = v_mul_f32 %a, %b dst_sel:ubyte0 //~gfx[^7]! p_unit_test 0, %res0 Temp val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u))); + writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u))); //~gfx[^7]! v1: %res1 = v_mul_f32 %a, %b dst_sel:ubyte1 //~gfx[^7]! p_unit_test 1, %res1 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand(1u), Operand(8u))); + writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(8u))); //~gfx[^7]! v1: %res2 = v_mul_f32 %a, %b dst_sel:ubyte2 //~gfx[^7]! p_unit_test 2, %res2 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(2, bld.pseudo(ins, bld.def(v1), val, Operand(2u), Operand(8u))); + writeout(2, bld.pseudo(ins, bld.def(v1), val, Operand::c32(2u), Operand::c32(8u))); //~gfx[^7]! v1: %res3 = v_mul_f32 %a, %b dst_sel:ubyte3 //~gfx[^7]! p_unit_test 3, %res3 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(3, bld.pseudo(ins, bld.def(v1), val, Operand(3u), Operand(8u))); + writeout(3, bld.pseudo(ins, bld.def(v1), val, Operand::c32(3u), Operand::c32(8u))); //~gfx[^7]! v1: %res4 = v_mul_f32 %a, %b dst_sel:uword0 //~gfx[^7]! p_unit_test 4, %res4 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(16u))); + writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u))); //~gfx[^7]! v1: %res5 = v_mul_f32 %a, %b dst_sel:uword1 //~gfx[^7]! p_unit_test 5, %res5 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(5, bld.pseudo(ins, bld.def(v1), val, Operand(1u), Operand(16u))); + writeout(5, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u))); //~gfx[^7]! v1: %res6 = v_mul_f32 %a, %b dst_sel:ubyte0 //~gfx[^7]! p_unit_test 6, %res6 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(6, bld.pseudo(ext, bld.def(v1), val, Operand(0u), Operand(8u), Operand(0u))); + writeout( + 6, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(8u), Operand::zero())); //~gfx[^7]! v1: %res7 = v_mul_f32 %a, %b dst_sel:uword0 //~gfx[^7]! p_unit_test 7, %res7 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(7, bld.pseudo(ext, bld.def(v1), val, Operand(0u), Operand(16u), Operand(0u))); + writeout( + 7, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(16u), Operand::zero())); //~gfx[^7]! v1: %tmp8 = v_mul_f32 %a, %b //~gfx[^7]! v1: %res8 = p_extract %tmp8, 2, 8, 0 //~gfx[^7]! 
p_unit_test 8, %res8 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(8, bld.pseudo(ext, bld.def(v1), val, Operand(2u), Operand(8u), Operand(0u))); + writeout( + 8, bld.pseudo(ext, bld.def(v1), val, Operand::c32(2u), Operand::c32(8u), Operand::zero())); //~gfx[^7]! v1: %tmp9 = v_mul_f32 %a, %b //~gfx[^7]! v1: %res9 = p_extract %tmp9, 0, 8, 1 //~gfx[^7]! p_unit_test 9, %res9 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - writeout(9, bld.pseudo(ext, bld.def(v1), val, Operand(0u), Operand(8u), Operand(1u))); + writeout( + 9, bld.pseudo(ext, bld.def(v1), val, Operand::zero(), Operand::c32(8u), Operand::c32(1u))); //>> p_unit_test 63 writeout(63); @@ -466,26 +497,26 @@ BEGIN_TEST(optimize.sdwa.insert) //! v1: %res10 = v_mul_f32 %a, %b //! p_unit_test 10, %res10 val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1]); - bld.pseudo(ins, bld.def(v1), val, Operand(1u), Operand(16u)); + bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u)); writeout(10, val); //! v1: %res11 = v_sub_i16 %a, %b //! p_unit_test 11, %res11 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]); - writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(16u))); + writeout(11, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(16u))); //~gfx[78]! v1: %tmp12 = v_sub_i16 %a, %b //~gfx[78]! v1: %res12 = p_insert %tmp11, 1, 16 //~gfx(9|10)! v1: %res12 = v_sub_i16 %a, %b opsel_hi //! p_unit_test 12, %res12 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]); - writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand(1u), Operand(16u))); + writeout(12, bld.pseudo(ins, bld.def(v1), val, Operand::c32(1u), Operand::c32(16u))); //! v1: %tmp13 = v_sub_i16 %a, %b //! v1: %res13 = p_insert %tmp13, 0, 8 //! p_unit_test 13, %res13 val = bld.vop3(aco_opcode::v_sub_i16, bld.def(v1), inputs[0], inputs[1]); - writeout(13, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u))); + writeout(13, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u))); finish_opt_test(); } @@ -504,29 +535,31 @@ BEGIN_TEST(optimize.sdwa.insert_modifiers) //~gfx9! v1: %res0 = v_rcp_f32 %a *2 dst_sel:ubyte0 //! p_unit_test 0, %res0 Temp val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]); - val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand(0x40000000u)); - writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u))); + val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u)); + writeout(0, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u))); //! v1: %res1 = v_rcp_f32 %a clamp dst_sel:ubyte0 //! p_unit_test 1, %res1 val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]); - val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand(0u), Operand(0x3f800000u)); - writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u))); + val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(), + Operand::c32(0x3f800000u)); + writeout(1, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u))); //! v1: %tmp2 = v_rcp_f32 %a dst_sel:ubyte0 //! v1: %res2 = v_mul_f32 %tmp2, 2.0 //! 
p_unit_test 2, %res2 val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]); - val = bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u)); - val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand(0x40000000u)); + val = bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)); + val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u)); writeout(2, val); //! v1: %tmp3 = v_rcp_f32 %a dst_sel:ubyte0 //! v1: %res3 = v_med3_f32 %tmp3, 0, 1.0 //! p_unit_test 3, %res3 val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]); - val = bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u)); - val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand(0u), Operand(0x3f800000u)); + val = bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u)); + val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(), + Operand::c32(0x3f800000u)); writeout(3, val); //~gfx8! v1: %tmp4 = v_rcp_f32 %a *2 clamp @@ -534,9 +567,10 @@ BEGIN_TEST(optimize.sdwa.insert_modifiers) //~gfx9! v1: %res4 = v_rcp_f32 %a *2 clamp dst_sel:ubyte0 //! p_unit_test 4, %res4 val = bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), inputs[0]); - val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand(0x40000000u)); - val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand(0u), Operand(0x3f800000u)); - writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand(0u), Operand(8u))); + val = bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), val, Operand::c32(0x40000000u)); + val = bld.vop3(aco_opcode::v_med3_f32, bld.def(v1), val, Operand::zero(), + Operand::c32(0x3f800000u)); + writeout(4, bld.pseudo(ins, bld.def(v1), val, Operand::zero(), Operand::c32(8u))); finish_opt_test(); } diff --git a/src/amd/compiler/tests/test_to_hw_instr.cpp b/src/amd/compiler/tests/test_to_hw_instr.cpp index 177d07b..4e64111 100644 --- a/src/amd/compiler/tests/test_to_hw_instr.cpp +++ b/src/amd/compiler/tests/test_to_hw_instr.cpp @@ -51,7 +51,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand(0u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v1_lo, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v2b)); @@ -60,7 +60,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2 //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 - bld.pseudo(aco_opcode::p_unit_test, Operand(1u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v2b), Operand(v0_lo, v2b)); @@ -70,7 +70,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2 //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16] - bld.pseudo(aco_opcode::p_unit_test, Operand(2u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v6b), Operand(v1_lo, v2b), Operand(v0_lo, v2b), Operand(v2_lo, v2b)); @@ -81,7 +81,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! 
v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16] //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2 - bld.pseudo(aco_opcode::p_unit_test, Operand(3u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b), Operand(v0_lo, v2b), @@ -95,7 +95,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand(4u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b), Operand(v2_lo, v2b), @@ -104,7 +104,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! p_unit_test 5 //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] - bld.pseudo(aco_opcode::p_unit_test, Operand(5u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b), Operand(v0_lo, v1)); @@ -113,7 +113,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v2b: %0:v[2][0:16] = v_mov_b32 %0:v[1][0:16] //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] - bld.pseudo(aco_opcode::p_unit_test, Operand(6u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b), Definition(v2_lo, v2b), Operand(v0_lo, v6b)); @@ -123,7 +123,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32] //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32] - bld.pseudo(aco_opcode::p_unit_test, Operand(7u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b), Definition(v2_lo, v2b), Definition(v3_lo, v2b), @@ -135,7 +135,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand(8u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v2_lo, v2b), Definition(v0_lo, v2b), Definition(v3_lo, v2b), @@ -145,7 +145,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand(9u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v1_lo, v1b), Operand(v1_lo, v1b), Operand(v0_lo, v1b)); @@ -154,7 +154,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8] //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3 //~gfx[67]! 
v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] - bld.pseudo(aco_opcode::p_unit_test, Operand(10u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2b), Operand(v1_lo, v1b), Operand(v0_lo, v1b)); @@ -165,7 +165,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16] //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2 - bld.pseudo(aco_opcode::p_unit_test, Operand(11u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v3b), Operand(v1_lo, v1b), Operand(v0_lo, v1b), Operand(v2_lo, v1b)); @@ -178,7 +178,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2 //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24] //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1 - bld.pseudo(aco_opcode::p_unit_test, Operand(12u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u)); bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v1b), Operand(v0_lo, v1b), @@ -192,7 +192,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v3b: %0:v[0][0:24] = v_and_b32 0xffffff, %0:v[0][0:24] //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001 //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8] - bld.pseudo(aco_opcode::p_unit_test, Operand(13u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u)); Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v0_lo, v1b), Operand(v0_lo, v1b), @@ -202,7 +202,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! p_unit_test 14 //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8] //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16] - bld.pseudo(aco_opcode::p_unit_test, Operand(14u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u)); bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b), Operand(v0_lo, v2b)); @@ -212,7 +212,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16] //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24] //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32] - bld.pseudo(aco_opcode::p_unit_test, Operand(15u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u)); bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b), Definition(v2_lo, v1b), Definition(v3_lo, v1b), @@ -230,7 +230,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[89]>> p_unit_test 0 //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 //~gfx9! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16] - bld.pseudo(aco_opcode::p_unit_test, Operand(0u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), Operand(v0_hi, v2b), Operand(v0_lo, v2b)); @@ -241,7 +241,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] //~gfx[89]! 
v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(1u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b)); @@ -252,7 +252,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve //~gfx[89]! v2b: %0:v[0][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve //~gfx[89]! v2b: %0:v[1][0:16] = v_xor_b32 %0:v[1][0:16], %0:v[0][0:16] dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(2u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b)); @@ -264,7 +264,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16] dst_preserve //~gfx[89]! v1b: %0:v[1][16:24] = v_mov_b32 %0:v[0][16:24] dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(3u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b), Operand(v1_lo, v1), Operand(v0_b3, v1b)); @@ -276,7 +276,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] //~gfx[89]! v1b: %0:v[1][8:16] = v_mov_b32 %0:v[0][8:16] dst_preserve //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(4u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b), Operand(v1_lo, v1), Operand(v0_lo, v1b)); @@ -288,7 +288,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1] //~gfx[89]! v1b: %0:v[0][8:16] = v_mov_b32 %0:v[1][8:16] dst_preserve //~gfx[89]! v1b: %0:v[0][24:32] = v_mov_b32 %0:v[1][24:32] dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(5u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1)); @@ -298,7 +298,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0] //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] - bld.pseudo(aco_opcode::p_unit_test, Operand(6u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1)); @@ -309,7 +309,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1] //~gfx9! v1: %0:v[1], v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1] //~gfx[89]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2 - bld.pseudo(aco_opcode::p_unit_test, Operand(7u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1)); @@ -322,7 +322,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[89]! 
v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve //~gfx[89]! v1b: %0:v[1][24:32] = v_xor_b32 %0:v[1][24:32], %0:v[0][24:32] dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(8u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b), Operand(v1_lo, v3b), Operand(v0_lo, v3b)); @@ -333,7 +333,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0] //~gfx9! v1: %0:v[0], v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0] //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(9u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b)); @@ -345,7 +345,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve //~gfx[89]! v1b: %0:v[0][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve //~gfx[89]! v1b: %0:v[1][16:24] = v_xor_b32 %0:v[1][16:24], %0:v[0][16:24] dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(10u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b), Operand(v1_b1, v2b), Operand(v0_b1, v2b)); @@ -353,10 +353,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword) //~gfx[89]! p_unit_test 11 //~gfx[89]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][16:32] dst_preserve //~gfx[89]! v1: %0:v[0] = v_mov_b32 42 - bld.pseudo(aco_opcode::p_unit_test, Operand(11u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v1), Definition(v1_lo, v2b), - Operand(42u), Operand(v0_hi, v2b)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b), + Operand::c32(42u), Operand(v0_hi, v2b)); //~gfx[89]! s_endpgm @@ -380,93 +379,81 @@ BEGIN_TEST(to_hw_instr.subdword_constant) /* 16-bit pack */ //>> p_unit_test 0 //! v1: %_:v[0] = v_pack_b32_f16 0.5, hi(%_:v[1][16:32]) - bld.pseudo(aco_opcode::p_unit_test, Operand(0u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v2b), Definition(v0_hi, v2b), - Operand((uint16_t)0x3800), Operand(v1_hi, v2b)); + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand::c16(0x3800), Operand(v1_hi, v2b)); //! p_unit_test 1 //~gfx9! v2b: %0:v[0][16:32] = v_and_b32 0xffff0000, %0:v[1][16:32] //~gfx9! v1: %0:v[0] = v_or_b32 0x4205, %0:v[0] //~gfx10! v1: %_:v[0] = v_pack_b32_f16 0x4205, hi(%_:v[1][16:32]) - bld.pseudo(aco_opcode::p_unit_test, Operand(1u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v2b), Definition(v0_hi, v2b), - Operand((uint16_t)0x4205), Operand(v1_hi, v2b)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand::c16(0x4205), Operand(v1_hi, v2b)); //! p_unit_test 2 //~gfx9! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16] //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0] //~gfx10! 
v1: %0:v[0] = v_pack_b32_f16 0x4205, %0:v[0][0:16] - bld.pseudo(aco_opcode::p_unit_test, Operand(2u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v2b), Definition(v0_hi, v2b), - Operand((uint16_t)0x4205), Operand(v0_lo, v2b)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand::c16(0x4205), Operand(v0_lo, v2b)); //! p_unit_test 3 //! v1: %_:v[0] = v_mov_b32 0x3c003800 - bld.pseudo(aco_opcode::p_unit_test, Operand(3u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v2b), Definition(v0_hi, v2b), - Operand((uint16_t)0x3800), Operand((uint16_t)0x3c00)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand::c16(0x3800), Operand::c16(0x3c00)); //! p_unit_test 4 //! v1: %_:v[0] = v_mov_b32 0x43064205 - bld.pseudo(aco_opcode::p_unit_test, Operand(4u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v2b), Definition(v0_hi, v2b), - Operand((uint16_t)0x4205), Operand((uint16_t)0x4306)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand::c16(0x4205), Operand::c16(0x4306)); //! p_unit_test 5 //! v1: %_:v[0] = v_mov_b32 0x38004205 - bld.pseudo(aco_opcode::p_unit_test, Operand(5u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v2b), Definition(v0_hi, v2b), - Operand((uint16_t)0x4205), Operand((uint16_t)0x3800)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b), + Operand::c16(0x4205), Operand::c16(0x3800)); /* 16-bit copy */ //! p_unit_test 6 //! v2b: %_:v[0][0:16] = v_add_f16 0.5, 0 dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(6u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v2b), Operand((uint16_t)0x3800)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x3800)); //! p_unit_test 7 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff0000, %_:v[0] //~gfx9! v1: %_:v[0] = v_or_b32 0x4205, %_:v[0] //~gfx10! v2b: %_:v[0][0:16] = v_pack_b32_f16 0x4205, hi(%_:v[0][16:32]) - bld.pseudo(aco_opcode::p_unit_test, Operand(7u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v2b), Operand((uint16_t)0x4205)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x4205)); //! p_unit_test 8 //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0] //~gfx9! v1: %_:v[0] = v_or_b32 0x42050000, %_:v[0] //~gfx10! v2b: %_:v[0][16:32] = v_pack_b32_f16 %_:v[0][0:16], 0x4205 - bld.pseudo(aco_opcode::p_unit_test, Operand(8u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_hi, v2b), Operand((uint16_t)0x4205)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0x4205)); //! p_unit_test 9 //! v1b: %_:v[0][8:16] = v_mov_b32 0 dst_preserve //! v1b: %_:v[0][16:24] = v_mov_b32 56 dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(9u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_b1, v2b), Operand((uint16_t)0x3800)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x3800)); //! 
p_unit_test 10 //! v1b: %_:v[0][8:16] = v_mov_b32 5 dst_preserve //! v1b: %_:v[0][16:24] = v_mul_u32_u24 2, 33 dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(10u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_b1, v2b), Operand((uint16_t)0x4205)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Operand::c16(0x4205)); /* 8-bit copy */ //! p_unit_test 11 //! v1b: %_:v[0][0:8] = v_mul_u32_u24 2, 33 dst_preserve - bld.pseudo(aco_opcode::p_unit_test, Operand(11u)); - bld.pseudo(aco_opcode::p_parallelcopy, - Definition(v0_lo, v1b), Operand((uint8_t)0x42)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u)); + bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0x42)); //! s_endpgm @@ -488,7 +475,7 @@ BEGIN_TEST(to_hw_instr.self_intersecting_swap) //! v1: %0:v[2], v1: %0:v[3] = v_swap_b32 %0:v[3], %0:v[2] //! v1: %0:v[3], v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3] //! s_endpgm - bld.pseudo(aco_opcode::p_unit_test, Operand(0u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); //v[1:2] = v[2:3] //v3 = v7 //v7 = v1 @@ -521,7 +508,7 @@ BEGIN_TEST(to_hw_instr.extract) //; funcs['sel'] = lambda bits: ('sext(%%_:v[1])[%s]' if variant.endswith('_signed') else '%%_:v[1][%s]') % bits //>> p_unit_test 0 - bld.pseudo(aco_opcode::p_unit_test, Operand(0u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8 EXT(0, 8) //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8 @@ -542,7 +529,7 @@ BEGIN_TEST(to_hw_instr.extract) Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed)); //>> p_unit_test 2 - bld.pseudo(aco_opcode::p_unit_test, Operand(2u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); //~gfx._unsigned! s1: %_:s[0], s1: %_:scc = @s_bfe %_:s[1], 0x80000 //~gfx._signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1] EXT(0, 8) @@ -565,7 +552,7 @@ BEGIN_TEST(to_hw_instr.extract) Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed)); //>> p_unit_test 4 - bld.pseudo(aco_opcode::p_unit_test, Operand(4u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u)); //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8 //~gfx[^7].*! v2b: %_:v[0][0:16] = v_mov_b32 @sel(0:7) EXT(0, 0) @@ -603,7 +590,7 @@ BEGIN_TEST(to_hw_instr.insert) Operand::c32(size)); //>> p_unit_test 0 - bld.pseudo(aco_opcode::p_unit_test, Operand(0u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::zero()); //! v1: %_:v[0] = v_bfe_u32 %_:v[1], 0, 8 INS(0, 8) //~gfx7! v1: %0:v[0] = v_bfe_u32 %0:v[1], 0, 8 @@ -628,7 +615,7 @@ BEGIN_TEST(to_hw_instr.insert) Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size)); //>> p_unit_test 1 - bld.pseudo(aco_opcode::p_unit_test, Operand(1u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u)); //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 INS(0, 8) //! s1: %_:s[0], s1: %_:scc = s_bfe_u32 %_:s[1], 0x80000 @@ -651,7 +638,7 @@ BEGIN_TEST(to_hw_instr.insert) Operand::c32(idx), Operand::c32(8u)); //>> p_unit_test 2 - bld.pseudo(aco_opcode::p_unit_test, Operand(2u)); + bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u)); //~gfx7! v2b: %_:v[0][0:16] = v_bfe_u32 %_:v[1][0:16], 0, 8 //~gfx[^7]! v1: %_:v[0] = v_mov_b32 %_:v[1][0:16] dst_sel:ubyte0 dst_preserve INS(0, 0) -- 2.7.4
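Editorial note (not part of the patch, appended after the version trailer so the diff itself is untouched): the test-suite hunks above all follow the same mechanical before/after pattern. Below is a minimal sketch of that pattern, lifted from the p_parallelcopy and p_unit_test hunks in test_to_hw_instr.cpp; the builder object and register/size names (bld, v0_lo, v2b) are assumed to be in scope from the surrounding test, so this fragment is an illustration of the replacement, not a standalone compilable unit.

    /* Before: the (uint16_t) cast selected the deprecated constructor
     * overload, and so implicitly chose the 16-bit operand width. */
    bld.pseudo(aco_opcode::p_parallelcopy,
               Definition(v0_lo, v2b), Operand((uint16_t)0x3800));

    /* After: the named constructor states the width at the call site. */
    bld.pseudo(aco_opcode::p_parallelcopy,
               Definition(v0_lo, v2b), Operand::c16(0x3800));

    /* Plain zero immediates use the dedicated helper (defaulting to a
     * 32-bit constant when no byte count is passed, as in the hunks). */
    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());

Since the operand width is now explicit rather than inferred from the C++ integer type, the rewrite touches a large number of builder calls in the tests while leaving the emitted instructions, and therefore the expected-output comments (//!, //~gfx...), unchanged.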