aco: reformat according to its .clang-format
author: Eric Engestrom <eric@igalia.com>
Fri, 26 May 2023 11:55:35 +0000 (12:55 +0100)
committer: Marge Bot <emma+marge@anholt.net>
Fri, 16 Jun 2023 19:59:52 +0000 (19:59 +0000)
Signed-off-by: Eric Engestrom <eric@igalia.com>
Reviewed-by: Daniel Schürmann <daniel@schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23253>

34 files changed:
src/amd/compiler/aco_assembler.cpp
src/amd/compiler/aco_insert_NOPs.cpp
src/amd/compiler/aco_insert_exec_mask.cpp
src/amd/compiler/aco_insert_waitcnt.cpp
src/amd/compiler/aco_instruction_selection.cpp
src/amd/compiler/aco_instruction_selection_setup.cpp
src/amd/compiler/aco_interface.cpp
src/amd/compiler/aco_interface.h
src/amd/compiler/aco_ir.cpp
src/amd/compiler/aco_ir.h
src/amd/compiler/aco_lower_to_hw_instr.cpp
src/amd/compiler/aco_optimizer.cpp
src/amd/compiler/aco_print_ir.cpp
src/amd/compiler/aco_register_allocation.cpp
src/amd/compiler/aco_scheduler.cpp
src/amd/compiler/aco_shader_info.h
src/amd/compiler/aco_spill.cpp
src/amd/compiler/aco_validate.cpp
src/amd/compiler/tests/framework.h
src/amd/compiler/tests/helpers.cpp
src/amd/compiler/tests/helpers.h
src/amd/compiler/tests/main.cpp
src/amd/compiler/tests/test_assembler.cpp
src/amd/compiler/tests/test_d3d11_derivs.cpp
src/amd/compiler/tests/test_hard_clause.cpp
src/amd/compiler/tests/test_insert_nops.cpp
src/amd/compiler/tests/test_insert_waitcnt.cpp
src/amd/compiler/tests/test_isel.cpp
src/amd/compiler/tests/test_optimizer.cpp
src/amd/compiler/tests/test_optimizer_postRA.cpp
src/amd/compiler/tests/test_reduce_assign.cpp
src/amd/compiler/tests/test_regalloc.cpp
src/amd/compiler/tests/test_sdwa.cpp
src/amd/compiler/tests/test_to_hw_instr.cpp

index eda5e69..300ebdf 100644 (file)
@@ -52,7 +52,7 @@ struct asm_context {
    // TODO: keep track of branch instructions referring blocks
    // and, when emitting the block, correct the offset in instr
    asm_context(Program* program_, std::vector<struct aco_symbol>* symbols_)
-      : program(program_), gfx_level(program->gfx_level), symbols(symbols_)
+       : program(program_), gfx_level(program->gfx_level), symbols(symbols_)
    {
       if (gfx_level <= GFX7)
          opcode = &instr_info.opcode_gfx7[0];
@@ -1160,8 +1160,7 @@ emit_long_jump(asm_context& ctx, SOPP_instruction* branch, bool backwards,
    emit_instruction(ctx, out, instr.get());
 
    /* create the s_setpc_b64 to jump */
-   instr.reset(
-      bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
+   instr.reset(bld.sop1(aco_opcode::s_setpc_b64, Operand(def.physReg(), s2)).instr);
    emit_instruction(ctx, out, instr.get());
 }
 
@@ -1218,8 +1217,7 @@ fix_constaddrs(asm_context& ctx, std::vector<uint32_t>& out)
 }
 
 unsigned
-emit_program(Program* program, std::vector<uint32_t>& code,
-             std::vector<struct aco_symbol>* symbols)
+emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct aco_symbol>* symbols)
 {
    asm_context ctx(program, symbols);
 
@@ -1252,8 +1250,8 @@ emit_program(Program* program, std::vector<uint32_t>& code,
    code.insert(code.end(), (uint32_t*)program->constant_data.data(),
                (uint32_t*)(program->constant_data.data() + program->constant_data.size()));
 
-   program->config->scratch_bytes_per_wave = align(
-      program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
+   program->config->scratch_bytes_per_wave =
+      align(program->config->scratch_bytes_per_wave, program->dev.scratch_alloc_granule);
 
    return exec_size;
 }
index 8e4decd..049a604 100644 (file)
@@ -254,8 +254,7 @@ public:
    void join_min(const VGPRCounterMap& other)
    {
       unsigned i;
-      BITSET_FOREACH_SET(i, other.resident, 256)
-      {
+      BITSET_FOREACH_SET (i, other.resident, 256) {
          if (BITSET_TEST(resident, i))
             val[i] = MIN2(val[i] + base, other.val[i] + other.base) - base;
          else
@@ -270,8 +269,7 @@ public:
          return false;
 
       unsigned i;
-      BITSET_FOREACH_SET(i, other.resident, 256)
-      {
+      BITSET_FOREACH_SET (i, other.resident, 256) {
          if (!BITSET_TEST(resident, i))
             return false;
          if (val[i] + base != other.val[i] + other.base)
@@ -365,11 +363,11 @@ search_backwards_internal(State& state, GlobalState& global_state, BlockState bl
          return;
    }
 
-PRAGMA_DIAGNOSTIC_PUSH
-PRAGMA_DIAGNOSTIC_IGNORED(-Waddress)
+   PRAGMA_DIAGNOSTIC_PUSH
+   PRAGMA_DIAGNOSTIC_IGNORED(-Waddress)
    if (block_cb != nullptr && !block_cb(global_state, block_state, block))
       return;
-PRAGMA_DIAGNOSTIC_POP
+   PRAGMA_DIAGNOSTIC_POP
 
    for (unsigned lin_pred : block->linear_preds) {
       search_backwards_internal<GlobalState, BlockState, block_cb, instr_cb>(
index 5c48e03..2008270 100644 (file)
@@ -52,8 +52,7 @@ struct wqm_ctx {
    /* state for WQM propagation */
    std::set<unsigned> worklist;
    std::vector<bool> branch_wqm; /* true if the branch condition in this block should be in wqm */
-   wqm_ctx(Program* program_)
-       : program(program_), branch_wqm(program->blocks.size())
+   wqm_ctx(Program* program_) : program(program_), branch_wqm(program->blocks.size())
    {
       for (unsigned i = 0; i < program->blocks.size(); i++)
          worklist.insert(i);
@@ -137,8 +136,7 @@ get_block_needs(wqm_ctx& ctx, exec_ctx& exec_ctx, Block* block)
          propagate_wqm = true;
 
       bool pred_by_exec = needs_exec_mask(instr.get()) ||
-                          instr->opcode == aco_opcode::p_logical_end ||
-                          instr->isBranch();
+                          instr->opcode == aco_opcode::p_logical_end || instr->isBranch();
 
       if (needs_exact(instr))
          instr_needs[i] = Exact;
@@ -574,7 +572,8 @@ process_instructions(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instructio
                 * WQM again.
                 */
                ctx.info[block->index].exec.resize(1);
-               assert(ctx.info[block->index].exec[0].second == (mask_type_exact | mask_type_global));
+               assert(ctx.info[block->index].exec[0].second ==
+                      (mask_type_exact | mask_type_global));
                current_exec = get_exec_op(ctx.info[block->index].exec.back().first);
                ctx.info[block->index].exec[0].first = Operand(bld.lm);
             }
index 9643a9e..985a8e1 100644 (file)
@@ -91,9 +91,8 @@ enum vmem_type : uint8_t {
    vmem_bvh = 1 << 2,
 };
 
-static const uint16_t exp_events =
-   event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock |
-   event_ldsdir;
+static const uint16_t exp_events = event_exp_pos | event_exp_param | event_exp_mrt_null |
+                                   event_gds_gpr_lock | event_vmem_gpr_lock | event_ldsdir;
 static const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg;
 static const uint16_t vm_events = event_vmem | event_flat;
 static const uint16_t vs_events = event_vmem_store;
@@ -580,7 +579,8 @@ kill(wait_imm& imm, alu_delay_info& delay, Instruction* instr, wait_ctx& ctx,
       }
 
       if (ctx.program->gfx_level >= GFX11) {
-         update_alu(ctx, false, false, false, MAX3(delay.salu_cycles, delay.valu_cycles, delay.trans_cycles));
+         update_alu(ctx, false, false, false,
+                    MAX3(delay.salu_cycles, delay.valu_cycles, delay.trans_cycles));
       }
 
       /* remove all gprs with higher counter from map */
@@ -775,8 +775,7 @@ insert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_
     */
    uint32_t ds_vmem_events = event_lds | event_gds | event_vmem | event_flat;
    uint32_t alu_events = event_trans | event_valu | event_salu;
-   bool force_linear =
-      ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
+   bool force_linear = ctx.gfx_level >= GFX11 && (event & (ds_vmem_events | alu_events));
 
    insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types, cycles,
                      force_linear);
index c04b173..b51cddf 100644 (file)
@@ -26,8 +26,8 @@
 #include "aco_instruction_selection.h"
 
 #include "aco_builder.h"
-#include "aco_ir.h"
 #include "aco_interface.h"
+#include "aco_ir.h"
 
 #include "common/ac_nir.h"
 #include "common/sid.h"
@@ -661,8 +661,8 @@ convert_int(isel_context* ctx, Builder& bld, Temp src, unsigned src_bits, unsign
                  Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
    } else {
       assert(src_bits < 32);
-      bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(), Operand::c32(src_bits),
-                 Operand::c32((unsigned)sign_extend));
+      bld.pseudo(aco_opcode::p_extract, Definition(tmp), src, Operand::zero(),
+                 Operand::c32(src_bits), Operand::c32((unsigned)sign_extend));
    }
 
    if (dst_bits == 64) {
@@ -1894,8 +1894,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
    }
    case nir_op_uadd_sat: {
       if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
-         Instruction* add_instr =
-            emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
+         Instruction* add_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_u16, dst);
          add_instr->valu().clamp = 1;
          break;
       }
@@ -1977,8 +1976,7 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
    }
    case nir_op_iadd_sat: {
       if (dst.regClass() == v1 && instr->dest.dest.ssa.bit_size == 16) {
-         Instruction* add_instr =
-            emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
+         Instruction* add_instr = emit_vop3p_instruction(ctx, instr, aco_opcode::v_pk_add_i16, dst);
          add_instr->valu().clamp = 1;
          break;
       }
@@ -3316,8 +3314,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
                              exponent_large);
          Temp cond =
             bld.sopc(aco_opcode::s_cmp_ge_i32, bld.def(s1, scc), Operand::c32(64u), exponent);
-         mantissa = bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa,
-                             Operand::c64(~0llu), cond);
+         mantissa =
+            bld.sop2(aco_opcode::s_cselect_b64, bld.def(s2), mantissa, Operand::c64(~0llu), cond);
          Temp lower = bld.tmp(s1), upper = bld.tmp(s1);
          bld.pseudo(aco_opcode::p_split_vector, Definition(lower), Definition(upper), mantissa);
          Temp cond_small =
@@ -3483,9 +3481,8 @@ visit_alu_instr(isel_context* ctx, nir_alu_instr* instr)
    case nir_op_unpack_64_4x16:
    case nir_op_unpack_32_4x8:
       bld.copy(Definition(dst), get_alu_src(ctx, instr->src[0]));
-      emit_split_vector(ctx, dst,
-                        instr->op == nir_op_unpack_32_4x8 ||
-                        instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
+      emit_split_vector(
+         ctx, dst, instr->op == nir_op_unpack_32_4x8 || instr->op == nir_op_unpack_64_4x16 ? 4 : 2);
       break;
    case nir_op_pack_64_2x32_split: {
       Temp src0 = get_alu_src(ctx, instr->src[0]);
@@ -4029,7 +4026,7 @@ struct LoadEmitInfo {
    unsigned num_components;
    unsigned component_size;
    Temp resource = Temp(0, s1); /* buffer resource or base 64-bit address */
-   Temp idx = Temp(0, v1); /* buffer index */
+   Temp idx = Temp(0, v1);      /* buffer index */
    unsigned component_stride = 0;
    unsigned const_offset = 0;
    unsigned align_mul = 0;
@@ -4176,9 +4173,10 @@ emit_load(isel_context* ctx, Builder& bld, const LoadEmitInfo& info,
             aligned_offset = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), lo, hi);
          }
       }
-      Temp aligned_offset_tmp =
-         aligned_offset.isTemp() ? aligned_offset.getTemp() :
-         aligned_offset.isConstant() ? bld.copy(bld.def(s1), aligned_offset) : Temp(0, s1);
+      Temp aligned_offset_tmp = aligned_offset.isTemp() ? aligned_offset.getTemp()
+                                : aligned_offset.isConstant()
+                                   ? bld.copy(bld.def(s1), aligned_offset)
+                                   : Temp(0, s1);
 
       Temp val = params.callback(bld, info, aligned_offset_tmp, bytes_needed, align,
                                  reduced_const_offset, byte_align ? Temp() : info.dst);
@@ -4508,8 +4506,7 @@ mubuf_load_callback(Builder& bld, const LoadEmitInfo& info, Temp offset, unsigne
    mubuf->offen = offen;
    mubuf->idxen = idxen;
    mubuf->glc = info.glc;
-   mubuf->dlc =
-      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+   mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
    mubuf->slc = info.slc;
    mubuf->sync = info.sync;
    mubuf->offset = const_offset;
@@ -4552,40 +4549,20 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
    aco_opcode op = aco_opcode::num_opcodes;
    if (info.component_size == 2) {
       switch (bytes_needed) {
-      case 2:
-         op = aco_opcode::buffer_load_format_d16_x;
-         break;
-      case 4:
-         op = aco_opcode::buffer_load_format_d16_xy;
-         break;
-      case 6:
-         op = aco_opcode::buffer_load_format_d16_xyz;
-         break;
-      case 8:
-         op = aco_opcode::buffer_load_format_d16_xyzw;
-         break;
-      default:
-         unreachable("invalid buffer load format size");
-         break;
+      case 2: op = aco_opcode::buffer_load_format_d16_x; break;
+      case 4: op = aco_opcode::buffer_load_format_d16_xy; break;
+      case 6: op = aco_opcode::buffer_load_format_d16_xyz; break;
+      case 8: op = aco_opcode::buffer_load_format_d16_xyzw; break;
+      default: unreachable("invalid buffer load format size"); break;
       }
    } else {
       assert(info.component_size == 4);
       switch (bytes_needed) {
-      case 4:
-         op = aco_opcode::buffer_load_format_x;
-         break;
-      case 8:
-         op = aco_opcode::buffer_load_format_xy;
-         break;
-      case 12:
-         op = aco_opcode::buffer_load_format_xyz;
-         break;
-      case 16:
-         op = aco_opcode::buffer_load_format_xyzw;
-         break;
-      default:
-         unreachable("invalid buffer load format size");
-         break;
+      case 4: op = aco_opcode::buffer_load_format_x; break;
+      case 8: op = aco_opcode::buffer_load_format_xy; break;
+      case 12: op = aco_opcode::buffer_load_format_xyz; break;
+      case 16: op = aco_opcode::buffer_load_format_xyzw; break;
+      default: unreachable("invalid buffer load format size"); break;
       }
    }
 
@@ -4596,8 +4573,7 @@ mubuf_load_format_callback(Builder& bld, const LoadEmitInfo& info, Temp offset,
    mubuf->offen = offen;
    mubuf->idxen = idxen;
    mubuf->glc = info.glc;
-   mubuf->dlc =
-      info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
+   mubuf->dlc = info.glc && (bld.program->gfx_level == GFX10 || bld.program->gfx_level == GFX10_3);
    mubuf->slc = info.slc;
    mubuf->sync = info.sync;
    mubuf->offset = const_offset;
@@ -5229,9 +5205,9 @@ resolve_excess_vmem_const_offset(Builder& bld, Temp& voffset, unsigned const_off
 }
 
 void
-emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp idx, Temp vdata,
-                        unsigned const_offset, memory_sync_info sync, bool glc, bool slc,
-                        bool swizzled)
+emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp soffset, Temp idx,
+                        Temp vdata, unsigned const_offset, memory_sync_info sync, bool glc,
+                        bool slc, bool swizzled)
 {
    assert(vdata.id());
    assert(vdata.size() != 3 || ctx->program->gfx_level != GFX6);
@@ -5256,8 +5232,8 @@ emit_single_mubuf_store(isel_context* ctx, Temp descriptor, Temp voffset, Temp s
       vaddr_op = Operand(idx);
 
    Builder::Result r =
-      bld.mubuf(op, Operand(descriptor), vaddr_op, soffset_op, Operand(vdata), const_offset,
-                offen, swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc,
+      bld.mubuf(op, Operand(descriptor), vaddr_op, soffset_op, Operand(vdata), const_offset, offen,
+                swizzled, idxen, /* addr64 */ false, /* disable_wqm */ false, glc,
                 /* dlc*/ false, slc);
 
    r->mubuf().sync = sync;
@@ -5269,7 +5245,8 @@ store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Tem
                  bool swizzled, memory_sync_info sync, bool glc, bool slc)
 {
    Builder bld(ctx->program, ctx->block);
-   assert(elem_size_bytes == 1 || elem_size_bytes == 2 || elem_size_bytes == 4 || elem_size_bytes == 8);
+   assert(elem_size_bytes == 1 || elem_size_bytes == 2 || elem_size_bytes == 4 ||
+          elem_size_bytes == 8);
    assert(write_mask);
    write_mask = util_widen_mask(write_mask, elem_size_bytes);
 
@@ -5282,8 +5259,8 @@ store_vmem_mubuf(isel_context* ctx, Temp src, Temp descriptor, Temp voffset, Tem
 
    for (unsigned i = 0; i < write_count; i++) {
       unsigned const_offset = offsets[i] + base_const_offset;
-      emit_single_mubuf_store(ctx, descriptor, voffset, soffset, idx, write_datas[i], const_offset, sync,
-                              glc, slc, swizzled);
+      emit_single_mubuf_store(ctx, descriptor, voffset, soffset, idx, write_datas[i], const_offset,
+                              sync, glc, slc, swizzled);
    }
 }
 
@@ -5387,7 +5364,7 @@ visit_store_output(isel_context* ctx, nir_intrinsic_instr* instr)
 {
    /* LS pass output to TCS by temp if they have same in/out patch size. */
    bool ls_need_output = ctx->stage == vertex_tess_control_hs &&
-      ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->tcs_in_out_eq;
+                         ctx->shader->info.stage == MESA_SHADER_VERTEX && ctx->tcs_in_out_eq;
 
    bool ps_need_output = ctx->stage == fragment_fs;
 
@@ -6331,8 +6308,7 @@ visit_image_load(isel_context* ctx, nir_intrinsic_instr* instr)
       if (instr->intrinsic == nir_intrinsic_bindless_image_fragment_mask_load_amd) {
          opcode = aco_opcode::image_load;
       } else {
-         bool level_zero =
-            nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
+         bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
          opcode = level_zero ? aco_opcode::image_load : aco_opcode::image_load_mip;
       }
 
@@ -6391,8 +6367,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
    memory_sync_info sync = get_memory_sync_info(instr, storage_image, 0);
    unsigned access = nir_intrinsic_access(instr);
    bool glc = ctx->options->gfx_level == GFX6 ||
-              ((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
-               ctx->program->gfx_level < GFX11);
+              ((access & (ACCESS_VOLATILE | ACCESS_COHERENT)) && ctx->program->gfx_level < GFX11);
 
    if (dim == GLSL_SAMPLER_DIM_BUF) {
       Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
@@ -6463,7 +6438,7 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
             aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
                aco_opcode::p_create_vector, Format::PSEUDO, dmask_count, 1)};
             uint32_t index = 0;
-            u_foreach_bit(bit, dmask) {
+            u_foreach_bit (bit, dmask) {
                vec->operands[index++] = Operand(emit_extract_vector(ctx, data, bit, rc));
             }
             data = bld.tmp(RegClass::get(RegType::vgpr, dmask_count * rc.bytes()));
@@ -6491,9 +6466,8 @@ visit_image_store(isel_context* ctx, nir_intrinsic_instr* instr)
 }
 
 void
-translate_buffer_image_atomic_op(const nir_atomic_op op,
-                                 aco_opcode *buf_op, aco_opcode *buf_op64,
-                                 aco_opcode *image_op)
+translate_buffer_image_atomic_op(const nir_atomic_op op, aco_opcode* buf_op, aco_opcode* buf_op64,
+                                 aco_opcode* image_op)
 {
    switch (op) {
    case nir_atomic_op_iadd:
@@ -6571,8 +6545,7 @@ translate_buffer_image_atomic_op(const nir_atomic_op op,
       *buf_op64 = aco_opcode::buffer_atomic_fmax_x2;
       *image_op = aco_opcode::image_atomic_fmax;
       break;
-   default:
-      unreachable("unsupported atomic operation");
+   default: unreachable("unsupported atomic operation");
    }
 }
 
@@ -6682,9 +6655,8 @@ visit_store_ssbo(isel_context* ctx, nir_intrinsic_instr* instr)
    Temp rsrc = bld.as_uniform(get_ssa_temp(ctx, instr->src[1].ssa));
 
    memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
-   bool glc =
-      (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
-      ctx->program->gfx_level < GFX11;
+   bool glc = (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
+              ctx->program->gfx_level < GFX11;
 
    unsigned write_count = 0;
    Temp write_datas[32];
@@ -6805,7 +6777,7 @@ visit_load_global(isel_context* ctx, nir_intrinsic_instr* instr)
    /* Don't expand global loads when they use MUBUF or SMEM.
     * Global loads don't have the bounds checking that buffer loads have that
     * makes this safe.
-   */
+    */
    unsigned align = nir_intrinsic_align(instr);
    bool byte_align_for_smem_mubuf =
       can_use_byte_align_for_global_load(num_components, component_size, align, false);
@@ -6836,9 +6808,8 @@ visit_store_global(isel_context* ctx, nir_intrinsic_instr* instr)
 
    Temp data = as_vgpr(ctx, get_ssa_temp(ctx, instr->src[0].ssa));
    memory_sync_info sync = get_memory_sync_info(instr, storage_buffer, 0);
-   bool glc =
-      (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
-      ctx->program->gfx_level < GFX11;
+   bool glc = (nir_intrinsic_access(instr) & (ACCESS_VOLATILE | ACCESS_COHERENT)) &&
+              ctx->program->gfx_level < GFX11;
 
    unsigned write_count = 0;
    Temp write_datas[32];
@@ -6999,8 +6970,7 @@ visit_global_atomic(isel_context* ctx, nir_intrinsic_instr* instr)
          op32 = global ? aco_opcode::global_atomic_fmax : aco_opcode::flat_atomic_fmax;
          op64 = global ? aco_opcode::global_atomic_fmax_x2 : aco_opcode::flat_atomic_fmax_x2;
          break;
-      default:
-         unreachable("unsupported atomic operation");
+      default: unreachable("unsupported atomic operation");
       }
 
       aco_opcode op = instr->dest.ssa.bit_size == 32 ? op32 : op64;
@@ -7192,8 +7162,8 @@ visit_store_buffer(isel_context* ctx, nir_intrinsic_instr* intrin)
    memory_sync_info sync(aco_storage_mode_from_nir_mem_mode(mem_mode),
                          written_once ? semantic_can_reorder : semantic_none);
 
-   store_vmem_mubuf(ctx, store_src, descriptor, v_offset, s_offset, idx, const_offset, elem_size_bytes,
-                    write_mask, swizzled, sync, glc, slc);
+   store_vmem_mubuf(ctx, store_src, descriptor, v_offset, s_offset, idx, const_offset,
+                    elem_size_bytes, write_mask, swizzled, sync, glc, slc);
 }
 
 void
@@ -7206,8 +7176,8 @@ visit_load_smem(isel_context* ctx, nir_intrinsic_instr* instr)
 
    /* If base address is 32bit, convert to 64bit with the high 32bit part. */
    if (base.bytes() == 4) {
-      base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2),
-                        base, Operand::c32(ctx->options->address32_hi));
+      base = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), base,
+                        Operand::c32(ctx->options->address32_hi));
    }
 
    aco_opcode opcode = aco_opcode::s_load_dword;
@@ -7535,10 +7505,10 @@ get_scratch_resource(isel_context* ctx)
    Builder bld(ctx->program, ctx->block);
    Temp scratch_addr = ctx->program->private_segment_buffer;
    if (!scratch_addr.bytes()) {
-      Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
-                              Operand::c32(aco_symbol_scratch_addr_lo));
-      Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
-                              Operand::c32(aco_symbol_scratch_addr_hi));
+      Temp addr_lo =
+         bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
+      Temp addr_hi =
+         bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
       scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
    } else if (ctx->stage.hw != HWStage::CS) {
       scratch_addr =
@@ -8093,8 +8063,7 @@ Temp merged_wave_info_to_mask(isel_context* ctx, unsigned i);
 Temp lanecount_to_mask(isel_context* ctx, Temp count);
 
 Temp
-get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
-                 enum glsl_interp_mode interp)
+get_interp_param(isel_context* ctx, nir_intrinsic_op intrin, enum glsl_interp_mode interp)
 {
    bool linear = interp == INTERP_MODE_NOPERSPECTIVE;
    if (intrin == nir_intrinsic_load_barycentric_pixel ||
@@ -8109,9 +8078,8 @@ get_interp_param(isel_context* ctx, nir_intrinsic_op intrin,
 }
 
 void
-ds_ordered_count_offsets(isel_context *ctx, unsigned index_operand,
-                         unsigned wave_release, unsigned wave_done,
-                         unsigned *offset0, unsigned *offset1)
+ds_ordered_count_offsets(isel_context* ctx, unsigned index_operand, unsigned wave_release,
+                         unsigned wave_done, unsigned* offset0, unsigned* offset1)
 {
    unsigned ordered_count_index = index_operand & 0x3f;
    unsigned count_dword = (index_operand >> 24) & 0xf;
@@ -8189,7 +8157,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       RegClass rc = RegClass(offset.type(), 1);
       Temp pos1 = bld.tmp(rc), pos2 = bld.tmp(rc);
       bld.pseudo(aco_opcode::p_split_vector, Definition(pos1), Definition(pos2), offset);
-      Temp bary = get_interp_param(ctx, instr->intrinsic, (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
+      Temp bary = get_interp_param(ctx, instr->intrinsic,
+                                   (glsl_interp_mode)nir_intrinsic_interp_mode(instr));
       emit_interp_center(ctx, get_ssa_temp(ctx, &instr->dest.ssa), bary, pos1, pos2);
       break;
    }
@@ -8977,8 +8946,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
       if (ctx->args->merged_wave_info.used)
          bld.pseudo(aco_opcode::p_extract, Definition(dst), bld.def(s1, scc),
-                    get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(2u),
-                    Operand::c32(8u), Operand::zero());
+                    get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(2u), Operand::c32(8u),
+                    Operand::zero());
       else if (ctx->args->gs_wave_id.used)
          bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_wave_id));
       else
@@ -9025,8 +8994,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
    case nir_intrinsic_overwrite_tes_arguments_amd: {
       ctx->arg_temps[ctx->args->tes_u.arg_index] = get_ssa_temp(ctx, instr->src[0].ssa);
       ctx->arg_temps[ctx->args->tes_v.arg_index] = get_ssa_temp(ctx, instr->src[1].ssa);
-      ctx->arg_temps[ctx->args->tes_rel_patch_id.arg_index] =
-         get_ssa_temp(ctx, instr->src[3].ssa);
+      ctx->arg_temps[ctx->args->tes_rel_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[3].ssa);
       ctx->arg_temps[ctx->args->tes_patch_id.arg_index] = get_ssa_temp(ctx, instr->src[2].ssa);
       break;
    }
@@ -9036,7 +9004,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
       Temp src = ctx->arg_temps[nir_intrinsic_base(instr)];
       assert(src.id());
-      assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr : RegType::vgpr));
+      assert(src.type() == (instr->intrinsic == nir_intrinsic_load_scalar_arg_amd ? RegType::sgpr
+                                                                                  : RegType::vgpr));
       bld.copy(Definition(dst), src);
       emit_split_vector(ctx, dst, dst.size());
       break;
@@ -9048,35 +9017,34 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
       Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u));
       unsigned offset0, offset1;
-      Instruction *ds_instr;
+      Instruction* ds_instr;
       Operand m;
 
       /* Lock a GDS mutex. */
       ds_ordered_count_offsets(ctx, 1 << 24u, false, false, &offset0, &offset1);
       m = bld.m0(bld.as_uniform(ordered_id));
-      ds_instr = bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m,
-                        offset0, offset1, true);
+      ds_instr =
+         bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
       ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
 
       aco_ptr<Pseudo_instruction> vec{create_instruction<Pseudo_instruction>(
          aco_opcode::p_create_vector, Format::PSEUDO, instr->num_components, 1)};
       unsigned write_mask = nir_intrinsic_write_mask(instr);
 
-      bool use_gds_registers =
-         ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
+      bool use_gds_registers = ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
 
       for (unsigned i = 0; i < instr->num_components; i++) {
          if (write_mask & (1 << i)) {
             Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
 
             if (use_gds_registers) {
-               ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1),
-                                 Operand(), chan_counter, i * 4, 0u, true);
+               ds_instr = bld.ds(aco_opcode::ds_add_gs_reg_rtn, bld.def(v1), Operand(),
+                                 chan_counter, i * 4, 0u, true);
             } else {
                m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
 
-               ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1),
-                                 gds_base, chan_counter, m, i * 4, 0u, true);
+               ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, bld.def(v1), gds_base, chan_counter, m,
+                                 i * 4, 0u, true);
             }
             ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
 
@@ -9092,33 +9060,32 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
       /* Unlock a GDS mutex. */
       ds_ordered_count_offsets(ctx, 1 << 24u, true, true, &offset0, &offset1);
       m = bld.m0(bld.as_uniform(ordered_id));
-      ds_instr = bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m,
-                        offset0, offset1, true);
+      ds_instr =
+         bld.ds(aco_opcode::ds_ordered_count, bld.def(v1), gds_base, m, offset0, offset1, true);
       ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
 
       emit_split_vector(ctx, dst, instr->num_components);
       break;
    }
    case nir_intrinsic_xfb_counter_sub_amd: {
-      bool use_gds_registers =
-         ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
+      bool use_gds_registers = ctx->options->gfx_level >= GFX11 && ctx->options->is_opengl;
 
       unsigned write_mask = nir_intrinsic_write_mask(instr);
       Temp counter = get_ssa_temp(ctx, instr->src[0].ssa);
       Temp gds_base = bld.copy(bld.def(v1), Operand::c32(0u));
 
-      u_foreach_bit(i, write_mask) {
+      u_foreach_bit (i, write_mask) {
          Temp chan_counter = emit_extract_vector(ctx, counter, i, v1);
-         Instruction *ds_instr;
+         Instruction* ds_instr;
 
          if (use_gds_registers) {
-            ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1),
-                              Operand(), chan_counter, i * 4, 0u, true);
+            ds_instr = bld.ds(aco_opcode::ds_sub_gs_reg_rtn, bld.def(v1), Operand(), chan_counter,
+                              i * 4, 0u, true);
          } else {
             Operand m = bld.m0((Temp)bld.copy(bld.def(s1, m0), Operand::c32(0x100u)));
 
-            ds_instr = bld.ds(aco_opcode::ds_sub_rtn_u32, bld.def(v1),
-                              gds_base, chan_counter, m, i * 4, 0u, true);
+            ds_instr = bld.ds(aco_opcode::ds_sub_rtn_u32, bld.def(v1), gds_base, chan_counter, m,
+                              i * 4, 0u, true);
          }
          ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
       }
@@ -9162,15 +9129,14 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
          exp->valid_mask = false;
 
       /* Compressed export uses two bits for a channel. */
-      uint32_t channel_mask = exp->compressed ?
-         (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) :
-         write_mask;
+      uint32_t channel_mask =
+         exp->compressed ? (write_mask & 0x3 ? 1 : 0) | (write_mask & 0xc ? 2 : 0) : write_mask;
 
       Temp value = get_ssa_temp(ctx, instr->src[0].ssa);
       for (unsigned i = 0; i < 4; i++) {
-         exp->operands[i] = channel_mask & BITFIELD_BIT(i) ?
-            Operand(emit_extract_vector(ctx, value, i, v1)) :
-            Operand(v1);
+         exp->operands[i] = channel_mask & BITFIELD_BIT(i)
+                               ? Operand(emit_extract_vector(ctx, value, i, v1))
+                               : Operand(v1);
       }
 
       ctx->block->instructions.emplace_back(std::move(exp));
@@ -9183,13 +9149,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
 
       struct aco_export_mrt mrt0, mrt1;
       for (unsigned i = 0; i < 4; i++) {
-         mrt0.out[i] = write_mask & BITFIELD_BIT(i) ?
-            Operand(emit_extract_vector(ctx, val0, i, v1)) :
-            Operand(v1);
+         mrt0.out[i] = write_mask & BITFIELD_BIT(i) ? Operand(emit_extract_vector(ctx, val0, i, v1))
+                                                    : Operand(v1);
 
-         mrt1.out[i] = write_mask & BITFIELD_BIT(i) ?
-            Operand(emit_extract_vector(ctx, val1, i, v1)) :
-            Operand(v1);
+         mrt1.out[i] = write_mask & BITFIELD_BIT(i) ? Operand(emit_extract_vector(ctx, val1, i, v1))
+                                                    : Operand(v1);
       }
       mrt0.enabled_channels = mrt1.enabled_channels = write_mask;
 
@@ -9383,7 +9347,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
    }
 
    if (has_wqm_coord) {
-      assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb || instr->op == nir_texop_lod);
+      assert(instr->op == nir_texop_tex || instr->op == nir_texop_txb ||
+             instr->op == nir_texop_lod);
       assert(wqm_coord.regClass().is_linear_vgpr());
       assert(!a16 && !g16);
    }
@@ -9701,9 +9666,8 @@ visit_tex(isel_context* ctx, nir_tex_instr* instr)
          if (dst.regClass() == s1) {
             Temp is_not_null = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand::zero(),
                                         emit_extract_vector(ctx, resource, 1, s1));
-            bld.sop2(aco_opcode::s_cselect_b32, Definition(dst),
-                     bld.as_uniform(tmp_dst), Operand::c32(0x76543210),
-                     bld.scc(is_not_null));
+            bld.sop2(aco_opcode::s_cselect_b32, Definition(dst), bld.as_uniform(tmp_dst),
+                     Operand::c32(0x76543210), bld.scc(is_not_null));
          } else {
             Temp is_not_null = bld.tmp(bld.lm);
             bld.vopc_e64(aco_opcode::v_cmp_lg_u32, Definition(is_not_null), Operand::zero(),
@@ -10782,10 +10746,12 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
 
    /* Replace NaN by zero (only 32-bit) to fix game bugs if requested. */
    if (out->enable_mrt_output_nan_fixup && !is_16bit &&
-       (out->col_format == V_028714_SPI_SHADER_32_R || out->col_format == V_028714_SPI_SHADER_32_GR ||
-        out->col_format == V_028714_SPI_SHADER_32_AR || out->col_format == V_028714_SPI_SHADER_32_ABGR ||
+       (out->col_format == V_028714_SPI_SHADER_32_R ||
+        out->col_format == V_028714_SPI_SHADER_32_GR ||
+        out->col_format == V_028714_SPI_SHADER_32_AR ||
+        out->col_format == V_028714_SPI_SHADER_32_ABGR ||
         out->col_format == V_028714_SPI_SHADER_FP16_ABGR)) {
-      u_foreach_bit(i, out->write_mask) {
+      u_foreach_bit (i, out->write_mask) {
          Temp is_not_nan =
             bld.vopc(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm), values[i], values[i]);
          values[i] = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), values[i],
@@ -10847,7 +10813,6 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
       }
       break;
 
-
    case V_028714_SPI_SHADER_SNORM16_ABGR:
       if (is_16bit && ctx->options->gfx_level >= GFX9) {
          compr_op = aco_opcode::v_cvt_pknorm_i16_f16;
@@ -10862,13 +10827,13 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
          /* clamp */
          uint32_t max_rgb = out->is_int8 ? 255 : out->is_int10 ? 1023 : 0;
 
-         u_foreach_bit(i, out->write_mask) {
+         u_foreach_bit (i, out->write_mask) {
             uint32_t max = i == 3 && out->is_int10 ? 3 : max_rgb;
 
             values[i] = bld.vop2(aco_opcode::v_min_u32, bld.def(v1), Operand::c32(max), values[i]);
          }
       } else if (is_16bit) {
-         u_foreach_bit(i, out->write_mask) {
+         u_foreach_bit (i, out->write_mask) {
             Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, false);
             values[i] = Operand(tmp);
          }
@@ -10882,7 +10847,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
          uint32_t max_rgb = out->is_int8 ? 127 : out->is_int10 ? 511 : 0;
          uint32_t min_rgb = out->is_int8 ? -128 : out->is_int10 ? -512 : 0;
 
-         u_foreach_bit(i, out->write_mask) {
+         u_foreach_bit (i, out->write_mask) {
             uint32_t max = i == 3 && out->is_int10 ? 1 : max_rgb;
             uint32_t min = i == 3 && out->is_int10 ? -2u : min_rgb;
 
@@ -10890,7 +10855,7 @@ export_fs_mrt_color(isel_context* ctx, const struct mrt_color_export* out,
             values[i] = bld.vop2(aco_opcode::v_max_i32, bld.def(v1), Operand::c32(min), values[i]);
          }
       } else if (is_16bit) {
-         u_foreach_bit(i, out->write_mask) {
+         u_foreach_bit (i, out->write_mask) {
             Temp tmp = convert_int(ctx, bld, values[i].getTemp(), 16, 32, true);
             values[i] = Operand(tmp);
          }
@@ -10996,8 +10961,7 @@ create_fs_jump_to_epilog(isel_context* ctx)
       }
    }
 
-   Temp continue_pc =
-      convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog_pc));
+   Temp continue_pc = convert_pointer_to_64_bit(ctx, get_arg(ctx, ctx->program->info.ps.epilog_pc));
 
    aco_ptr<Pseudo_instruction> jump{create_instruction<Pseudo_instruction>(
       aco_opcode::p_jump_to_epilog, Format::PSEUDO, 1 + color_exports.size(), 0)};
@@ -11068,12 +11032,13 @@ add_startpgm(struct isel_context* ctx)
       Operand scratch_offset = Operand(get_arg(ctx, ctx->args->scratch_offset));
       scratch_offset.setLateKill(true);
 
-      Operand scratch_addr = ctx->args->ring_offsets.used ?
-         Operand(get_arg(ctx, ctx->args->ring_offsets)) : Operand(s2);
+      Operand scratch_addr = ctx->args->ring_offsets.used
+                                ? Operand(get_arg(ctx, ctx->args->ring_offsets))
+                                : Operand(s2);
 
       Builder bld(ctx->program, ctx->block);
-      bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc),
-                 scratch_addr, scratch_offset);
+      bld.pseudo(aco_opcode::p_init_scratch, bld.def(s2), bld.def(s1, scc), scratch_addr,
+                 scratch_offset);
    }
 
    return startpgm;
@@ -11085,9 +11050,9 @@ fix_ls_vgpr_init_bug(isel_context* ctx, Pseudo_instruction* startpgm)
    assert(ctx->shader->info.stage == MESA_SHADER_VERTEX);
    Builder bld(ctx->program, ctx->block);
    constexpr unsigned hs_idx = 1u;
-   Builder::Result hs_thread_count = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
-                                              get_arg(ctx, ctx->args->merged_wave_info),
-                                              Operand::c32((8u << 16) | (hs_idx * 8u)));
+   Builder::Result hs_thread_count =
+      bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1), bld.def(s1, scc),
+               get_arg(ctx, ctx->args->merged_wave_info), Operand::c32((8u << 16) | (hs_idx * 8u)));
    Temp ls_has_nonzero_hs_threads = bool_to_vector_condition(ctx, hs_thread_count.def(1).getTemp());
 
    /* If there are no HS threads, SPI mistakenly loads the LS VGPRs starting at VGPR 0. */
@@ -11218,10 +11183,9 @@ merged_wave_info_to_mask(isel_context* ctx, unsigned i)
    Builder bld(ctx->program, ctx->block);
 
    /* lanecount_to_mask() only cares about s0.u[6:0] so we don't need either s_bfe nor s_and here */
-   Temp count = i == 0
-                   ? get_arg(ctx, ctx->args->merged_wave_info)
-                   : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
-                              get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(i * 8u));
+   Temp count = i == 0 ? get_arg(ctx, ctx->args->merged_wave_info)
+                       : bld.sop2(aco_opcode::s_lshr_b32, bld.def(s1), bld.def(s1, scc),
+                                  get_arg(ctx, ctx->args->merged_wave_info), Operand::c32(i * 8u));
 
    return lanecount_to_mask(ctx, count);
 }
@@ -11276,10 +11240,10 @@ select_program_rt(isel_context& ctx, unsigned shader_count, struct nir_shader* c
 void
 select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
                ac_shader_config* config, const struct aco_compiler_options* options,
-               const struct aco_shader_info* info,
-               const struct ac_shader_args* args)
+               const struct aco_shader_info* info, const struct ac_shader_args* args)
 {
-   isel_context ctx = setup_isel_context(program, shader_count, shaders, config, options, info, args, false);
+   isel_context ctx =
+      setup_isel_context(program, shader_count, shaders, config, options, info, args, false);
 
    if (ctx.stage == raytracing_cs)
       return select_program_rt(ctx, shader_count, shaders, args);
@@ -11391,8 +11355,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
 void
 select_trap_handler_shader(Program* program, struct nir_shader* shader, ac_shader_config* config,
                            const struct aco_compiler_options* options,
-                           const struct aco_shader_info* info,
-                           const struct ac_shader_args* args)
+                           const struct aco_shader_info* info, const struct ac_shader_args* args)
 {
    assert(options->gfx_level == GFX8);
 
index 2c8ca8c..0108b2c 100644 (file)
@@ -660,8 +660,8 @@ cleanup_context(isel_context* ctx)
 isel_context
 setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
                    ac_shader_config* config, const struct aco_compiler_options* options,
-                   const struct aco_shader_info* info,
-                   const struct ac_shader_args* args, bool is_ps_epilog)
+                   const struct aco_shader_info* info, const struct ac_shader_args* args,
+                   bool is_ps_epilog)
 {
    SWStage sw_stage = SWStage::None;
    for (unsigned i = 0; i < shader_count; i++) {
index 5c121b5..45bf4aa 100644 (file)
@@ -80,8 +80,7 @@ validate(aco::Program* program)
 }
 
 static std::string
-get_disasm_string(aco::Program* program, std::vector<uint32_t>& code,
-                  unsigned exec_size)
+get_disasm_string(aco::Program* program, std::vector<uint32_t>& code, unsigned exec_size)
 {
    std::string disasm;
 
@@ -111,8 +110,7 @@ get_disasm_string(aco::Program* program, std::vector<uint32_t>& code,
 
 static std::string
 aco_postprocess_shader(const struct aco_compiler_options* options,
-                       const struct aco_shader_info *info,
-                       std::unique_ptr<aco::Program>& program)
+                       const struct aco_shader_info* info, std::unique_ptr<aco::Program>& program)
 {
    std::string llvm_ir;
 
@@ -211,12 +209,9 @@ aco_postprocess_shader(const struct aco_compiler_options* options,
 }
 
 void
-aco_compile_shader(const struct aco_compiler_options* options,
-                   const struct aco_shader_info* info,
+aco_compile_shader(const struct aco_compiler_options* options, const struct aco_shader_info* info,
                    unsigned shader_count, struct nir_shader* const* shaders,
-                   const struct ac_shader_args *args,
-                   aco_callback *build_binary,
-                   void **binary)
+                   const struct ac_shader_args* args, aco_callback* build_binary, void** binary)
 {
    aco::init();
 
@@ -335,13 +330,8 @@ aco_compile_vs_prolog(const struct aco_compiler_options* options,
    if (get_disasm)
       disasm = get_disasm_string(program.get(), code, exec_size);
 
-   (*build_prolog)(binary,
-                   config.num_sgprs,
-                   config.num_vgprs,
-                   code.data(),
-                   code.size(),
-                   disasm.data(),
-                   disasm.size());
+   (*build_prolog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
+                   disasm.data(), disasm.size());
 }
 
 void
@@ -377,11 +367,6 @@ aco_compile_ps_epilog(const struct aco_compiler_options* options,
    if (get_disasm)
       disasm = get_disasm_string(program.get(), code, exec_size);
 
-   (*build_epilog)(binary,
-                   config.num_sgprs,
-                   config.num_vgprs,
-                   code.data(),
-                   code.size(),
-                   disasm.data(),
-                   disasm.size());
+   (*build_epilog)(binary, config.num_sgprs, config.num_vgprs, code.data(), code.size(),
+                   disasm.data(), disasm.size());
 }
index a91d7a3..e28ff73 100644 (file)
@@ -25,9 +25,9 @@
 #ifndef ACO_INTERFACE_H
 #define ACO_INTERFACE_H
 
-#include "amd_family.h"
-
 #include "aco_shader_info.h"
+
+#include "amd_family.h"
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -47,24 +47,18 @@ typedef void(aco_callback)(void** priv_ptr, const struct ac_shader_config* confi
                            const char* llvm_ir_str, unsigned llvm_ir_size, const char* disasm_str,
                            unsigned disasm_size, uint32_t* statistics, uint32_t stats_size,
                            uint32_t exec_size, const uint32_t* code, uint32_t code_dw,
-                           const struct aco_symbol *symbols, unsigned num_symbols);
+                           const struct aco_symbol* symbols, unsigned num_symbols);
 
-typedef void (aco_shader_part_callback)(void **priv_ptr,
-                                        uint32_t num_sgprs,
-                                        uint32_t num_vgprs,
-                                        const uint32_t *code,
-                                        uint32_t code_size,
-                                        const char *disasm_str,
-                                        uint32_t disasm_size);
+typedef void(aco_shader_part_callback)(void** priv_ptr, uint32_t num_sgprs, uint32_t num_vgprs,
+                                       const uint32_t* code, uint32_t code_size,
+                                       const char* disasm_str, uint32_t disasm_size);
 
 extern const struct aco_compiler_statistic_info* aco_statistic_infos;
 
 void aco_compile_shader(const struct aco_compiler_options* options,
-                        const struct aco_shader_info* info,
-                        unsigned shader_count, struct nir_shader* const* shaders,
-                        const struct ac_shader_args *args,
-                        aco_callback *build_binary,
-                        void **binary);
+                        const struct aco_shader_info* info, unsigned shader_count,
+                        struct nir_shader* const* shaders, const struct ac_shader_args* args,
+                        aco_callback* build_binary, void** binary);
 
 void aco_compile_rt_prolog(const struct aco_compiler_options* options,
                            const struct aco_shader_info* info, const struct ac_shader_args* in_args,
index 639b6db..f552cc2 100644 (file)
@@ -98,8 +98,9 @@ init_program(Program* program, Stage stage, const struct aco_shader_info* info,
    program->wave_size = info->wave_size;
    program->lane_mask = program->wave_size == 32 ? s1 : s2;
 
-   program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024 :
-                                       gfx_level >= GFX7 ? 512 : 256;
+   program->dev.lds_encoding_granule = gfx_level >= GFX11 && stage == fragment_fs ? 1024
+                                       : gfx_level >= GFX7                        ? 512
+                                                                                  : 256;
    program->dev.lds_alloc_granule = gfx_level >= GFX10_3 ? 1024 : program->dev.lds_encoding_granule;
 
    /* GFX6: There is 64KB LDS per CU, but a single workgroup can only use 32KB. */
index 8f8b5de..08a9a2a 100644 (file)
@@ -140,9 +140,9 @@ enum storage_class : uint8_t {
    storage_buffer = 0x1, /* SSBOs and global memory */
    storage_gds = 0x2,
    storage_image = 0x4,
-   storage_shared = 0x8,       /* or TCS output */
-   storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
-   storage_task_payload = 0x20,/* Task-Mesh payload */
+   storage_shared = 0x8,        /* or TCS output */
+   storage_vmem_output = 0x10,  /* GS or TCS output stores using VMEM */
+   storage_task_payload = 0x20, /* Task-Mesh payload */
    storage_scratch = 0x40,
    storage_vgpr_spill = 0x80,
    storage_count = 8, /* not counting storage_none */
@@ -823,7 +823,8 @@ public:
       assert(bytes() == 2 || bytes() == 4);
       if (opsel) {
          if (bytes() == 2 && int16_t(data_.i) >= -16 && int16_t(data_.i) <= 64 && !isLiteral())
-            return int16_t(data_.i) >> 16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
+            return int16_t(data_.i) >>
+                   16; /* 16-bit inline integers are sign-extended, even with fp16 instrs */
          else
             return data_.i >> 16;
       }
@@ -1418,7 +1419,8 @@ struct VINTERP_inreg_instruction : public VALU_instruction {
    uint8_t padding5;
    uint8_t padding6;
 };
-static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4, "Unexpected padding");
+static_assert(sizeof(VINTERP_inreg_instruction) == sizeof(VALU_instruction) + 4,
+              "Unexpected padding");
 
 /**
  * Data Parallel Primitives Format:
@@ -1809,8 +1811,7 @@ memory_sync_info get_sync_info(const Instruction* instr);
 inline bool
 is_dead(const std::vector<uint16_t>& uses, const Instruction* instr)
 {
-   if (instr->definitions.empty() || instr->isBranch() ||
-       instr->opcode == aco_opcode::p_startpgm ||
+   if (instr->definitions.empty() || instr->isBranch() || instr->opcode == aco_opcode::p_startpgm ||
        instr->opcode == aco_opcode::p_init_scratch ||
        instr->opcode == aco_opcode::p_dual_src_export_gfx11)
       return false;
@@ -2216,8 +2217,7 @@ void init_program(Program* program, Stage stage, const struct aco_shader_info* i
 
 void select_program(Program* program, unsigned shader_count, struct nir_shader* const* shaders,
                     ac_shader_config* config, const struct aco_compiler_options* options,
-                    const struct aco_shader_info* info,
-                    const struct ac_shader_args* args);
+                    const struct aco_shader_info* info, const struct ac_shader_args* args);
 void select_trap_handler_shader(Program* program, struct nir_shader* shader,
                                 ac_shader_config* config,
                                 const struct aco_compiler_options* options,
@@ -2258,7 +2258,7 @@ bool dealloc_vgprs(Program* program);
 void insert_NOPs(Program* program);
 void form_hard_clauses(Program* program);
 unsigned emit_program(Program* program, std::vector<uint32_t>& code,
-                      std::vector<struct aco_symbol> *symbols);
+                      std::vector<struct aco_symbol>* symbols);
 /**
  * Returns true if print_asm can disassemble the given program for the current build/runtime
  * configuration
index 61fe7eb..f79f96a 100644 (file)
@@ -2181,7 +2181,7 @@ lower_image_sample(lower_context* ctx, aco_ptr<Instruction>& instr)
    instr->mimg().strict_wqm = false;
 
    if ((3 + num_vaddr) > instr->operands.size()) {
-      MIMG_instruction *new_instr = create_instruction<MIMG_instruction>(
+      MIMG_instruction* new_instr = create_instruction<MIMG_instruction>(
          instr->opcode, Format::MIMG, 3 + num_vaddr, instr->definitions.size());
       std::copy(instr->definitions.cbegin(), instr->definitions.cend(),
                 new_instr->definitions.begin());
@@ -2346,8 +2346,8 @@ lower_to_hw_instr(Program* program)
                      target =
                         program->has_color_exports ? V_008DFC_SQ_EXP_MRT : V_008DFC_SQ_EXP_MRTZ;
                   if (program->stage == fragment_fs)
-                     bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1),
-                             0, target, false, true, true);
+                     bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1), 0,
+                             target, false, true, true);
                   if (should_dealloc_vgprs)
                      bld.sopp(aco_opcode::s_sendmsg, -1, sendmsg_dealloc_vgprs);
                   bld.sopp(aco_opcode::s_endpgm);
@@ -2518,8 +2518,7 @@ lower_to_hw_instr(Program* program)
                         create_bperm(bld, ext_swiz, dst, Operand::zero());
                      }
                   } else {
-                     SDWA_instruction& sdwa =
-                        bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa();
+                     SDWA_instruction& sdwa = bld.vop1_sdwa(aco_opcode::v_mov_b32, dst, op)->sdwa();
                      sdwa.sel[0] = SubdwordSel(bits / 8, offset / 8, signext);
                   }
                }
@@ -2574,7 +2573,8 @@ lower_to_hw_instr(Program* program)
                } else {
                   assert(dst.regClass() == v2b);
                   bld.vop2_sdwa(aco_opcode::v_lshlrev_b32, dst, Operand::c32(offset), op)
-                     ->sdwa().sel[1] = SubdwordSel::ubyte;
+                     ->sdwa()
+                     .sel[1] = SubdwordSel::ubyte;
                }
                break;
             }
index 6462122..8e28570 100644 (file)
@@ -1369,7 +1369,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       if (instr->isSALU() || instr->isPseudo()) {
          unsigned bits = get_operand_size(instr, i);
          if ((info.is_constant(bits) || (info.is_literal(bits) && instr->isPseudo())) &&
-              alu_can_accept_constant(instr, i)) {
+             alu_can_accept_constant(instr, i)) {
             instr->operands[i] = get_constant_op(ctx, info, bits);
             continue;
          }
@@ -2116,9 +2116,10 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
    case aco_opcode::v_mbcnt_hi_u32_b32_e64: {
       if (instr->operands[0].constantEquals(-1) && instr->operands[1].isTemp() &&
           ctx.info[instr->operands[1].tempId()].is_usedef()) {
-         Instruction *usedef_instr = ctx.info[instr->operands[1].tempId()].instr;
+         Instruction* usedef_instr = ctx.info[instr->operands[1].tempId()].instr;
          if (usedef_instr->opcode == aco_opcode::v_mbcnt_lo_u32_b32 &&
-             usedef_instr->operands[0].constantEquals(-1) && usedef_instr->operands[1].constantEquals(0))
+             usedef_instr->operands[0].constantEquals(-1) &&
+             usedef_instr->operands[1].constantEquals(0))
             ctx.info[instr->definitions[0].tempId()].set_subgroup_invocation(instr.get());
       }
       break;
@@ -2370,7 +2371,9 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       return false;
 
    /* Find the constant operand or return early if there isn't one. */
-   const int const_op_idx = instr->operands[0].isConstant() ? 0 : instr->operands[1].isConstant() ? 1 : -1;
+   const int const_op_idx = instr->operands[0].isConstant()   ? 0
+                            : instr->operands[1].isConstant() ? 1
+                                                              : -1;
    if (const_op_idx == -1)
       return false;
 
@@ -2413,11 +2416,10 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       first_bit = val + 1;
       num_bits = val >= wave_size ? 0 : (wave_size - val - 1);
       break;
-   default:
-      return false;
+   default: return false;
    }
 
-   Instruction *cpy = NULL;
+   Instruction* cpy = NULL;
    const uint64_t mask = BITFIELD64_RANGE(first_bit, num_bits);
    if (wave_size == 64 && mask > 0x7fffffff && mask != -1ull) {
       /* Mask can't be represented as a 64-bit constant or literal, use s_bfm_b64. */
@@ -2426,7 +2428,8 @@ optimize_cmp_subgroup_invocation(opt_ctx& ctx, aco_ptr<Instruction>& instr)
       cpy->operands[1] = Operand::c32(first_bit);
    } else {
       /* Copy mask as a literal constant. */
-      cpy = create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1);
+      cpy =
+         create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 1, 1);
       cpy->operands[0] = wave_size == 32 ? Operand::c32((uint32_t)mask) : Operand::c64(mask);
    }
 
@@ -4821,10 +4824,12 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
     */
    if (instr->opcode == aco_opcode::s_and_b32 || instr->opcode == aco_opcode::s_and_b64) {
       if (instr->operands[0].isTemp() && fixed_to_exec(instr->operands[1]) &&
-          ctx.uses[instr->operands[0].tempId()] == 1 && ctx.uses[instr->definitions[1].tempId()] == 0 &&
+          ctx.uses[instr->operands[0].tempId()] == 1 &&
+          ctx.uses[instr->definitions[1].tempId()] == 0 &&
           can_eliminate_and_exec(ctx, instr->operands[0].getTemp(), instr->pass_flags)) {
          ctx.uses[instr->operands[0].tempId()]--;
-         ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(instr->definitions[0].getTemp());
+         ctx.info[instr->operands[0].tempId()].instr->definitions[0].setTemp(
+            instr->definitions[0].getTemp());
          instr.reset();
          return;
       }
index 37db332..c7ba857 100644 (file)
@@ -516,7 +516,7 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
       if (mimg.lwe)
          fprintf(output, " lwe");
       if (mimg.r128)
-        fprintf(output, " r128");
+         fprintf(output, " r128");
       if (mimg.a16)
          fprintf(output, " a16");
       if (mimg.d16)
index 0fb22f1..7c4535b 100644 (file)
@@ -460,8 +460,7 @@ print_regs(ra_ctx& ctx, bool vgprs, RegisterFile& reg_file)
    printf("%u/%u used, %u/%u free\n", regs.size - free_regs, regs.size, free_regs, regs.size);
 
    /* print assignments ordered by registers */
-   std::map<PhysReg, std::pair<unsigned, unsigned>>
-      regs_to_vars; /* maps to byte size and temp id */
+   std::map<PhysReg, std::pair<unsigned, unsigned>> regs_to_vars; /* maps to byte size and temp id */
    for (unsigned id : find_vars(ctx, reg_file, regs)) {
       const assignment& var = ctx.assignments[id];
       PhysReg reg = var.reg;
@@ -1088,8 +1087,8 @@ get_reg_for_create_vector_copy(ra_ctx& ctx, RegisterFile& reg_file,
              instr->operands[i].regClass() == info.rc) {
             assignment& op = ctx.assignments[instr->operands[i].tempId()];
             /* if everything matches, create parallelcopy for the killed operand */
-            if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) &&
-                op.reg != scc && reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
+            if (!intersects(def_reg, PhysRegInterval{op.reg, op.rc.size()}) && op.reg != scc &&
+                reg_file.get_id(op.reg) == instr->operands[i].tempId()) {
                Definition pc_def = Definition(reg, info.rc);
                parallelcopies.emplace_back(instr->operands[i], pc_def);
                return op.reg;
@@ -1655,8 +1654,7 @@ get_reg(ra_ctx& ctx, RegisterFile& reg_file, Temp temp,
          return vcc;
    }
    if (ctx.assignments[temp.id()].m0) {
-      if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) &&
-          can_write_m0(instr))
+      if (get_reg_specified(ctx, reg_file, temp.regClass(), instr, m0) && can_write_m0(instr))
          return m0;
    }
 
index 5316878..02498c0 100644 (file)
@@ -587,8 +587,10 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
    /* don't move non-reorderable instructions */
    if (instr->opcode == aco_opcode::s_memtime || instr->opcode == aco_opcode::s_memrealtime ||
        instr->opcode == aco_opcode::s_setprio || instr->opcode == aco_opcode::s_getreg_b32 ||
-       instr->opcode == aco_opcode::p_init_scratch || instr->opcode == aco_opcode::p_jump_to_epilog ||
-       instr->opcode == aco_opcode::s_sendmsg_rtn_b32 || instr->opcode == aco_opcode::s_sendmsg_rtn_b64)
+       instr->opcode == aco_opcode::p_init_scratch ||
+       instr->opcode == aco_opcode::p_jump_to_epilog ||
+       instr->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
+       instr->opcode == aco_opcode::s_sendmsg_rtn_b64)
       return hazard_fail_unreorderable;
 
    memory_event_set instr_set;
@@ -663,8 +665,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
    int16_t k = 0;
 
    /* don't move s_memtime/s_memrealtime */
-   if (current->opcode == aco_opcode::s_memtime ||
-       current->opcode == aco_opcode::s_memrealtime ||
+   if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime ||
        current->opcode == aco_opcode::s_sendmsg_rtn_b32 ||
        current->opcode == aco_opcode::s_sendmsg_rtn_b64)
       return;
index 8765c94..4bfe7a6 100644 (file)
 extern "C" {
 #endif
 
-#define ACO_MAX_SO_OUTPUTS 64
-#define ACO_MAX_SO_BUFFERS 4
+#define ACO_MAX_SO_OUTPUTS     64
+#define ACO_MAX_SO_BUFFERS     4
 #define ACO_MAX_VERTEX_ATTRIBS 32
-#define ACO_MAX_VBS 32
+#define ACO_MAX_VBS            32
 
 struct aco_vs_input_state {
    uint32_t instance_rate_inputs;
@@ -133,8 +133,8 @@ struct aco_compiler_options {
    enum amd_gfx_level gfx_level;
    uint32_t address32_hi;
    struct {
-      void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message);
-      void *private_data;
+      void (*func)(void* private_data, enum aco_compiler_debug_level level, const char* message);
+      void* private_data;
    } debug;
 };
 
index 8e9c091..1b64d60 100644 (file)
@@ -94,7 +94,8 @@ struct spill_ctx {
    spill_ctx(const RegisterDemand target_pressure_, Program* program_,
              std::vector<std::vector<RegisterDemand>> register_demand_)
        : target_pressure(target_pressure_), program(program_), memory(),
-         register_demand(std::move(register_demand_)), renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
+         register_demand(std::move(register_demand_)),
+         renames(program->blocks.size(), aco::map<Temp, Temp>(memory)),
          spills_entry(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
          spills_exit(program->blocks.size(), aco::unordered_map<Temp, uint32_t>(memory)),
          processed(program->blocks.size(), false),
@@ -226,10 +227,11 @@ next_uses_per_block(spill_ctx& ctx, unsigned block_idx, uint32_t& worklist)
 
       std::pair<uint32_t, uint32_t> distance{block_idx, 0};
 
-      auto it = instr->definitions[0].isTemp() ? next_use_distances_start.find(instr->definitions[0].getTemp())
-                                               : next_use_distances_start.end();
+      auto it = instr->definitions[0].isTemp()
+                   ? next_use_distances_start.find(instr->definitions[0].getTemp())
+                   : next_use_distances_start.end();
       if (it != next_use_distances_start.end() &&
-         phi_defs.insert(instr->definitions[0].getTemp()).second) {
+          phi_defs.insert(instr->definitions[0].getTemp()).second) {
          distance = it->second;
       }
 
@@ -388,7 +390,7 @@ get_rematerialize_info(spill_ctx& ctx)
 
 void
 update_local_next_uses(spill_ctx& ctx, Block* block,
-                std::vector<std::vector<std::pair<Temp, uint32_t>>>& local_next_uses)
+                       std::vector<std::vector<std::pair<Temp, uint32_t>>>& local_next_uses)
 {
    if (local_next_uses.size() < block->instructions.size()) {
       /* Allocate more next-use-maps. Note that by never reducing the vector size, we enable
@@ -1006,7 +1008,7 @@ add_coupling_code(spill_ctx& ctx, Block* block, unsigned block_idx)
                ctx.renames[pred_idx].find(phi->operands[i].getTemp());
             if (it != ctx.renames[pred_idx].end()) {
                phi->operands[i].setTemp(it->second);
-            /* prevent the defining instruction from being DCE'd if it could be rematerialized */
+               /* prevent the defining instruction from being DCE'd if it could be rematerialized */
             } else {
                auto remat_it = ctx.remat.find(phi->operands[i].getTemp());
                if (remat_it != ctx.remat.end()) {
@@ -1407,7 +1409,8 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
             continue;
 
          /* find p_logical_end */
-         std::vector<aco_ptr<Instruction>>& prev_instructions = ctx.program->blocks[block_idx].instructions;
+         std::vector<aco_ptr<Instruction>>& prev_instructions =
+            ctx.program->blocks[block_idx].instructions;
          unsigned idx = prev_instructions.size() - 1;
          while (prev_instructions[idx]->opcode != aco_opcode::p_logical_end)
             idx--;
@@ -1422,10 +1425,10 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
 
    Temp private_segment_buffer = ctx.program->private_segment_buffer;
    if (!private_segment_buffer.bytes()) {
-      Temp addr_lo = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
-                              Operand::c32(aco_symbol_scratch_addr_lo));
-      Temp addr_hi = bld.sop1(aco_opcode::p_load_symbol, bld.def(s1),
-                              Operand::c32(aco_symbol_scratch_addr_hi));
+      Temp addr_lo =
+         bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_lo));
+      Temp addr_hi =
+         bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
       private_segment_buffer =
          bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
    } else if (ctx.program->stage.hw != HWStage::CS) {
@@ -1471,8 +1474,7 @@ setup_vgpr_spill_reload(spill_ctx& ctx, Block& block,
       if (ctx.scratch_rsrc == Temp()) {
          int32_t saddr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size -
                          ctx.program->dev.scratch_global_offset_min;
-         ctx.scratch_rsrc =
-            load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
+         ctx.scratch_rsrc = load_scratch_resource(ctx, scratch_offset, block, instructions, saddr);
       }
    } else {
       bool add_offset_to_sgpr =
index afb9896..506ff80 100644 (file)
@@ -35,8 +35,8 @@
 namespace aco {
 
 static void
-aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix,
-        const char* file, unsigned line, const char* fmt, va_list args)
+aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix, const char* file,
+        unsigned line, const char* fmt, va_list args)
 {
    char* msg;
 
@@ -270,8 +270,7 @@ validate_ir(Program* program)
                                    (instr->opcode == aco_opcode::p_bpermute_gfx11w64 && i == 0) ||
                                    (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
                                    ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
-                                   (instr->isScratch() && i == 0) ||
-                                   (instr->isDS() && i == 0) ||
+                                   (instr->isScratch() && i == 0) || (instr->isDS() && i == 0) ||
                                    (instr->opcode == aco_opcode::p_init_scratch && i == 0);
                check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
             } else {
@@ -393,7 +392,7 @@ validate_ir(Program* program)
                      "OPSEL_LO set for unsupported instruction format", instr.get());
                check(!instr->valu().opsel_hi || instr->isVOP3P(),
                      "OPSEL_HI set for unsupported instruction format", instr.get());
-               check(!instr->valu().omod || instr->isVOP3() ||instr->isSDWA(),
+               check(!instr->valu().omod || instr->isVOP3() || instr->isSDWA(),
                      "OMOD set for unsupported instruction format", instr.get());
                check(!instr->valu().clamp || instr->isVOP3() || instr->isVOP3P() ||
                         instr->isSDWA() || instr->isVINTERP_INREG(),
@@ -562,7 +561,8 @@ validate_ir(Program* program)
                         instr->definitions[2].regClass().size() == 1,
                      "Third definition of p_dual_src_export_gfx11 must be a v1", instr.get());
                check(instr->definitions[3].regClass() == program->lane_mask,
-                     "Fourth definition of p_dual_src_export_gfx11 must be a lane mask", instr.get());
+                     "Fourth definition of p_dual_src_export_gfx11 must be a lane mask",
+                     instr.get());
                check(instr->definitions[4].physReg() == vcc,
                      "Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
                check(instr->definitions[5].physReg() == scc,
@@ -627,26 +627,28 @@ validate_ir(Program* program)
             check(instr->operands.size() < 4 || instr->operands[3].isOfType(RegType::vgpr),
                   "VMEM write data must be vgpr", instr.get());
 
-            const bool d16 = instr->opcode == aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
-                             instr->opcode == aco_opcode::buffer_load_ubyte ||
-                             instr->opcode == aco_opcode::buffer_load_sbyte ||
-                             instr->opcode == aco_opcode::buffer_load_ushort ||
-                             instr->opcode == aco_opcode::buffer_load_sshort ||
-                             instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
-                             instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
-                             instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
-                             instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
-                             instr->opcode == aco_opcode::buffer_load_short_d16 ||
-                             instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
-                             instr->opcode == aco_opcode::buffer_load_format_d16_x ||
-                             instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
-                             instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
-                             instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
-                             instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
-                             instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
-                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
-                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
-                             instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
+            const bool d16 =
+               instr->opcode ==
+                  aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
+               instr->opcode == aco_opcode::buffer_load_ubyte ||
+               instr->opcode == aco_opcode::buffer_load_sbyte ||
+               instr->opcode == aco_opcode::buffer_load_ushort ||
+               instr->opcode == aco_opcode::buffer_load_sshort ||
+               instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
+               instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
+               instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
+               instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
+               instr->opcode == aco_opcode::buffer_load_short_d16 ||
+               instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
+               instr->opcode == aco_opcode::buffer_load_format_d16_x ||
+               instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
+               instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
+               instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
+               instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
+               instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
+               instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
+               instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
+               instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
             if (instr->definitions.size()) {
                check(instr->definitions[0].regClass().type() == RegType::vgpr,
                      "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
@@ -763,11 +765,14 @@ validate_ir(Program* program)
             break;
          }
          case Format::LDSDIR: {
-            check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1, "LDSDIR must have an v1 definition", instr.get());
+            check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1,
+                  "LDSDIR must have an v1 definition", instr.get());
             check(instr->operands.size() == 1, "LDSDIR must have an operand", instr.get());
             if (!instr->operands.empty()) {
-               check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand", instr.get());
-               check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0, "LDSDIR must have an operand fixed to m0", instr.get());
+               check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand",
+                     instr.get());
+               check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0,
+                     "LDSDIR must have an operand fixed to m0", instr.get());
             }
             break;
          }
index 76a17e2..261642d 100644 (file)
 #include <string>
 
 struct TestDef {
-   const char *name;
-   const char *source_file;
+   const char* name;
+   const char* source_file;
    void (*func)();
 };
 
 extern std::map<std::string, TestDef> tests;
-extern FILE *output;
+extern FILE* output;
 
-bool set_variant(const char *name);
+bool set_variant(const char* name);
 
-inline bool set_variant(amd_gfx_level cls, const char *rest="")
+inline bool
+set_variant(amd_gfx_level cls, const char* rest = "")
 {
-   char buf[8+strlen(rest)];
+   char buf[8 + strlen(rest)];
    if (cls != GFX10_3) {
       snprintf(buf, sizeof(buf), "gfx%d%s", cls - GFX6 + 6 - (cls > GFX10_3), rest);
    } else {
@@ -56,18 +57,21 @@ inline bool set_variant(amd_gfx_level cls, const char *rest="")
    return set_variant(buf);
 }
 
-void fail_test(const char *fmt, ...);
-void skip_test(const char *fmt, ...);
+void fail_test(const char* fmt, ...);
+void skip_test(const char* fmt, ...);
 
-#define _BEGIN_TEST(name, struct_name) static void struct_name(); static __attribute__((constructor)) void CONCAT2(add_test_, __COUNTER__)() {\
-      tests[#name] = (TestDef){#name, ACO_TEST_BUILD_ROOT "/" __FILE__, &struct_name};\
-   }\
-   static void struct_name() {\
+#define _BEGIN_TEST(name, struct_name)                                                             \
+   static void struct_name();                                                                      \
+   static __attribute__((constructor)) void CONCAT2(add_test_, __COUNTER__)()                      \
+   {                                                                                               \
+      tests[#name] = (TestDef){#name, ACO_TEST_BUILD_ROOT "/" __FILE__, &struct_name};             \
+   }                                                                                               \
+   static void struct_name()                                                                       \
+   {
 
-#define BEGIN_TEST(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
+#define BEGIN_TEST(name)      _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
 #define BEGIN_TEST_TODO(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
 #define BEGIN_TEST_FAIL(name) _BEGIN_TEST(name, CONCAT2(Test_, __COUNTER__))
-#define END_TEST \
-   }
+#define END_TEST              }
 
 #endif /* ACO_TEST_COMMON_H */
index c5a8844..caa8b51 100644 (file)
  *
  */
 #include "helpers.h"
-#include "vulkan/vk_format.h"
+
 #include "common/amd_family.h"
-#include <stdio.h>
-#include <sstream>
+#include "vulkan/vk_format.h"
+
 #include <llvm-c/Target.h>
+
 #include <mutex>
+#include <sstream>
+#include <stdio.h>
 
 using namespace aco;
 
 extern "C" {
-PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
-       VkInstance                                  instance,
-       const char*                                 pName);
+PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(VkInstance instance, const char* pName);
 }
 
 ac_shader_config config;
@@ -47,32 +48,34 @@ static VkInstance instance_cache[CHIP_LAST] = {VK_NULL_HANDLE};
 static VkDevice device_cache[CHIP_LAST] = {VK_NULL_HANDLE};
 static std::mutex create_device_mutex;
 
-#define FUNCTION_LIST\
-   ITEM(CreateInstance)\
-   ITEM(DestroyInstance)\
-   ITEM(EnumeratePhysicalDevices)\
-   ITEM(GetPhysicalDeviceProperties2)\
-   ITEM(CreateDevice)\
-   ITEM(DestroyDevice)\
-   ITEM(CreateShaderModule)\
-   ITEM(DestroyShaderModule)\
-   ITEM(CreateGraphicsPipelines)\
-   ITEM(CreateComputePipelines)\
-   ITEM(DestroyPipeline)\
-   ITEM(CreateDescriptorSetLayout)\
-   ITEM(DestroyDescriptorSetLayout)\
-   ITEM(CreatePipelineLayout)\
-   ITEM(DestroyPipelineLayout)\
-   ITEM(CreateRenderPass)\
-   ITEM(DestroyRenderPass)\
-   ITEM(GetPipelineExecutablePropertiesKHR)\
+#define FUNCTION_LIST                                                                              \
+   ITEM(CreateInstance)                                                                            \
+   ITEM(DestroyInstance)                                                                           \
+   ITEM(EnumeratePhysicalDevices)                                                                  \
+   ITEM(GetPhysicalDeviceProperties2)                                                              \
+   ITEM(CreateDevice)                                                                              \
+   ITEM(DestroyDevice)                                                                             \
+   ITEM(CreateShaderModule)                                                                        \
+   ITEM(DestroyShaderModule)                                                                       \
+   ITEM(CreateGraphicsPipelines)                                                                   \
+   ITEM(CreateComputePipelines)                                                                    \
+   ITEM(DestroyPipeline)                                                                           \
+   ITEM(CreateDescriptorSetLayout)                                                                 \
+   ITEM(DestroyDescriptorSetLayout)                                                                \
+   ITEM(CreatePipelineLayout)                                                                      \
+   ITEM(DestroyPipelineLayout)                                                                     \
+   ITEM(CreateRenderPass)                                                                          \
+   ITEM(DestroyRenderPass)                                                                         \
+   ITEM(GetPipelineExecutablePropertiesKHR)                                                        \
    ITEM(GetPipelineExecutableInternalRepresentationsKHR)
 
 #define ITEM(n) PFN_vk##n n;
 FUNCTION_LIST
 #undef ITEM
 
-void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size, enum radeon_family family)
+void
+create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_size,
+               enum radeon_family family)
 {
    memset(&config, 0, sizeof(config));
    info.wave_size = wave_size;
@@ -90,7 +93,7 @@ void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_siz
    program->debug.func = nullptr;
    program->debug.private_data = nullptr;
 
-   Block *block = program->create_and_insert_block();
+   Block* block = program->create_and_insert_block();
    block->kind = block_kind_top_level;
 
    bld = Builder(program.get(), &program->blocks[0]);
@@ -98,9 +101,9 @@ void create_program(enum amd_gfx_level gfx_level, Stage stage, unsigned wave_siz
    config.float_mode = program->blocks[0].fp_mode.val;
 }
 
-bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
-              enum radeon_family family, const char* subvariant,
-              unsigned wave_size)
+bool
+setup_cs(const char* input_spec, enum amd_gfx_level gfx_level, enum radeon_family family,
+         const char* subvariant, unsigned wave_size)
 {
    if (!set_variant(gfx_level, subvariant))
       return false;
@@ -117,7 +120,8 @@ bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
          input_classes.push_back(RegClass::get(type, size * (in_bytes ? 1 : 4)));
 
          input_spec += 2 + in_bytes;
-         while (input_spec[0] == ' ') input_spec++;
+         while (input_spec[0] == ' ')
+            input_spec++;
       }
 
       aco_ptr<Instruction> startpgm{create_instruction<Pseudo_instruction>(
@@ -132,7 +136,8 @@ bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
    return true;
 }
 
-void finish_program(Program *prog)
+void
+finish_program(Program* prog)
 {
    for (Block& BB : prog->blocks) {
       for (unsigned idx : BB.linear_preds)
@@ -149,7 +154,8 @@ void finish_program(Program *prog)
    }
 }
 
-void finish_validator_test()
+void
+finish_validator_test()
 {
    finish_program(program.get());
    aco_print_program(program.get(), output);
@@ -160,7 +166,8 @@ void finish_validator_test()
       fprintf(output, "Validation failed\n");
 }
 
-void finish_opt_test()
+void
+finish_opt_test()
 {
    finish_program(program.get());
    if (!aco::validate_ir(program.get())) {
@@ -175,7 +182,8 @@ void finish_opt_test()
    aco_print_program(program.get(), output);
 }
 
-void finish_setup_reduce_temp_test()
+void
+finish_setup_reduce_temp_test()
 {
    finish_program(program.get());
    if (!aco::validate_ir(program.get())) {
@@ -190,7 +198,8 @@ void finish_setup_reduce_temp_test()
    aco_print_program(program.get(), output);
 }
 
-void finish_ra_test(ra_test_policy policy, bool lower)
+void
+finish_ra_test(ra_test_policy policy, bool lower)
 {
    finish_program(program.get());
    if (!aco::validate_ir(program.get())) {
@@ -215,42 +224,48 @@ void finish_ra_test(ra_test_policy policy, bool lower)
    aco_print_program(program.get(), output);
 }
 
-void finish_optimizer_postRA_test()
+void
+finish_optimizer_postRA_test()
 {
    finish_program(program.get());
    aco::optimize_postRA(program.get());
    aco_print_program(program.get(), output);
 }
 
-void finish_to_hw_instr_test()
+void
+finish_to_hw_instr_test()
 {
    finish_program(program.get());
    aco::lower_to_hw_instr(program.get());
    aco_print_program(program.get(), output);
 }
 
-void finish_waitcnt_test()
+void
+finish_waitcnt_test()
 {
    finish_program(program.get());
    aco::insert_wait_states(program.get());
    aco_print_program(program.get(), output);
 }
 
-void finish_insert_nops_test()
+void
+finish_insert_nops_test()
 {
    finish_program(program.get());
    aco::insert_NOPs(program.get());
    aco_print_program(program.get(), output);
 }
 
-void finish_form_hard_clause_test()
+void
+finish_form_hard_clause_test()
 {
    finish_program(program.get());
    aco::form_hard_clauses(program.get());
    aco_print_program(program.get(), output);
 }
 
-void finish_assembler_test()
+void
+finish_assembler_test()
 {
    finish_program(program.get());
    std::vector<uint32_t> binary;
@@ -261,13 +276,14 @@ void finish_assembler_test()
    if (program->gfx_level >= GFX8) {
       print_asm(program.get(), binary, exec_size / 4u, output);
    } else {
-      //TODO: maybe we should use CLRX and skip this test if it's not available?
+      // TODO: maybe we should use CLRX and skip this test if it's not available?
       for (uint32_t dword : binary)
          fprintf(output, "%.8x\n", dword);
    }
 }
 
-void writeout(unsigned i, Temp tmp)
+void
+writeout(unsigned i, Temp tmp)
 {
    if (tmp.id())
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), tmp);
@@ -275,22 +291,26 @@ void writeout(unsigned i, Temp tmp)
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i));
 }
 
-void writeout(unsigned i, aco::Builder::Result res)
+void
+writeout(unsigned i, aco::Builder::Result res)
 {
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), res);
 }
 
-void writeout(unsigned i, Operand op)
+void
+writeout(unsigned i, Operand op)
 {
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op);
 }
 
-void writeout(unsigned i, Operand op0, Operand op1)
+void
+writeout(unsigned i, Operand op0, Operand op1)
 {
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(i), op0, op1);
 }
 
-Temp fneg(Temp src, Builder b)
+Temp
+fneg(Temp src, Builder b)
 {
    if (src.bytes() == 2)
       return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0xbc00u), src);
@@ -298,35 +318,42 @@ Temp fneg(Temp src, Builder b)
       return b.vop2(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0xbf800000u), src);
 }
 
-Temp fabs(Temp src, Builder b)
+Temp
+fabs(Temp src, Builder b)
 {
    if (src.bytes() == 2) {
-      Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
+      Builder::Result res =
+         b.vop2_e64(aco_opcode::v_mul_f16, b.def(v2b), Operand::c16(0x3c00), src);
       res->valu().abs[1] = true;
       return res;
    } else {
-      Builder::Result res = b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
+      Builder::Result res =
+         b.vop2_e64(aco_opcode::v_mul_f32, b.def(v1), Operand::c32(0x3f800000u), src);
       res->valu().abs[1] = true;
       return res;
    }
 }
 
-Temp f2f32(Temp src, Builder b)
+Temp
+f2f32(Temp src, Builder b)
 {
    return b.vop1(aco_opcode::v_cvt_f32_f16, b.def(v1), src);
 }
 
-Temp f2f16(Temp src, Builder b)
+Temp
+f2f16(Temp src, Builder b)
 {
    return b.vop1(aco_opcode::v_cvt_f16_f32, b.def(v2b), src);
 }
 
-Temp u2u16(Temp src, Builder b)
+Temp
+u2u16(Temp src, Builder b)
 {
    return b.pseudo(aco_opcode::p_extract_vector, b.def(v2b), src, Operand::zero());
 }
 
-Temp fadd(Temp src0, Temp src1, Builder b)
+Temp
+fadd(Temp src0, Temp src1, Builder b)
 {
    if (src0.bytes() == 2)
       return b.vop2(aco_opcode::v_add_f16, b.def(v2b), src0, src1);
@@ -334,7 +361,8 @@ Temp fadd(Temp src0, Temp src1, Builder b)
       return b.vop2(aco_opcode::v_add_f32, b.def(v1), src0, src1);
 }
 
-Temp fmul(Temp src0, Temp src1, Builder b)
+Temp
+fmul(Temp src0, Temp src1, Builder b)
 {
    if (src0.bytes() == 2)
       return b.vop2(aco_opcode::v_mul_f16, b.def(v2b), src0, src1);
@@ -342,7 +370,8 @@ Temp fmul(Temp src0, Temp src1, Builder b)
       return b.vop2(aco_opcode::v_mul_f32, b.def(v1), src0, src1);
 }
 
-Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
+Temp
+fma(Temp src0, Temp src1, Temp src2, Builder b)
 {
    if (src0.bytes() == 2)
       return b.vop3(aco_opcode::v_fma_f16, b.def(v2b), src0, src1, src2);
@@ -350,40 +379,46 @@ Temp fma(Temp src0, Temp src1, Temp src2, Builder b)
       return b.vop3(aco_opcode::v_fma_f32, b.def(v1), src0, src1, src2);
 }
 
-Temp fsat(Temp src, Builder b)
+Temp
+fsat(Temp src, Builder b)
 {
    if (src.bytes() == 2)
-      return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u),
-                    Operand::c16(0x3c00u), src);
+      return b.vop3(aco_opcode::v_med3_f16, b.def(v2b), Operand::c16(0u), Operand::c16(0x3c00u),
+                    src);
    else
-      return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(),
-                    Operand::c32(0x3f800000u), src);
+      return b.vop3(aco_opcode::v_med3_f32, b.def(v1), Operand::zero(), Operand::c32(0x3f800000u),
+                    src);
 }
 
-Temp fmin(Temp src0, Temp src1, Builder b)
+Temp
+fmin(Temp src0, Temp src1, Builder b)
 {
    return b.vop2(aco_opcode::v_min_f32, b.def(v1), src0, src1);
 }
 
-Temp fmax(Temp src0, Temp src1, Builder b)
+Temp
+fmax(Temp src0, Temp src1, Builder b)
 {
    return b.vop2(aco_opcode::v_max_f32, b.def(v1), src0, src1);
 }
 
-Temp ext_ushort(Temp src, unsigned idx, Builder b)
+Temp
+ext_ushort(Temp src, unsigned idx, Builder b)
 {
    return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
                    Operand::c32(16u), Operand::c32(false));
 }
 
-Temp ext_ubyte(Temp src, unsigned idx, Builder b)
+Temp
+ext_ubyte(Temp src, unsigned idx, Builder b)
 {
    return b.pseudo(aco_opcode::p_extract, b.def(src.regClass()), src, Operand::c32(idx),
                    Operand::c32(8u), Operand::c32(false));
 }
 
-void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
-                            std::function<void()> els)
+void
+emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::function<void()> then,
+                       std::function<void()> els)
 {
    prog->blocks.reserve(prog->blocks.size() + 6);
 
@@ -418,8 +453,10 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
    PhysReg saved_exec_reg(84);
 
    b.reset(if_block);
-   Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg), Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
-   b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index, then_linear->index);
+   Temp saved_exec = b.sop1(Builder::s_and_saveexec, b.def(b.lm, saved_exec_reg),
+                            Definition(scc, s1), Definition(exec, b.lm), cond, Operand(exec, b.lm));
+   b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), then_logical->index,
+            then_linear->index);
 
    b.reset(then_logical);
    b.pseudo(aco_opcode::p_logical_start);
@@ -431,8 +468,10 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
    b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), invert->index);
 
    b.reset(invert);
-   b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1), Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
-   b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index, else_linear->index);
+   b.sop2(Builder::s_andn2, Definition(exec, bld.lm), Definition(scc, s1),
+          Operand(saved_exec, saved_exec_reg), Operand(exec, bld.lm));
+   b.branch(aco_opcode::p_cbranch_nz, Definition(vcc, bld.lm), else_logical->index,
+            else_linear->index);
 
    b.reset(else_logical);
    b.pseudo(aco_opcode::p_logical_start);
@@ -444,42 +483,29 @@ void emit_divergent_if_else(Program* prog, aco::Builder& b, Operand cond, std::f
    b.branch(aco_opcode::p_branch, Definition(vcc, bld.lm), endif_block->index);
 
    b.reset(endif_block);
-   b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm), Operand(saved_exec, saved_exec_reg));
+   b.pseudo(aco_opcode::p_parallelcopy, Definition(exec, bld.lm),
+            Operand(saved_exec, saved_exec_reg));
 }
 
-VkDevice get_vk_device(enum amd_gfx_level gfx_level)
+VkDevice
+get_vk_device(enum amd_gfx_level gfx_level)
 {
    enum radeon_family family;
    switch (gfx_level) {
-   case GFX6:
-      family = CHIP_TAHITI;
-      break;
-   case GFX7:
-      family = CHIP_BONAIRE;
-      break;
-   case GFX8:
-      family = CHIP_POLARIS10;
-      break;
-   case GFX9:
-      family = CHIP_VEGA10;
-      break;
-   case GFX10:
-      family = CHIP_NAVI10;
-      break;
-   case GFX10_3:
-      family = CHIP_NAVI21;
-      break;
-   case GFX11:
-      family = CHIP_GFX1100;
-      break;
-   default:
-      family = CHIP_UNKNOWN;
-      break;
+   case GFX6: family = CHIP_TAHITI; break;
+   case GFX7: family = CHIP_BONAIRE; break;
+   case GFX8: family = CHIP_POLARIS10; break;
+   case GFX9: family = CHIP_VEGA10; break;
+   case GFX10: family = CHIP_NAVI10; break;
+   case GFX10_3: family = CHIP_NAVI21; break;
+   case GFX11: family = CHIP_GFX1100; break;
+   default: family = CHIP_UNKNOWN; break;
    }
    return get_vk_device(family);
 }
 
-VkDevice get_vk_device(enum radeon_family family)
+VkDevice
+get_vk_device(enum radeon_family family)
 {
    assert(family != CHIP_UNKNOWN);
 
@@ -496,12 +522,13 @@ VkDevice get_vk_device(enum radeon_family family)
    VkInstanceCreateInfo instance_create_info = {};
    instance_create_info.pApplicationInfo = &app_info;
    instance_create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
-   ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
+   ASSERTED VkResult result = ((PFN_vkCreateInstance)vk_icdGetInstanceProcAddr(
+      NULL, "vkCreateInstance"))(&instance_create_info, NULL, &instance_cache[family]);
    assert(result == VK_SUCCESS);
 
-   #define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
+#define ITEM(n) n = (PFN_vk##n)vk_icdGetInstanceProcAddr(instance_cache[family], "vk" #n);
    FUNCTION_LIST
-   #undef ITEM
+#undef ITEM
 
    uint32_t device_count = 1;
    VkPhysicalDevice device = VK_NULL_HANDLE;
@@ -511,7 +538,7 @@ VkDevice get_vk_device(enum radeon_family family)
 
    VkDeviceCreateInfo device_create_info = {};
    device_create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
-   static const char *extensions[] = {"VK_KHR_pipeline_executable_properties"};
+   static const char* extensions[] = {"VK_KHR_pipeline_executable_properties"};
    device_create_info.enabledExtensionCount = sizeof(extensions) / sizeof(extensions[0]);
    device_create_info.ppEnabledExtensionNames = extensions;
    result = CreateDevice(device, &device_create_info, NULL, &device_cache[family]);
@@ -520,7 +547,8 @@ VkDevice get_vk_device(enum radeon_family family)
 }
 
 static struct DestroyDevices {
-   ~DestroyDevices() {
+   ~DestroyDevices()
+   {
       for (unsigned i = 0; i < CHIP_LAST; i++) {
          if (!device_cache[i])
             continue;
@@ -530,8 +558,9 @@ static struct DestroyDevices {
    }
 } destroy_devices;
 
-void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
-                       const char *name, bool remove_encoding)
+void
+print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
+                  const char* name, bool remove_encoding)
 {
    uint32_t executable_count = 16;
    VkPipelineExecutablePropertiesKHR executables[16];
@@ -539,7 +568,8 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
    pipeline_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INFO_KHR;
    pipeline_info.pNext = NULL;
    pipeline_info.pipeline = pipeline;
-   ASSERTED VkResult result = GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
+   ASSERTED VkResult result =
+      GetPipelineExecutablePropertiesKHR(device, &pipeline_info, &executable_count, executables);
    assert(result == VK_SUCCESS);
 
    uint32_t executable = 0;
@@ -570,13 +600,13 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
    }
    assert(requested_ir && "Could not find requested IR");
 
-   char *data = (char*)malloc(requested_ir->dataSize);
+   char* data = (char*)malloc(requested_ir->dataSize);
    requested_ir->pData = data;
    result = GetPipelineExecutableInternalRepresentationsKHR(device, &exec_info, &ir_count, ir);
    assert(result == VK_SUCCESS);
 
    if (remove_encoding) {
-      for (char *c = data; *c; c++) {
+      for (char* c = data; *c; c++) {
          if (*c == ';') {
             for (; *c && *c != '\n'; c++)
                *c = ' ';
@@ -588,23 +618,25 @@ void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBi
    free(data);
 }
 
-VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *module_info)
+VkShaderModule
+__qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* module_info)
 {
-    VkShaderModuleCreateInfo vk_module_info;
-    vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
-    vk_module_info.pNext = NULL;
-    vk_module_info.flags = 0;
-    vk_module_info.codeSize = module_info->spirvSize;
-    vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
-
-    VkShaderModule module;
-    ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
-    assert(result == VK_SUCCESS);
-
-    return module;
+   VkShaderModuleCreateInfo vk_module_info;
+   vk_module_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
+   vk_module_info.pNext = NULL;
+   vk_module_info.flags = 0;
+   vk_module_info.codeSize = module_info->spirvSize;
+   vk_module_info.pCode = (const uint32_t*)module_info->pSpirv;
+
+   VkShaderModule module;
+   ASSERTED VkResult result = CreateShaderModule(dev, &vk_module_info, NULL, &module);
+   assert(result == VK_SUCCESS);
+
+   return module;
 }
 
-PipelineBuilder::PipelineBuilder(VkDevice dev) {
+PipelineBuilder::PipelineBuilder(VkDevice dev)
+{
    memset(this, 0, sizeof(*this));
    topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
    device = dev;
@@ -615,7 +647,7 @@ PipelineBuilder::~PipelineBuilder()
    DestroyPipeline(device, pipeline, NULL);
 
    for (unsigned i = 0; i < (is_compute() ? 1 : gfx_pipeline_info.stageCount); i++) {
-      VkPipelineShaderStageCreateInfo *stage_info = &stages[i];
+      VkPipelineShaderStageCreateInfo* stage_info = &stages[i];
       if (owned_stages & stage_info->stage)
          DestroyShaderModule(device, stage_info->module, NULL);
    }
@@ -628,72 +660,87 @@ PipelineBuilder::~PipelineBuilder()
    DestroyRenderPass(device, render_pass, NULL);
 }
 
-void PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
-                                       uint32_t binding, VkDescriptorType type, uint32_t count)
+void
+PipelineBuilder::add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
+                                  VkDescriptorType type, uint32_t count)
 {
    desc_layouts_used |= 1ull << layout;
    desc_bindings[layout][num_desc_bindings[layout]++] = {binding, type, count, stage_flags, NULL};
 }
 
-void PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
+void
+PipelineBuilder::add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate)
 {
    vs_bindings[vs_input.vertexBindingDescriptionCount++] = {binding, stride, rate};
 }
 
-void PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset)
+void
+PipelineBuilder::add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format,
+                                      uint32_t offset)
 {
    vs_attributes[vs_input.vertexAttributeDescriptionCount++] = {location, binding, format, offset};
 }
 
-void PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo *module)
+void
+PipelineBuilder::add_resource_decls(QoShaderModuleCreateInfo* module)
 {
    for (unsigned i = 0; i < module->declarationCount; i++) {
-      const QoShaderDecl *decl = &module->pDeclarations[i];
+      const QoShaderDecl* decl = &module->pDeclarations[i];
       switch (decl->decl_type) {
       case QoShaderDeclType_ubo:
-         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
+         add_desc_binding(module->stage, decl->set, decl->binding,
+                          VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER);
          break;
       case QoShaderDeclType_ssbo:
-         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
+         add_desc_binding(module->stage, decl->set, decl->binding,
+                          VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
          break;
       case QoShaderDeclType_img_buf:
-         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
+         add_desc_binding(module->stage, decl->set, decl->binding,
+                          VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);
          break;
       case QoShaderDeclType_img:
-         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
+         add_desc_binding(module->stage, decl->set, decl->binding,
+                          VK_DESCRIPTOR_TYPE_STORAGE_IMAGE);
          break;
       case QoShaderDeclType_tex_buf:
-         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
+         add_desc_binding(module->stage, decl->set, decl->binding,
+                          VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER);
          break;
       case QoShaderDeclType_combined:
-         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
+         add_desc_binding(module->stage, decl->set, decl->binding,
+                          VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER);
          break;
       case QoShaderDeclType_tex:
-         add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
+         add_desc_binding(module->stage, decl->set, decl->binding,
+                          VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE);
          break;
       case QoShaderDeclType_samp:
          add_desc_binding(module->stage, decl->set, decl->binding, VK_DESCRIPTOR_TYPE_SAMPLER);
          break;
-      default:
-         break;
+      default: break;
       }
    }
 }
 
-void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
+void
+PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo* module)
 {
    unsigned next_vtx_offset = 0;
    for (unsigned i = 0; i < module->declarationCount; i++) {
-      const QoShaderDecl *decl = &module->pDeclarations[i];
+      const QoShaderDecl* decl = &module->pDeclarations[i];
       switch (decl->decl_type) {
       case QoShaderDeclType_in:
          if (module->stage == VK_SHADER_STAGE_VERTEX_BIT) {
             if (!strcmp(decl->type, "float") || decl->type[0] == 'v')
-               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT, next_vtx_offset);
+               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SFLOAT,
+                                    next_vtx_offset);
             else if (decl->type[0] == 'u')
-               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT, next_vtx_offset);
+               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_UINT,
+                                    next_vtx_offset);
             else if (decl->type[0] == 'i')
-               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT, next_vtx_offset);
+               add_vertex_attribute(decl->location, 0, VK_FORMAT_R32G32B32A32_SINT,
+                                    next_vtx_offset);
             next_vtx_offset += 16;
          }
          break;
@@ -707,17 +754,17 @@ void PipelineBuilder::add_io_decls(QoShaderModuleCreateInfo *module)
                color_outputs[decl->location] = VK_FORMAT_R32G32B32A32_SINT;
          }
          break;
-      default:
-         break;
+      default: break;
       }
    }
    if (next_vtx_offset)
       add_vertex_binding(0, next_vtx_offset);
 }
 
-void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name)
+void
+PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name)
 {
-   VkPipelineShaderStageCreateInfo *stage_info;
+   VkPipelineShaderStageCreateInfo* stage_info;
    if (stage == VK_SHADER_STAGE_COMPUTE_BIT)
       stage_info = &stages[0];
    else
@@ -732,40 +779,50 @@ void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, VkShaderModule modu
    owned_stages |= stage;
 }
 
-void PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name)
+void
+PipelineBuilder::add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
+                           const char* name)
 {
    add_stage(stage, __qoCreateShaderModule(device, &module), name);
    add_resource_decls(&module);
    add_io_decls(&module);
 }
 
-void PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
+void
+PipelineBuilder::add_vsfs(VkShaderModule vs, VkShaderModule fs)
 {
    add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
    add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
 }
 
-void PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
+void
+PipelineBuilder::add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs)
 {
    add_stage(VK_SHADER_STAGE_VERTEX_BIT, vs);
    add_stage(VK_SHADER_STAGE_FRAGMENT_BIT, fs);
 }
 
-void PipelineBuilder::add_cs(VkShaderModule cs)
+void
+PipelineBuilder::add_cs(VkShaderModule cs)
 {
    add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
 }
 
-void PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
+void
+PipelineBuilder::add_cs(QoShaderModuleCreateInfo cs)
 {
    add_stage(VK_SHADER_STAGE_COMPUTE_BIT, cs);
 }
 
-bool PipelineBuilder::is_compute() {
+bool
+PipelineBuilder::is_compute()
+{
    return gfx_pipeline_info.stageCount == 0;
 }
 
-void PipelineBuilder::create_compute_pipeline() {
+void
+PipelineBuilder::create_compute_pipeline()
+{
    VkComputePipelineCreateInfo create_info;
    create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
    create_info.pNext = NULL;
@@ -775,11 +832,14 @@ void PipelineBuilder::create_compute_pipeline() {
    create_info.basePipelineHandle = VK_NULL_HANDLE;
    create_info.basePipelineIndex = 0;
 
-   ASSERTED VkResult result = CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
+   ASSERTED VkResult result =
+      CreateComputePipelines(device, VK_NULL_HANDLE, 1, &create_info, NULL, &pipeline);
    assert(result == VK_SUCCESS);
 }
 
-void PipelineBuilder::create_graphics_pipeline() {
+void
+PipelineBuilder::create_graphics_pipeline()
+{
    /* create the create infos */
    if (!samples)
       samples = VK_SAMPLE_COUNT_1_BIT;
@@ -792,7 +852,7 @@ void PipelineBuilder::create_graphics_pipeline() {
       if (color_outputs[i] == VK_FORMAT_UNDEFINED)
          continue;
 
-      VkAttachmentDescription *desc = &attachment_descs[num_color_attachments];
+      VkAttachmentDescription* desc = &attachment_descs[num_color_attachments];
       desc->flags = 0;
       desc->format = color_outputs[i];
       desc->samples = samples;
@@ -803,16 +863,14 @@ void PipelineBuilder::create_graphics_pipeline() {
       desc->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
       desc->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
 
-      VkAttachmentReference *ref = &color_attachments[num_color_attachments];
+      VkAttachmentReference* ref = &color_attachments[num_color_attachments];
       ref->attachment = num_color_attachments;
       ref->layout = VK_IMAGE_LAYOUT_GENERAL;
 
-      VkPipelineColorBlendAttachmentState *blend = &blend_attachment_states[num_color_attachments];
+      VkPipelineColorBlendAttachmentState* blend = &blend_attachment_states[num_color_attachments];
       blend->blendEnable = false;
-      blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT |
-                              VK_COLOR_COMPONENT_G_BIT |
-                              VK_COLOR_COMPONENT_B_BIT |
-                              VK_COLOR_COMPONENT_A_BIT;
+      blend->colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
+                              VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
 
       num_color_attachments++;
    }
@@ -820,7 +878,7 @@ void PipelineBuilder::create_graphics_pipeline() {
    unsigned num_attachments = num_color_attachments;
    VkAttachmentReference ds_attachment;
    if (ds_output != VK_FORMAT_UNDEFINED) {
-      VkAttachmentDescription *desc = &attachment_descs[num_attachments];
+      VkAttachmentDescription* desc = &attachment_descs[num_attachments];
       desc->flags = 0;
       desc->format = ds_output;
       desc->samples = samples;
@@ -902,8 +960,7 @@ void PipelineBuilder::create_graphics_pipeline() {
    ds_state.front.passOp = VK_STENCIL_OP_REPLACE;
    ds_state.front.depthFailOp = VK_STENCIL_OP_REPLACE;
    ds_state.front.compareOp = VK_COMPARE_OP_ALWAYS;
-   ds_state.front.compareMask = 0xffffffff,
-   ds_state.front.writeMask = 0;
+   ds_state.front.compareMask = 0xffffffff, ds_state.front.writeMask = 0;
    ds_state.front.reference = 0;
    ds_state.back = ds_state.front;
 
@@ -915,17 +972,15 @@ void PipelineBuilder::create_graphics_pipeline() {
    color_blend_state.attachmentCount = num_color_attachments;
    color_blend_state.pAttachments = blend_attachment_states;
 
-   VkDynamicState dynamic_states[9] = {
-      VK_DYNAMIC_STATE_VIEWPORT,
-      VK_DYNAMIC_STATE_SCISSOR,
-      VK_DYNAMIC_STATE_LINE_WIDTH,
-      VK_DYNAMIC_STATE_DEPTH_BIAS,
-      VK_DYNAMIC_STATE_BLEND_CONSTANTS,
-      VK_DYNAMIC_STATE_DEPTH_BOUNDS,
-      VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
-      VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
-      VK_DYNAMIC_STATE_STENCIL_REFERENCE
-   };
+   VkDynamicState dynamic_states[9] = {VK_DYNAMIC_STATE_VIEWPORT,
+                                       VK_DYNAMIC_STATE_SCISSOR,
+                                       VK_DYNAMIC_STATE_LINE_WIDTH,
+                                       VK_DYNAMIC_STATE_DEPTH_BIAS,
+                                       VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+                                       VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+                                       VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+                                       VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+                                       VK_DYNAMIC_STATE_STENCIL_REFERENCE};
 
    VkPipelineDynamicStateCreateInfo dynamic_state;
    dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
@@ -985,7 +1040,9 @@ void PipelineBuilder::create_graphics_pipeline() {
    assert(result == VK_SUCCESS);
 }
 
-void PipelineBuilder::create_pipeline() {
+void
+PipelineBuilder::create_pipeline()
+{
    unsigned num_desc_layouts = 0;
    for (unsigned i = 0; i < 64; i++) {
       if (!(desc_layouts_used & (1ull << i)))
@@ -998,7 +1055,8 @@ void PipelineBuilder::create_pipeline() {
       desc_layout_info.bindingCount = num_desc_bindings[i];
       desc_layout_info.pBindings = desc_bindings[i];
 
-      ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL, &desc_layouts[num_desc_layouts]);
+      ASSERTED VkResult result = CreateDescriptorSetLayout(device, &desc_layout_info, NULL,
+                                                           &desc_layouts[num_desc_layouts]);
       assert(result == VK_SUCCESS);
       num_desc_layouts++;
    }
@@ -1012,7 +1070,8 @@ void PipelineBuilder::create_pipeline() {
    pipeline_layout_info.setLayoutCount = num_desc_layouts;
    pipeline_layout_info.pSetLayouts = desc_layouts;
 
-   ASSERTED VkResult result = CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
+   ASSERTED VkResult result =
+      CreatePipelineLayout(device, &pipeline_layout_info, NULL, &pipeline_layout);
    assert(result == VK_SUCCESS);
 
    if (is_compute())
@@ -1021,7 +1080,8 @@ void PipelineBuilder::create_pipeline() {
       create_graphics_pipeline();
 }
 
-void PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char *name, bool remove_encoding)
+void
+PipelineBuilder::print_ir(VkShaderStageFlagBits stage_flags, const char* name, bool remove_encoding)
 {
    if (!pipeline)
       create_pipeline();
index c014369..eb035e0 100644 (file)
@@ -24,8 +24,9 @@
 #ifndef ACO_TEST_HELPERS_H
 #define ACO_TEST_HELPERS_H
 
-#include "framework.h"
 #include "vulkan/vulkan.h"
+
+#include "framework.h"
 #include <functional>
 
 enum QoShaderDeclType {
@@ -42,10 +43,10 @@ enum QoShaderDeclType {
 };
 
 struct QoShaderDecl {
-   const char *name;
-   const char *type;
+   const char* name;
+   const char* type;
    QoShaderDeclType decl_type;
-   //TODO: array size?
+   // TODO: array size?
    unsigned location;
    unsigned component;
    unsigned binding;
@@ -53,12 +54,12 @@ struct QoShaderDecl {
 };
 
 struct QoShaderModuleCreateInfo {
-    void *pNext;
-    size_t spirvSize;
-    const void *pSpirv;
-    uint32_t declarationCount;
-    const QoShaderDecl *pDeclarations;
-    VkShaderStageFlagBits stage;
+   void* pNext;
+   size_t spirvSize;
+   const void* pSpirv;
+   uint32_t declarationCount;
+   const QoShaderDecl* pDeclarations;
+   VkShaderStageFlagBits stage;
 };
 
 extern ac_shader_config config;
@@ -71,17 +72,17 @@ namespace aco {
 struct ra_test_policy;
 }
 
-void create_program(enum amd_gfx_level gfx_level, aco::Stage stage,
-                    unsigned wave_size=64, enum radeon_family family=CHIP_UNKNOWN);
-bool setup_cs(const char *input_spec, enum amd_gfx_level gfx_level,
-              enum radeon_family family=CHIP_UNKNOWN, const char* subvariant = "",
-              unsigned wave_size=64);
+void create_program(enum amd_gfx_level gfx_level, aco::Stage stage, unsigned wave_size = 64,
+                    enum radeon_family family = CHIP_UNKNOWN);
+bool setup_cs(const char* input_spec, enum amd_gfx_level gfx_level,
+              enum radeon_family family = CHIP_UNKNOWN, const char* subvariant = "",
+              unsigned wave_size = 64);
 
-void finish_program(aco::Program *program);
+void finish_program(aco::Program* program);
 void finish_validator_test();
 void finish_opt_test();
 void finish_setup_reduce_temp_test();
-void finish_ra_test(aco::ra_test_policy, bool lower=false);
+void finish_ra_test(aco::ra_test_policy, bool lower = false);
 void finish_optimizer_postRA_test();
 void finish_to_hw_instr_test();
 void finish_waitcnt_test();
@@ -89,35 +90,35 @@ void finish_insert_nops_test();
 void finish_form_hard_clause_test();
 void finish_assembler_test();
 
-void writeout(unsigned i, aco::Temp tmp=aco::Temp(0, aco::s1));
+void writeout(unsigned i, aco::Temp tmp = aco::Temp(0, aco::s1));
 void writeout(unsigned i, aco::Builder::Result res);
 void writeout(unsigned i, aco::Operand op);
 void writeout(unsigned i, aco::Operand op0, aco::Operand op1);
 
-aco::Temp fneg(aco::Temp src, aco::Builder b=bld);
-aco::Temp fabs(aco::Temp src, aco::Builder b=bld);
-aco::Temp f2f32(aco::Temp src, aco::Builder b=bld);
-aco::Temp f2f16(aco::Temp src, aco::Builder b=bld);
-aco::Temp u2u16(aco::Temp src, aco::Builder b=bld);
-aco::Temp fadd(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
-aco::Temp fmul(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
-aco::Temp fma(aco::Temp src0, aco::Temp src1, aco::Temp src2, aco::Builder b=bld);
-aco::Temp fsat(aco::Temp src, aco::Builder b=bld);
-aco::Temp fmin(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
-aco::Temp fmax(aco::Temp src0, aco::Temp src1, aco::Builder b=bld);
-aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b=bld);
-aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b=bld);
-void emit_divergent_if_else(aco::Program* prog, aco::Builder& b, aco::Operand cond, std::function<void()> then,
-                            std::function<void()> els);
+aco::Temp fneg(aco::Temp src, aco::Builder b = bld);
+aco::Temp fabs(aco::Temp src, aco::Builder b = bld);
+aco::Temp f2f32(aco::Temp src, aco::Builder b = bld);
+aco::Temp f2f16(aco::Temp src, aco::Builder b = bld);
+aco::Temp u2u16(aco::Temp src, aco::Builder b = bld);
+aco::Temp fadd(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
+aco::Temp fmul(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
+aco::Temp fma(aco::Temp src0, aco::Temp src1, aco::Temp src2, aco::Builder b = bld);
+aco::Temp fsat(aco::Temp src, aco::Builder b = bld);
+aco::Temp fmin(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
+aco::Temp fmax(aco::Temp src0, aco::Temp src1, aco::Builder b = bld);
+aco::Temp ext_ushort(aco::Temp src, unsigned idx, aco::Builder b = bld);
+aco::Temp ext_ubyte(aco::Temp src, unsigned idx, aco::Builder b = bld);
+void emit_divergent_if_else(aco::Program* prog, aco::Builder& b, aco::Operand cond,
+                            std::function<void()> then, std::function<void()> els);
 
 /* vulkan helpers */
 VkDevice get_vk_device(enum amd_gfx_level gfx_level);
 VkDevice get_vk_device(enum radeon_family family);
 
 void print_pipeline_ir(VkDevice device, VkPipeline pipeline, VkShaderStageFlagBits stages,
-                       const char *name, bool remove_encoding=false);
+                       const char* name, bool remove_encoding = false);
 
-VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo *info);
+VkShaderModule __qoCreateShaderModule(VkDevice dev, const QoShaderModuleCreateInfo* info);
 
 class PipelineBuilder {
 public:
@@ -152,19 +153,21 @@ public:
    ~PipelineBuilder();
 
    PipelineBuilder(const PipelineBuilder&) = delete;
-   PipelineBuilder& operator = (const PipelineBuilder&) = delete;
+   PipelineBuilder& operator=(const PipelineBuilder&) = delete;
 
-   void add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout,
-                         uint32_t binding, VkDescriptorType type, uint32_t count=1);
+   void add_desc_binding(VkShaderStageFlags stage_flags, uint32_t layout, uint32_t binding,
+                         VkDescriptorType type, uint32_t count = 1);
 
-   void add_vertex_binding(uint32_t binding, uint32_t stride, VkVertexInputRate rate=VK_VERTEX_INPUT_RATE_VERTEX);
+   void add_vertex_binding(uint32_t binding, uint32_t stride,
+                           VkVertexInputRate rate = VK_VERTEX_INPUT_RATE_VERTEX);
    void add_vertex_attribute(uint32_t location, uint32_t binding, VkFormat format, uint32_t offset);
 
-   void add_resource_decls(QoShaderModuleCreateInfo *module);
-   void add_io_decls(QoShaderModuleCreateInfo *module);
+   void add_resource_decls(QoShaderModuleCreateInfo* module);
+   void add_io_decls(QoShaderModuleCreateInfo* module);
 
-   void add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char *name="main");
-   void add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module, const char *name="main");
+   void add_stage(VkShaderStageFlagBits stage, VkShaderModule module, const char* name = "main");
+   void add_stage(VkShaderStageFlagBits stage, QoShaderModuleCreateInfo module,
+                  const char* name = "main");
    void add_vsfs(VkShaderModule vs, VkShaderModule fs);
    void add_vsfs(QoShaderModuleCreateInfo vs, QoShaderModuleCreateInfo fs);
    void add_cs(VkShaderModule cs);
@@ -174,7 +177,8 @@ public:
 
    void create_pipeline();
 
-   void print_ir(VkShaderStageFlagBits stages, const char *name, bool remove_encoding=false);
+   void print_ir(VkShaderStageFlagBits stages, const char* name, bool remove_encoding = false);
+
 private:
    void create_compute_pipeline();
    void create_graphics_pipeline();
index 8f5e8ea..a714d3a 100644 (file)
  * IN THE SOFTWARE.
  *
  */
+#include "aco_ir.h"
+
+#include <llvm-c/Target.h>
+
+#include "framework.h"
+#include <getopt.h>
 #include <map>
 #include <set>
-#include <string>
-#include <vector>
+#include <stdarg.h>
 #include <stdio.h>
 #include <string.h>
-#include <getopt.h>
+#include <string>
 #include <unistd.h>
-#include <stdarg.h>
-#include <llvm-c/Target.h>
-#include "aco_ir.h"
-#include "framework.h"
+#include <vector>
 
-static const char *help_message =
+static const char* help_message =
    "Usage: %s [-h] [-l --list] [--no-check] [TEST [TEST ...]]\n"
    "\n"
    "Run ACO unit test(s). If TEST is not provided, all tests are run.\n"
@@ -50,26 +52,27 @@ static const char *help_message =
    "  --no-check  Print test output instead of checking it.\n";
 
 std::map<std::string, TestDef> tests;
-FILE *output = NULL;
+FILE* output = NULL;
 
 static TestDef current_test;
 static unsigned tests_written = 0;
-static FILE *checker_stdin = NULL;
-static char *checker_stdin_data = NULL;
+static FILE* checker_stdin = NULL;
+static char* checker_stdin_data = NULL;
 static size_t checker_stdin_size = 0;
 
-static char *output_data = NULL;
+static char* output_data = NULL;
 static size_t output_size = 0;
 static size_t output_offset = 0;
 
 static char current_variant[64] = {0};
-static std::set<std::string> *variant_filter = NULL;
+static std::set<std::string>* variant_filter = NULL;
 
 bool test_failed = false;
 bool test_skipped = false;
 static char fail_message[256] = {0};
 
-void write_test()
+void
+write_test()
 {
    if (!checker_stdin) {
       /* not entirely correct, but shouldn't matter */
@@ -81,18 +84,18 @@ void write_test()
    if (output_offset == output_size && !test_skipped && !test_failed)
       return;
 
-   char *data = output_data + output_offset;
+   char* data = output_data + output_offset;
    uint32_t size = output_size - output_offset;
 
    fwrite("test", 1, 4, checker_stdin);
-   fwrite(current_test.name, 1, strlen(current_test.name)+1, checker_stdin);
-   fwrite(current_variant, 1, strlen(current_variant)+1, checker_stdin);
-   fwrite(current_test.source_file, 1, strlen(current_test.source_file)+1, checker_stdin);
+   fwrite(current_test.name, 1, strlen(current_test.name) + 1, checker_stdin);
+   fwrite(current_variant, 1, strlen(current_variant) + 1, checker_stdin);
+   fwrite(current_test.source_file, 1, strlen(current_test.source_file) + 1, checker_stdin);
    if (test_failed || test_skipped) {
-      const char *res = test_failed ? "failed" : "skipped";
+      const char* res = test_failed ? "failed" : "skipped";
       fwrite("\x01", 1, 1, checker_stdin);
-      fwrite(res, 1, strlen(res)+1, checker_stdin);
-      fwrite(fail_message, 1, strlen(fail_message)+1, checker_stdin);
+      fwrite(res, 1, strlen(res) + 1, checker_stdin);
+      fwrite(fail_message, 1, strlen(fail_message) + 1, checker_stdin);
    } else {
       fwrite("\x00", 1, 1, checker_stdin);
    }
@@ -103,7 +106,8 @@ void write_test()
    output_offset += size;
 }
 
-bool set_variant(const char *name)
+bool
+set_variant(const char* name)
 {
    if (variant_filter && !variant_filter->count(name))
       return false;
@@ -118,7 +122,8 @@ bool set_variant(const char *name)
    return true;
 }
 
-void fail_test(const char *fmt, ...)
+void
+fail_test(const char* fmt, ...)
 {
    va_list args;
    va_start(args, fmt);
@@ -129,7 +134,8 @@ void fail_test(const char *fmt, ...)
    va_end(args);
 }
 
-void skip_test(const char *fmt, ...)
+void
+skip_test(const char* fmt, ...)
 {
    va_list args;
    va_start(args, fmt);
@@ -140,7 +146,8 @@ void skip_test(const char *fmt, ...)
    va_end(args);
 }
 
-void run_test(TestDef def)
+void
+run_test(TestDef def)
 {
    current_test = def;
    output_data = NULL;
@@ -163,7 +170,8 @@ void run_test(TestDef def)
    free(output_data);
 }
 
-int check_output(char **argv)
+int
+check_output(char** argv)
 {
    fflush(stdout);
    fflush(stderr);
@@ -183,7 +191,8 @@ int check_output(char **argv)
       close(stdin_pipe[0]);
       close(stdin_pipe[1]);
 
-      execlp(ACO_TEST_PYTHON_BIN, ACO_TEST_PYTHON_BIN, ACO_TEST_SOURCE_DIR "/check_output.py", NULL);
+      execlp(ACO_TEST_PYTHON_BIN, ACO_TEST_PYTHON_BIN, ACO_TEST_SOURCE_DIR "/check_output.py",
+             NULL);
       fprintf(stderr, "%s: execlp() failed: %s\n", argv[0], strerror(errno));
       return 99;
    } else {
@@ -197,7 +206,8 @@ int check_output(char **argv)
    }
 }
 
-bool match_test(std::string name, std::string pattern)
+bool
+match_test(std::string name, std::string pattern)
 {
    if (name.length() < pattern.length())
       return false;
@@ -206,33 +216,25 @@ bool match_test(std::string name, std::string pattern)
    return name == pattern;
 }
 
-int main(int argc, char **argv)
+int
+main(int argc, char** argv)
 {
    int print_help = 0;
    int do_list = 0;
    int do_check = 1;
-   const struct option opts[] = {
-      { "help",     no_argument, &print_help, 1 },
-      { "list",     no_argument, &do_list,    1 },
-      { "no-check", no_argument, &do_check,   0 },
-      { NULL,       0,           NULL,        0 }
-   };
+   const struct option opts[] = {{"help", no_argument, &print_help, 1},
+                                 {"list", no_argument, &do_list, 1},
+                                 {"no-check", no_argument, &do_check, 0},
+                                 {NULL, 0, NULL, 0}};
 
    int c;
    while ((c = getopt_long(argc, argv, "hl", opts, NULL)) != -1) {
       switch (c) {
-      case 'h':
-         print_help = 1;
-         break;
-      case 'l':
-         do_list = 1;
-         break;
-      case 0:
-         break;
+      case 'h': print_help = 1; break;
+      case 'l': do_list = 1; break;
+      case 0: break;
       case '?':
-      default:
-         fprintf(stderr, "%s: Invalid argument\n", argv[0]);
-         return 99;
+      default: fprintf(stderr, "%s: Invalid argument\n", argv[0]); return 99;
       }
    }
 
@@ -262,10 +264,10 @@ int main(int argc, char **argv)
    if (do_check)
       checker_stdin = open_memstream(&checker_stdin_data, &checker_stdin_size);
 
-       LLVMInitializeAMDGPUTargetInfo();
-       LLVMInitializeAMDGPUTarget();
-       LLVMInitializeAMDGPUTargetMC();
-       LLVMInitializeAMDGPUDisassembler();
+   LLVMInitializeAMDGPUTargetInfo();
+   LLVMInitializeAMDGPUTarget();
+   LLVMInitializeAMDGPUTargetMC();
+   LLVMInitializeAMDGPUDisassembler();
 
    aco::init();
 
index 5f4aac4..7adddb3 100644 (file)
  * IN THE SOFTWARE.
  *
  */
+#include <llvm/Config/llvm-config.h>
+
 #include "helpers.h"
 #include "sid.h"
 
-#include <llvm/Config/llvm-config.h>
-
 using namespace aco;
 
 BEGIN_TEST(assembler.s_memtime)
@@ -178,7 +178,7 @@ BEGIN_TEST(assembler.long_jump.conditional_backwards)
    finish_assembler_test();
 END_TEST
 
-BEGIN_TEST(assembler.long_jump.3f)
+BEGIN_TEST(assembler.long_jump .3f)
    if (!setup_cs(NULL, (amd_gfx_level)GFX10))
       return;
 
@@ -354,25 +354,31 @@ BEGIN_TEST(assembler.vopc_sdwa)
 
       //~gfx9>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 86860080
       //~gfx10>> v_cmp_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD   ; 7d8300f9 86860080
-      bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(), Operand::zero());
+      bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(vcc, s2), Operand::zero(),
+                    Operand::zero());
 
       //~gfx9! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686ac80
       //~gfx10! v_cmp_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d8300f9 8686ac80
-      bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(), Operand::zero());
+      bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(PhysReg(0x2c), s2), Operand::zero(),
+                    Operand::zero());
 
       //~gfx9! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7d9300f9 8686fe80
       //~gfx10! v_cmp_lt_u32_sdwa exec, 0, 0 src0_sel:DWORD src1_sel:DWORD  ; 7d8300f9 8686fe80
-      bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero());
+      bld.vopc_sdwa(aco_opcode::v_cmp_lt_u32, Definition(exec, s2), Operand::zero(),
+                    Operand::zero());
 
       if (i == GFX10) {
          //~gfx10! v_cmpx_lt_u32_sdwa 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7da300f9 86860080
-         bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(), Operand::zero());
+         bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(exec, s2), Operand::zero(),
+                       Operand::zero());
       } else {
          //~gfx9! v_cmpx_lt_u32_sdwa vcc, 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 86860080
-         bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2), Operand::zero(), Operand::zero());
+         bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(vcc, s2), Definition(exec, s2),
+                       Operand::zero(), Operand::zero());
 
          //~gfx9! v_cmpx_lt_u32_sdwa s[44:45], 0, 0 src0_sel:DWORD src1_sel:DWORD ; 7db300f9 8686ac80
-         bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2), Definition(exec, s2), Operand::zero(), Operand::zero());
+         bld.vopc_sdwa(aco_opcode::v_cmpx_lt_u32, Definition(PhysReg(0x2c), s2),
+                       Definition(exec, s2), Operand::zero(), Operand::zero());
       }
 
       finish_assembler_test();
@@ -452,48 +458,70 @@ BEGIN_TEST(assembler.gfx11.mubuf)
    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, true);
 
    //! buffer_load_b32 v42, v10, s[32:35], s30 idxen               ; e0500000 1e882a0a
-   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen = true;
+   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v1, op_s1, 0, false)->mubuf().idxen =
+      true;
 
    //! buffer_load_b32 v42, v[20:21], s[32:35], s30 idxen offen    ; e0500000 1ec82a14
-   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen = true;
+   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, op_v2, op_s1, 0, true)->mubuf().idxen =
+      true;
 
    //! buffer_load_b32 v42, off, s[32:35], s30 offset:84           ; e0500054 1e082a80
    bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), op_s1, 84, false);
 
    /* Various flags */
    //! buffer_load_b32 v42, off, s[32:35], 0 glc                   ; e0504000 80082a80
-   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().glc = true;
+   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
+      ->mubuf()
+      .glc = true;
 
    //! buffer_load_b32 v42, off, s[32:35], 0 dlc                   ; e0502000 80082a80
-   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().dlc = true;
+   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
+      ->mubuf()
+      .dlc = true;
 
    //! buffer_load_b32 v42, off, s[32:35], 0 slc                   ; e0501000 80082a80
-   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().slc = true;
+   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
+      ->mubuf()
+      .slc = true;
 
    //; if llvm_ver >= 16:
    //;    insert_pattern('buffer_load_b32 v[42:43], off, s[32:35], 0 tfe              ; e0500000 80282a80')
    //; else:
    //;    insert_pattern('buffer_load_b32 v42, off, s[32:35], 0 tfe                   ; e0500000 80282a80')
-   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)->mubuf().tfe = true;
+   bld.mubuf(aco_opcode::buffer_load_dword, dst, op_s4, Operand(v1), Operand::zero(), 0, false)
+      ->mubuf()
+      .tfe = true;
 
    /* LDS */
    //! buffer_load_lds_b32 off, s[32:35], 0                        ; e0c40000 80080080
-   bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
+   bld.mubuf(aco_opcode::buffer_load_dword, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
+      ->mubuf()
+      .lds = true;
 
    //! buffer_load_lds_i8 off, s[32:35], 0                         ; e0b80000 80080080
-   bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
+   bld.mubuf(aco_opcode::buffer_load_sbyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
+      ->mubuf()
+      .lds = true;
 
    //! buffer_load_lds_i16 off, s[32:35], 0                        ; e0c00000 80080080
-   bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
+   bld.mubuf(aco_opcode::buffer_load_sshort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
+      ->mubuf()
+      .lds = true;
 
    //! buffer_load_lds_u8 off, s[32:35], 0                         ; e0b40000 80080080
-   bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
+   bld.mubuf(aco_opcode::buffer_load_ubyte, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
+      ->mubuf()
+      .lds = true;
 
    //! buffer_load_lds_u16 off, s[32:35], 0                        ; e0bc0000 80080080
-   bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
+   bld.mubuf(aco_opcode::buffer_load_ushort, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
+      ->mubuf()
+      .lds = true;
 
    //! buffer_load_lds_format_x off, s[32:35], 0                   ; e0c80000 80080080
-   bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)->mubuf().lds = true;
+   bld.mubuf(aco_opcode::buffer_load_format_x, op_s4, Operand(v1), Operand::zero(), op_m0, 0, false)
+      ->mubuf()
+      .lds = true;
 
    /* Stores */
    //! buffer_store_b32 v10, off, s[32:35], s30                    ; e0680000 1e080a80
@@ -532,42 +560,62 @@ BEGIN_TEST(assembler.gfx11.mtbuf)
 
    /* Addressing */
    //>> tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9900000 1e082a80
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0, false);
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 0,
+             false);
 
    //! tbuffer_load_format_x v42, off, s[32:35], 42 format:[BUF_FMT_32_32_FLOAT] ; e9900000 aa082a80
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt, nfmt, 0, false);
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::c32(42), dfmt,
+             nfmt, 0, false);
 
    //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9900000 1e482a0a
    bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, true);
 
    //! tbuffer_load_format_x v42, v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen ; e9900000 1e882a0a
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)->mtbuf().idxen = true;
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v1, op_s1, dfmt, nfmt, 0, false)
+      ->mtbuf()
+      .idxen = true;
 
    //! tbuffer_load_format_x v42, v[20:21], s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] idxen offen ; e9900000 1ec82a14
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)->mtbuf().idxen = true;
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, op_v2, op_s1, dfmt, nfmt, 0, true)
+      ->mtbuf()
+      .idxen = true;
 
    //! tbuffer_load_format_x v42, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offset:84 ; e9900054 1e082a80
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84, false);
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), op_s1, dfmt, nfmt, 84,
+             false);
 
    /* Various flags */
    //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] glc ; e9904000 80082a80
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().glc = true;
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
+             nfmt, 0, false)
+      ->mtbuf()
+      .glc = true;
 
    //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] dlc ; e9902000 80082a80
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().dlc = true;
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
+             nfmt, 0, false)
+      ->mtbuf()
+      .dlc = true;
 
    //! tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] slc ; e9901000 80082a80
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().slc = true;
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
+             nfmt, 0, false)
+      ->mtbuf()
+      .slc = true;
 
    //; if llvm_ver >= 16:
    //;    insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] ; e9900000 80282a80')
    //; else:
    //;    insert_pattern('tbuffer_load_format_x v42, off, s[32:35], 0 format:[BUF_FMT_32_32_FLOAT] tfe ; e9900000 80282a80')
-   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt, nfmt, 0, false)->mtbuf().tfe = true;
+   bld.mtbuf(aco_opcode::tbuffer_load_format_x, dst, op_s4, Operand(v1), Operand::zero(), dfmt,
+             nfmt, 0, false)
+      ->mtbuf()
+      .tfe = true;
 
    /* Stores */
    //! tbuffer_store_format_x v10, off, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] ; e9920000 1e080a80
-   bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0, false);
+   bld.mtbuf(aco_opcode::tbuffer_store_format_x, op_s4, Operand(v1), op_s1, op_v1, dfmt, nfmt, 0,
+             false);
 
    //! tbuffer_store_format_xy v[20:21], v10, s[32:35], s30 format:[BUF_FMT_32_32_FLOAT] offen ; e9928000 1e48140a
    bld.mtbuf(aco_opcode::tbuffer_store_format_xy, op_s4, op_v1, op_s1, op_v2, dfmt, nfmt, 0, true);
@@ -604,7 +652,8 @@ BEGIN_TEST(assembler.gfx11.mimg)
    bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1);
 
    //! image_sample v[84:87], v[20:21], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f04 20105414
-   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim = ac_image_2d;
+   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v2)->mimg().dim =
+      ac_image_2d;
 
    //! image_sample v42, v10, s[64:71], s[32:35] dmask:0x1 dim:SQ_RSRC_IMG_1D ; f06c0100 20102a0a
    bld.mimg(aco_opcode::image_sample, dst_v1, op_s8, op_s4, Operand(v1), op_v1)->mimg().dmask = 0x1;
@@ -636,14 +685,20 @@ BEGIN_TEST(assembler.gfx11.mimg)
 
    /* NSA */
    //! image_sample v[84:87], [v10, v40], s[64:71], s[32:35] dmask:0xf dim:SQ_RSRC_IMG_2D ; f06c0f05 2010540a 00000028
-   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1, Operand(bld.tmp(v1), PhysReg(256 + 40)))->mimg().dim = ac_image_2d;
+   bld.mimg(aco_opcode::image_sample, dst_v4, op_s8, op_s4, Operand(v1), op_v1,
+            Operand(bld.tmp(v1), PhysReg(256 + 40)))
+      ->mimg()
+      .dim = ac_image_2d;
 
    /* Stores */
    //! image_store v[30:33], v10, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_1D ; f0180f00 00101e0a
    bld.mimg(aco_opcode::image_store, op_s8, Operand(s4), op_v4, op_v1);
 
    //! image_atomic_add v10, v20, s[64:71] dmask:0xf dim:SQ_RSRC_IMG_2D ; f0300f04 00100a14
-   bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4), op_v1, op_v2)->mimg().dim = ac_image_2d;
+   bld.mimg(aco_opcode::image_atomic_add, Definition(op_v1.physReg(), v1), op_s8, Operand(s4),
+            op_v1, op_v2)
+      ->mimg()
+      .dim = ac_image_2d;
 
    finish_assembler_test();
 END_TEST
@@ -761,13 +816,19 @@ BEGIN_TEST(assembler.gfx11.vinterp)
    bld.vinterp_inreg(aco_opcode::v_interp_p2_f32_inreg, dst, op0, op1, op2, 0);
 
    //! v_interp_p10_f32 v42, -v10, v20, v30                        ; cd00002a 247a290a
-   bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[0] = true;
+   bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
+      ->vinterp_inreg()
+      .neg[0] = true;
 
    //! v_interp_p10_f32 v42, v10, -v20, v30                        ; cd00002a 447a290a
-   bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[1] = true;
+   bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
+      ->vinterp_inreg()
+      .neg[1] = true;
 
    //! v_interp_p10_f32 v42, v10, v20, -v30                        ; cd00002a 847a290a
-   bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().neg[2] = true;
+   bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
+      ->vinterp_inreg()
+      .neg[2] = true;
 
    //! v_interp_p10_f16_f32 v42, v10, v20, v30 op_sel:[1,0,0,0]    ; cd02082a 047a290a
    bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, dst, op0, op1, op2, 0, 0x1);
@@ -782,7 +843,9 @@ BEGIN_TEST(assembler.gfx11.vinterp)
    bld.vinterp_inreg(aco_opcode::v_interp_p2_rtz_f16_f32_inreg, dst, op0, op1, op2, 0, 0x8);
 
    //! v_interp_p10_f32 v42, v10, v20, v30 clamp                   ; cd00802a 047a290a
-   bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)->vinterp_inreg().clamp = true;
+   bld.vinterp_inreg(aco_opcode::v_interp_p10_f32_inreg, dst, op0, op1, op2, 0)
+      ->vinterp_inreg()
+      .clamp = true;
 
    finish_assembler_test();
 END_TEST
@@ -899,16 +962,22 @@ BEGIN_TEST(assembler.gfx11.vop12c_v128)
    bld.vop1_dpp(aco_opcode::v_rcp_f16, dst_v128, op_v1, dpp_row_rr(1))->dpp16().abs[0] = true;
 
    //! v_mul_f16_e64_dpp v128, -v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350080 200204fa ff1d2101
-   bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] = true;
+   bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().neg[0] =
+      true;
 
    //! v_mul_f16_e64_dpp v128, |v1|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d5350180 000204fa ff2d2101
-   bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] = true;
+   bld.vop2_dpp(aco_opcode::v_mul_f16, dst_v128, op_v1, op_v2, dpp_row_rr(1))->dpp16().abs[0] =
+      true;
 
    //! v_cmp_eq_f16_e64_dpp vcc, -v129, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402006a 200204fa ff1d2181
-   bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))->dpp16().neg[0] = true;
+   bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
+      ->dpp16()
+      .neg[0] = true;
 
    //! v_cmp_eq_f16_e64_dpp vcc, |v129|, v2 row_ror:1 row_mask:0xf bank_mask:0xf bound_ctrl:1 fi:1 ; d402016a 000204fa ff2d2181
-   bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))->dpp16().abs[0] = true;
+   bld.vopc_dpp(aco_opcode::v_cmp_eq_f16, bld.def(s2, vcc), op_v129, op_v2, dpp_row_rr(1))
+      ->dpp16()
+      .abs[0] = true;
 
    finish_assembler_test();
 END_TEST
index b0cd1dd..ee0299e 100644 (file)
@@ -633,9 +633,10 @@ BEGIN_TEST(d3d11_derivs.nsa_max)
 
       //~gfx11! v4: %_:v[0-3] = image_sample_c_b_o  s8: undef,  s4: undef,  v1: undef, %_:v[6], %_:v[7], %_:v[8], %_:v[3], %_:v[4-5] 2darray da
 
-      Instruction *instr = bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4),
-                                    Operand(s8), Operand(s4), Operand(v1), Operand(reg_v0, v6.as_linear()),
-                                    Operand(reg_v6, v1), Operand(reg_v7, v1), Operand(reg_v8, v1));
+      Instruction* instr =
+         bld.mimg(aco_opcode::image_sample_c_b_o, Definition(reg_v0, v4), Operand(s8), Operand(s4),
+                  Operand(v1), Operand(reg_v0, v6.as_linear()), Operand(reg_v6, v1),
+                  Operand(reg_v7, v1), Operand(reg_v8, v1));
       instr->mimg().dim = ac_image_2darray;
       instr->mimg().da = true;
       instr->mimg().strict_wqm = true;
index a9eb4ee..7e61b87 100644 (file)
@@ -26,7 +26,8 @@
 
 using namespace aco;
 
-static void create_mubuf(Temp desc=Temp(0, s8))
+static void
+create_mubuf(Temp desc = Temp(0, s8))
 {
    Operand desc_op(desc);
    desc_op.setFixed(PhysReg(0));
@@ -34,13 +35,15 @@ static void create_mubuf(Temp desc=Temp(0, s8))
              Operand(PhysReg(256), v1), Operand::zero(), 0, false);
 }
 
-static void create_mubuf_store()
+static void
+create_mubuf_store()
 {
    bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(PhysReg(256), v1),
              Operand(PhysReg(256), v1), Operand::zero(), 0, false);
 }
 
-static void create_mtbuf(Temp desc=Temp(0, s8))
+static void
+create_mtbuf(Temp desc = Temp(0, s8))
 {
    Operand desc_op(desc);
    desc_op.setFixed(PhysReg(0));
@@ -49,22 +52,25 @@ static void create_mtbuf(Temp desc=Temp(0, s8))
              V_008F0C_BUF_NUM_FORMAT_FLOAT, 0, false);
 }
 
-static void create_flat()
+static void
+create_flat()
 {
-   bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1),
-             Operand(PhysReg(256), v2), Operand(s2));
+   bld.flat(aco_opcode::flat_load_dword, Definition(PhysReg(256), v1), Operand(PhysReg(256), v2),
+            Operand(s2));
 }
 
-static void create_global()
+static void
+create_global()
 {
    bld.global(aco_opcode::global_load_dword, Definition(PhysReg(256), v1),
               Operand(PhysReg(256), v2), Operand(s2));
 }
 
-static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
+static void
+create_mimg(bool nsa, Temp desc = Temp(0, s8))
 {
-   aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(
-      aco_opcode::image_sample, Format::MIMG, 5, 1)};
+   aco_ptr<MIMG_instruction> mimg{
+      create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 5, 1)};
    mimg->definitions[0] = Definition(PhysReg(256), v1);
    mimg->operands[0] = Operand(desc);
    mimg->operands[0].setFixed(PhysReg(0));
@@ -78,13 +84,15 @@ static void create_mimg(bool nsa, Temp desc=Temp(0, s8))
    bld.insert(std::move(mimg));
 }
 
-static void create_smem()
+static void
+create_smem()
 {
    bld.smem(aco_opcode::s_load_dword, Definition(PhysReg(0), s1), Operand(PhysReg(0), s2),
             Operand::zero());
 }
 
-static void create_smem_buffer(Temp desc=Temp(0, s4))
+static void
+create_smem_buffer(Temp desc = Temp(0, s4))
 {
    Operand desc_op(desc);
    desc_op.setFixed(PhysReg(0));
index 8fc3a18..1658e8d 100644 (file)
 
 using namespace aco;
 
-void create_mubuf(unsigned offset, PhysReg dst=PhysReg(256), PhysReg vaddr=PhysReg(256))
+void
+create_mubuf(unsigned offset, PhysReg dst = PhysReg(256), PhysReg vaddr = PhysReg(256))
 {
    bld.mubuf(aco_opcode::buffer_load_dword, Definition(dst, v1), Operand(PhysReg(0), s4),
              Operand(vaddr, v1), Operand::zero(), offset, true);
 }
 
-void create_mubuf_store(PhysReg src=PhysReg(256))
+void
+create_mubuf_store(PhysReg src = PhysReg(256))
 {
-   bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4),
-             Operand(src, v1), Operand::zero(), Operand(src, v1), 0, true);
+   bld.mubuf(aco_opcode::buffer_store_dword, Operand(PhysReg(0), s4), Operand(src, v1),
+             Operand::zero(), Operand(src, v1), 0, true);
 }
 
-void create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
+void
+create_mimg(bool nsa, unsigned addrs, unsigned instr_dwords)
 {
-   aco_ptr<MIMG_instruction> mimg{create_instruction<MIMG_instruction>(
-      aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
+   aco_ptr<MIMG_instruction> mimg{
+      create_instruction<MIMG_instruction>(aco_opcode::image_sample, Format::MIMG, 3 + addrs, 1)};
    mimg->definitions[0] = Definition(PhysReg(256), v1);
    mimg->operands[0] = Operand(PhysReg(0), s8);
    mimg->operands[1] = Operand(PhysReg(0), s4);
@@ -216,7 +219,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
    //! s_waitcnt_depctr vm_vsrc(0)
    //! s1: %0:m0 = s_mov_b32 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
-   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
+   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
+          Operand(m0, s1));
    bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
 
    //! p_unit_test 5
@@ -224,7 +228,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
    //! s_waitcnt_depctr vm_vsrc(0)
    //! s2: %0:exec = s_mov_b64 -1
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5));
-   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
+   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
+          Operand(m0, s1));
    bld.sop1(aco_opcode::s_mov_b64, Definition(exec, s2), Operand::c64(-1));
 
    /* no hazard: LDS */
@@ -232,7 +237,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
    //! v1: %0:v[0] = ds_read_b32 %0:v[0], %0:m0
    //! s1: %0:s[0] = s_mov_b32 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6));
-   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
+   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
+          Operand(m0, s1));
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(0), s1), Operand::zero());
 
    /* no hazard: LDS with VALU in-between */
@@ -241,7 +247,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
    //! v_nop
    //! s1: %0:m0 = s_mov_b32 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7));
-   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
+   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
+          Operand(m0, s1));
    bld.vop1(aco_opcode::v_nop);
    bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
 
@@ -269,7 +276,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
    //! s_waitcnt lgkmcnt(0)
    //! s1: %0:m0 = s_mov_b32 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10));
-   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
+   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
+          Operand(m0, s1));
    bld.sopp(aco_opcode::s_waitcnt, -1, 0xc07f);
    bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
 
@@ -300,7 +308,8 @@ BEGIN_TEST(insert_nops.vmem_to_scalar_write)
    //! s_waitcnt_depctr vm_vsrc(0)
    //! s1: %0:m0 = s_mov_b32 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13));
-   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1), Operand(m0, s1));
+   bld.ds(aco_opcode::ds_read_b32, Definition(PhysReg(256), v1), Operand(PhysReg(256), v1),
+          Operand(m0, s1));
    bld.sopp(aco_opcode::s_waitcnt, -1, 0x3f70);
    bld.sop1(aco_opcode::s_mov_b32, Definition(m0, s1), Operand::zero());
 
@@ -932,8 +941,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
    //! s_waitcnt_depctr sa_sdst(0)
    //! s1: %0:s[2] = s_mov_b32 %0:s[1]
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(0));
-   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
-                Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
+   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
+                Operand::zero(), Operand(PhysReg(0), s2));
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
 
@@ -944,8 +953,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
    //! s1: %0:s[1] = s_mov_b32 0
    //! s1: %0:s[2] = s_mov_b32 %0:s[1]
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1));
-   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
-                Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
+   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
+                Operand::zero(), Operand(PhysReg(0), s2));
    bld.vop1(aco_opcode::v_mov_b32, Definition(PhysReg(257), v1), Operand(PhysReg(1), s1));
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@@ -957,8 +966,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
    //! s1: %0:s[2] = s_mov_b32 %0:s[1]
    //! s1: %0:s[2] = s_mov_b32 %0:s[1]
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2));
-   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
-                Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
+   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
+                Operand::zero(), Operand(PhysReg(0), s2));
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@@ -969,8 +978,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
    //! s_waitcnt_depctr sa_sdst(0)
    //! s1: %0:s[2] = s_mov_b32 %0:s[1]
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3));
-   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
-                Operand::zero(), Operand::zero(), Operand(PhysReg(0), s2));
+   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand::zero(),
+                Operand::zero(), Operand(PhysReg(0), s2));
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
    bld.sopp(aco_opcode::s_waitcnt_depctr, -1, 0xfffe);
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
@@ -982,8 +991,8 @@ BEGIN_TEST(insert_nops.valu_mask_write)
    //! s_waitcnt_depctr sa_sdst(0)
    //! s1: %0:s[2] = s_mov_b32 %0:s[1]
    bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4));
-   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1),
-                Operand(PhysReg(2), s1), Operand::zero(), Operand(PhysReg(0), s2));
+   bld.vop2_e64(aco_opcode::v_cndmask_b32, Definition(PhysReg(256), v1), Operand(PhysReg(2), s1),
+                Operand::zero(), Operand(PhysReg(0), s2));
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(1), s1), Operand::zero());
    bld.sop1(aco_opcode::s_mov_b32, Definition(PhysReg(2), s1), Operand(PhysReg(1), s1));
 
index b6c2e83..86d8184 100644 (file)
@@ -36,15 +36,14 @@ BEGIN_TEST(insert_waitcnt.ds_ordered_count)
    Operand chan_counter(PhysReg(260), v1);
    Operand m(m0, s1);
 
-   Instruction *ds_instr;
+   Instruction* ds_instr;
    //>> ds_ordered_count %0:v[0], %0:v[3], %0:m0 offset0:3072 gds storage:gds semantics:volatile
    //! s_waitcnt lgkmcnt(0)
    ds_instr = bld.ds(aco_opcode::ds_ordered_count, def0, gds_base, m, 3072u, 0u, true);
    ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_volatile);
 
    //! ds_add_rtn_u32 %0:v[1], %0:v[3], %0:v[4], %0:m0 gds storage:gds semantics:volatile,atomic,rmw
-   ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1,
-                     gds_base, chan_counter, m, 0u, 0u, true);
+   ds_instr = bld.ds(aco_opcode::ds_add_rtn_u32, def1, gds_base, chan_counter, m, 0u, 0u, true);
    ds_instr->ds().sync = memory_sync_info(storage_gds, semantic_atomicrmw);
 
    //! s_waitcnt lgkmcnt(0)
index a266276..82aabb0 100644 (file)
  * IN THE SOFTWARE.
  *
  */
+#include <llvm/Config/llvm-config.h>
+
 #include "helpers.h"
 #include "test_isel-spirv.h"
 
-#include <llvm/Config/llvm-config.h>
-
 using namespace aco;
 
 BEGIN_TEST(isel.interp.simple)
    QoShaderModuleCreateInfo vs = qoShaderModuleCreateInfoGLSL(VERTEX,
       layout(location = 0) in vec4 in_color;
       layout(location = 0) out vec4 out_color;
-      void main() {
-         out_color = in_color;
+      void main() { out_color = in_color;
       }
    );
    QoShaderModuleCreateInfo fs = qoShaderModuleCreateInfoGLSL(FRAGMENT,
index fa385a2..f49ddc5 100644 (file)
@@ -61,7 +61,8 @@ BEGIN_TEST(optimize.neg)
 
       //! v1: %res5 = v_mul_f32 -%a, %b row_shl:1 bound_ctrl:1
       //! p_unit_test 5, %res5
-      writeout(5, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
+      writeout(5,
+               bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), neg_a, inputs[1], dpp_row_sl(1)));
 
       //! v1: %res6 = v_subrev_f32 %a, %b
       //! p_unit_test 6, %res6
@@ -264,7 +265,8 @@ BEGIN_TEST(optimize.output_modifiers)
    finish_opt_test();
 END_TEST
 
-Temp create_subbrev_co(Operand op0, Operand op1, Operand op2)
+Temp
+create_subbrev_co(Operand op0, Operand op1, Operand op2)
 {
    return bld.vop2_e64(aco_opcode::v_subbrev_co_u32, bld.def(v1), bld.def(bld.lm), op0, op1, op2);
 }
@@ -438,7 +440,7 @@ BEGIN_TEST(optimize.bcnt)
 END_TEST
 
 struct clamp_config {
-   const char *name;
+   const char* name;
    aco_opcode min, max, med3;
    Operand lb, ub;
 };
@@ -863,7 +865,7 @@ enum denorm_op {
    denorm_fnegabs = 3,
 };
 
-static const char *denorm_op_names[] = {
+static const char* denorm_op_names[] = {
    "mul1",
    "fneg",
    "fabs",
@@ -877,31 +879,27 @@ struct denorm_config {
    aco_opcode dest;
 };
 
-static const char *srcdest_op_name(aco_opcode op)
+static const char*
+srcdest_op_name(aco_opcode op)
 {
    switch (op) {
-   case aco_opcode::v_cndmask_b32:
-      return "cndmask";
-   case aco_opcode::v_min_f32:
-      return "min";
-   case aco_opcode::v_rcp_f32:
-      return "rcp";
-   default:
-      return "none";
+   case aco_opcode::v_cndmask_b32: return "cndmask";
+   case aco_opcode::v_min_f32: return "min";
+   case aco_opcode::v_rcp_f32: return "rcp";
+   default: return "none";
    }
 }
 
-static Temp emit_denorm_srcdest(aco_opcode op, Temp val)
+static Temp
+emit_denorm_srcdest(aco_opcode op, Temp val)
 {
    switch (op) {
    case aco_opcode::v_cndmask_b32:
       return bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1), Operand::zero(), val, inputs[1]);
    case aco_opcode::v_min_f32:
       return bld.vop2(aco_opcode::v_min_f32, bld.def(v1), Operand::zero(), val);
-   case aco_opcode::v_rcp_f32:
-      return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val);
-   default:
-      return val;
+   case aco_opcode::v_rcp_f32: return bld.vop1(aco_opcode::v_rcp_f32, bld.def(v1), val);
+   default: return val;
    }
 }
 
@@ -917,7 +915,8 @@ BEGIN_TEST(optimize.denorm_propagation)
                configs.push_back({flush, op, aco_opcode::num_opcodes, dest});
          }
 
-         for (aco_opcode src : {aco_opcode::v_cndmask_b32, aco_opcode::v_min_f32, aco_opcode::v_rcp_f32}) {
+         for (aco_opcode src :
+              {aco_opcode::v_cndmask_b32, aco_opcode::v_min_f32, aco_opcode::v_rcp_f32}) {
             for (denorm_op op : {denorm_mul1, denorm_fneg, denorm_fabs, denorm_fnegabs})
                configs.push_back({flush, op, src, aco_opcode::num_opcodes});
          }
@@ -925,18 +924,18 @@ BEGIN_TEST(optimize.denorm_propagation)
 
       for (denorm_config cfg : configs) {
          char subvariant[128];
-         sprintf(subvariant, "_%s_%s_%s_%s",
-                 cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
+         sprintf(subvariant, "_%s_%s_%s_%s", cfg.flush ? "flush" : "keep", srcdest_op_name(cfg.src),
                  denorm_op_names[(int)cfg.op], srcdest_op_name(cfg.dest));
          if (!setup_cs("v1 s2", (amd_gfx_level)i, CHIP_UNKNOWN, subvariant))
             continue;
 
-         bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.src == aco_opcode::v_min_f32) ||
-                              cfg.dest == aco_opcode::v_rcp_f32 || (i >= GFX9 && cfg.dest == aco_opcode::v_min_f32) ||
-                              !cfg.flush;
+         bool can_propagate = cfg.src == aco_opcode::v_rcp_f32 ||
+                              (i >= GFX9 && cfg.src == aco_opcode::v_min_f32) ||
+                              cfg.dest == aco_opcode::v_rcp_f32 ||
+                              (i >= GFX9 && cfg.dest == aco_opcode::v_min_f32) || !cfg.flush;
 
-         fprintf(output, "src, dest, op: %s %s %s\n",
-                 srcdest_op_name(cfg.src), srcdest_op_name(cfg.dest), denorm_op_names[(int)cfg.op]);
+         fprintf(output, "src, dest, op: %s %s %s\n", srcdest_op_name(cfg.src),
+                 srcdest_op_name(cfg.dest), denorm_op_names[(int)cfg.op]);
          fprintf(output, "can_propagate: %u\n", can_propagate);
          //! src, dest, op: $src $dest $op
          //! can_propagate: #can_propagate
@@ -976,15 +975,9 @@ BEGIN_TEST(optimize.denorm_propagation)
          case denorm_mul1:
             val = bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x3f800000u), val);
             break;
-         case denorm_fneg:
-            val = fneg(val);
-            break;
-         case denorm_fabs:
-            val = fabs(val);
-            break;
-         case denorm_fnegabs:
-            val = fneg(fabs(val));
-            break;
+         case denorm_fneg: val = fneg(val); break;
+         case denorm_fabs: val = fabs(val); break;
+         case denorm_fnegabs: val = fneg(fabs(val)); break;
          }
          val = emit_denorm_srcdest(cfg.dest, val);
          writeout(
@@ -1123,13 +1116,15 @@ BEGIN_TEST(optimize.dpp_prop)
    //! v1: %res2 = v_mul_f32 0x12345678, %a
    //! p_unit_test 2, %res2
    Temp literal1 = bld.copy(bld.def(v1), Operand::c32(0x12345678u));
-   writeout(2, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
+   writeout(2,
+            bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
 
    //! v1: %literal2 = p_parallelcopy 0x12345679
    //! v1: %res3 = v_mul_f32 %a, %literal row_shl:1 bound_ctrl:1
    //! p_unit_test 3, %res3
    Temp literal2 = bld.copy(bld.def(v1), Operand::c32(0x12345679u));
-   writeout(3, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
+   writeout(3,
+            bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
 
    //! v1: %b_v = p_parallelcopy %b
    //! v1: %res4 = v_mul_f32 %b, %a
@@ -1171,7 +1166,9 @@ BEGIN_TEST(optimize.casts)
    //! v1: %res2_tmp = v_mul_f32 -1.0, %a16
    //! v2b: %res2 = v_mul_f16 %res2_tmp, %a16
    //! p_unit_test 2, %res2
-   writeout(2, fmul(u2u16(bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), bld.as_uniform(a16))), a16));
+   writeout(2, fmul(u2u16(bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1),
+                                       Operand::c32(0xbf800000u), bld.as_uniform(a16))),
+                    a16));
 
    //! v1: %res3_tmp = v_mul_f32 %a, %a
    //! v2b: %res3 = v_add_f16 %res3_tmp, 0 clamp
@@ -1191,7 +1188,8 @@ BEGIN_TEST(optimize.casts)
    //! v2b: %res6_tmp = v_mul_f16 %a16, %a16
    //! v1: %res6 = v_mul_f32 2.0, %res6_tmp
    //! p_unit_test 6, %res6
-   writeout(6, fmul(bld.as_uniform(fmul(a16, a16)), bld.copy(bld.def(v1), Operand::c32(0x40000000))));
+   writeout(6,
+            fmul(bld.as_uniform(fmul(a16, a16)), bld.copy(bld.def(v1), Operand::c32(0x40000000))));
 
    //! v1: %res7_tmp = v_mul_f32 %a, %a
    //! v2b: %res7 = v_add_f16 %res7_tmp, %a16
@@ -1211,7 +1209,8 @@ BEGIN_TEST(optimize.casts)
    //! v2b: %res10_tmp = v_mul_f16 %a16, %a16
    //! v1: %res10 = v_mul_f32 -1.0, %res10_tmp
    //! p_unit_test 10, %res10
-   writeout(10, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u), bld.as_uniform(fmul(a16, a16))));
+   writeout(10, bld.vop2_e64(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0xbf800000u),
+                             bld.as_uniform(fmul(a16, a16))));
 
    finish_opt_test();
 END_TEST
@@ -1549,7 +1548,8 @@ BEGIN_TEST(optimize.mad_mix.fma.basic)
       //! v1: %res2_mul = v_fma_mix_f32 lo(%a16), %b, -0
       //! v1: %res2 = v_add_f32 %res2_mul, %c *2
       //! p_unit_test 2, %res2
-      writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000), fadd(fmul(f2f32(a16), b), c)));
+      writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), Operand::c32(0x40000000),
+                           fadd(fmul(f2f32(a16), b), c)));
 
       /* neg/abs modifiers */
       //! v1: %res3 = v_fma_mix_f32 -lo(%a16), %b, |lo(%c16)|
@@ -1730,7 +1730,8 @@ BEGIN_TEST(optimize.mad_mix.cast)
    }
 END_TEST
 
-static void vop3p_constant(unsigned *idx, aco_opcode op, const char *swizzle, uint32_t val)
+static void
+vop3p_constant(unsigned* idx, aco_opcode op, const char* swizzle, uint32_t val)
 {
    uint32_t halves[2] = {val & 0xffff, val >> 16};
    uint32_t expected = halves[swizzle[0] - 'x'] | (halves[swizzle[1] - 'x'] << 16);
@@ -1744,7 +1745,7 @@ static void vop3p_constant(unsigned *idx, aco_opcode op, const char *swizzle, ui
 
 BEGIN_TEST(optimize.vop3p_constants)
    for (aco_opcode op : {aco_opcode::v_pk_add_f16, aco_opcode::v_pk_add_u16}) {
-      for (const char *swizzle : {"xx", "yy", "xy", "yx"}) {
+      for (const char* swizzle : {"xx", "yy", "xy", "yx"}) {
          char variant[16];
          strcpy(variant, op == aco_opcode::v_pk_add_f16 ? "_f16" : "_u16");
          strcat(variant, "_");
index d6cc320..8913397 100644 (file)
 using namespace aco;
 
 BEGIN_TEST(optimizer_postRA.vcmp)
-    PhysReg reg_v0(256);
-    PhysReg reg_s0(0);
-    PhysReg reg_s2(2);
-    PhysReg reg_s4(4);
-
-    //>> v1: %a:v[0] = p_startpgm
-    ASSERTED bool setup_ok = setup_cs("v1", GFX8);
-    assert(setup_ok);
-
-    auto &startpgm = bld.instructions->at(0);
-    assert(startpgm->opcode == aco_opcode::p_startpgm);
-    startpgm->definitions[0].setFixed(reg_v0);
-
-    Temp v_in = inputs[0];
-
-    {
-        /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
-
-        //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
-        //! s2: %e:s[2-3] = p_cbranch_z %b:vcc
-        //! p_unit_test 0, %e:s[2-3]
-        auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
-                             Operand(v_in, reg_v0));
-        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
-        writeout(0, Operand(br, reg_s2));
-    }
-
-    //; del b, e
-
-    {
-        /* When VCC is overwritten inbetween, don't optimize. */
-
-        //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
-        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
-        //! s2: %f:vcc = s_mov_b64 0
-        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
-        //! p_unit_test 1, %e:s[2-3], %f:vcc
-        auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
-                             Operand(v_in, reg_v0));
-        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
-        auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
-        writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
-    }
-
-    //; del b, c, d, e, f
-
-    {
-        /* When part of VCC is overwritten inbetween, don't optimize. */
-
-        //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
-        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
-        //! s1: %f:s[107] = s_mov_b32 0
-        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
-        //! p_unit_test 1, %e:s[2-3], %f:vcc
-        auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
-                             Operand(v_in, reg_v0));
-        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
-        auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
-        writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
-    }
-
-    //; del b, c, d, e, f
-
-    {
-        /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
-
-        //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
-        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
-        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
-        //! p_unit_test 2, %e:s[2-3]
-        auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
-                                 Operand(v_in, reg_v0));
-        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(vcmp, reg_s4), Operand(exec, bld.lm));
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
-        writeout(2, Operand(br, reg_s2));
-    }
-
-    //; del b, c, d, e
-
-    {
-        /* When the VCC isn't written by VOPC, don't optimize */
-
-        //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
-        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
-        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
-        //! p_unit_test 2, %e:s[2-3]
-        auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc),
-                             Operand::c32(1u), Operand(reg_s4, bld.lm));
-        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), Operand(salu, vcc), Operand(exec, bld.lm));
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
-        writeout(2, Operand(br, reg_s2));
-    }
-
-    //; del b, c, d, e, f, x
-
-    {
-        /* When EXEC is overwritten inbetween, don't optimize. */
-
-        //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
-        //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
-        //! s2: %f:exec = s_mov_b64 42
-        //! s2: %e:s[2-3] = p_cbranch_z %d:scc
-        //! p_unit_test 4, %e:s[2-3], %f:exec
-        auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
-                             Operand(v_in, reg_v0));
-        auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp), Operand(exec, bld.lm));
-        auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
-        writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
-    }
-
-    //; del b, c, d, e, f, x
-
-    finish_optimizer_postRA_test();
+   PhysReg reg_v0(256);
+   PhysReg reg_s0(0);
+   PhysReg reg_s2(2);
+   PhysReg reg_s4(4);
+
+   //>> v1: %a:v[0] = p_startpgm
+   ASSERTED bool setup_ok = setup_cs("v1", GFX8);
+   assert(setup_ok);
+
+   auto& startpgm = bld.instructions->at(0);
+   assert(startpgm->opcode == aco_opcode::p_startpgm);
+   startpgm->definitions[0].setFixed(reg_v0);
+
+   Temp v_in = inputs[0];
+
+   {
+      /* Recognize when the result of VOPC goes to VCC, and use that for the branching then. */
+
+      //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
+      //! s2: %e:s[2-3] = p_cbranch_z %b:vcc
+      //! p_unit_test 0, %e:s[2-3]
+      auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
+                           Operand(v_in, reg_v0));
+      auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
+                           Operand(exec, bld.lm));
+      auto br =
+         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
+      writeout(0, Operand(br, reg_s2));
+   }
+
+   //; del b, e
+
+   {
+      /* When VCC is overwritten inbetween, don't optimize. */
+
+      //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
+      //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
+      //! s2: %f:vcc = s_mov_b64 0
+      //! s2: %e:s[2-3] = p_cbranch_z %d:scc
+      //! p_unit_test 1, %e:s[2-3], %f:vcc
+      auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
+                           Operand(v_in, reg_v0));
+      auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
+                           Operand(exec, bld.lm));
+      auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, vcc), Operand::zero());
+      auto br =
+         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
+      writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
+   }
+
+   //; del b, c, d, e, f
+
+   {
+      /* When part of VCC is overwritten inbetween, don't optimize. */
+
+      //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
+      //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
+      //! s1: %f:s[107] = s_mov_b32 0
+      //! s2: %e:s[2-3] = p_cbranch_z %d:scc
+      //! p_unit_test 1, %e:s[2-3], %f:vcc
+      auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
+                           Operand(v_in, reg_v0));
+      auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
+                           Operand(exec, bld.lm));
+      auto ovrwr = bld.sop1(aco_opcode::s_mov_b32, bld.def(s1, vcc_hi), Operand::zero());
+      auto br =
+         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
+      writeout(1, Operand(br, reg_s2), Operand(ovrwr, vcc));
+   }
+
+   //; del b, c, d, e, f
+
+   {
+      /* When the result of VOPC goes to an SGPR pair other than VCC, don't optimize */
+
+      //! s2: %b:s[4-5] = v_cmp_eq_u32 0, %a:v[0]
+      //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:s[4-5], %x:exec
+      //! s2: %e:s[2-3] = p_cbranch_z %d:scc
+      //! p_unit_test 2, %e:s[2-3]
+      auto vcmp = bld.vopc_e64(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, reg_s4), Operand::zero(),
+                               Operand(v_in, reg_v0));
+      auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
+                           Operand(vcmp, reg_s4), Operand(exec, bld.lm));
+      auto br =
+         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
+      writeout(2, Operand(br, reg_s2));
+   }
+
+   //; del b, c, d, e
+
+   {
+      /* When the VCC isn't written by VOPC, don't optimize */
+
+      //! s2: %b:vcc, s1: %f:scc = s_or_b64 1, %0:s[4-5]
+      //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
+      //! s2: %e:s[2-3] = p_cbranch_z %d:scc
+      //! p_unit_test 2, %e:s[2-3]
+      auto salu = bld.sop2(Builder::s_or, bld.def(bld.lm, vcc), bld.def(s1, scc), Operand::c32(1u),
+                           Operand(reg_s4, bld.lm));
+      auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc),
+                           Operand(salu, vcc), Operand(exec, bld.lm));
+      auto br =
+         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
+      writeout(2, Operand(br, reg_s2));
+   }
+
+   //; del b, c, d, e, f, x
+
+   {
+      /* When EXEC is overwritten inbetween, don't optimize. */
+
+      //! s2: %b:vcc = v_cmp_eq_u32 0, %a:v[0]
+      //! s2: %c:s[0-1], s1: %d:scc = s_and_b64 %b:vcc, %x:exec
+      //! s2: %f:exec = s_mov_b64 42
+      //! s2: %e:s[2-3] = p_cbranch_z %d:scc
+      //! p_unit_test 4, %e:s[2-3], %f:exec
+      auto vcmp = bld.vopc(aco_opcode::v_cmp_eq_u32, bld.def(bld.lm, vcc), Operand::zero(),
+                           Operand(v_in, reg_v0));
+      auto sand = bld.sop2(Builder::s_and, bld.def(bld.lm, reg_s0), bld.def(s1, scc), bld.vcc(vcmp),
+                           Operand(exec, bld.lm));
+      auto ovrwr = bld.sop1(Builder::s_mov, bld.def(bld.lm, exec), Operand::c32(42u));
+      auto br =
+         bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, reg_s2), bld.scc(sand.def(1).getTemp()));
+      writeout(4, Operand(br, reg_s2), Operand(ovrwr, exec));
+   }
+
+   //; del b, c, d, e, f, x
+
+   finish_optimizer_postRA_test();
 END_TEST
 
 BEGIN_TEST(optimizer_postRA.scc_nocmp_opt)
-    //>> s1: %a, s2: %y, s1: %z = p_startpgm
-    ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
-    assert(setup_ok);
-
-    PhysReg reg_s0{0};
-    PhysReg reg_s2{2};
-    PhysReg reg_s3{3};
-    PhysReg reg_s4{4};
-    PhysReg reg_s6{6};
-    PhysReg reg_s8{8};
-
-    Temp in_0 = inputs[0];
-    Temp in_1 = inputs[1];
-    Temp in_2 = inputs[2];
-    Operand op_in_0(in_0);
-    op_in_0.setFixed(reg_s0);
-    Operand op_in_1(in_1);
-    op_in_1.setFixed(reg_s4);
-    Operand op_in_2(in_2);
-    op_in_2.setFixed(reg_s6);
-
-    {
-        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
-        //! s2: %f:vcc = p_cbranch_nz %e:scc
-        //! p_unit_test 0, %f:vcc
-        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
-                             Operand::c32(0x40018u));
-        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
-                             Operand::zero());
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
-        writeout(0, Operand(br, vcc));
-    }
-
-    //; del d, e, f
-
-    {
-        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
-        //! s2: %f:vcc = p_cbranch_z %e:scc
-        //! p_unit_test 1, %f:vcc
-        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
-                             Operand::c32(0x40018u));
-        auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
-                             Operand::zero());
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
-        writeout(1, Operand(br, vcc));
-    }
-
-    //; del d, e, f
-
-    {
-        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
-        //! s2: %f:vcc = p_cbranch_z %e:scc
-        //! p_unit_test 2, %f:vcc
-        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
-                             Operand::c32(0x40018u));
-        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
-                             Operand::zero());
-        auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
-        writeout(2, Operand(br, vcc));
-    }
-
-    //; del d, e, f
-
-    {
-        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
-        //! s2: %f:vcc = p_cbranch_nz %e:scc
-        //! p_unit_test 3, %f:vcc
-        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
-                             Operand::c32(0x40018u));
-        auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
-                             Operand::zero());
-        auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
-        writeout(3, Operand(br, vcc));
-    }
-
-    //; del d, e, f
-
-    {
-        //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
-        //! s2: %f:vcc = p_cbranch_z %e:scc
-        //! p_unit_test 4, %f:vcc
-        auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
-                             Operand::c32(0x12345u));
-        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
-                             Operand::zero(8));
-        auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
-        writeout(4, Operand(br, vcc));
-    }
-
-    //; del d, e, f
-
-    {
-        /* SCC is overwritten in between, don't optimize */
-
-        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
-        //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
-        //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
-        //! s2: %f:vcc = p_cbranch_z %g:scc
-        //! p_unit_test 5, %f:vcc, %h:s[3]
-        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
-                             Operand::c32(0x40018u));
-        auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
-                             Operand::c32(1u));
-        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
-                             Operand::zero());
-        auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
-        writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
-    }
-
-    //; del d, e, f, g, h, x
-
-    {
-       /* SCC is overwritten in between, optimize by pulling down */
-
-       //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
-       //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
-       //! s2: %f:vcc = p_cbranch_z %g:scc
-       //! p_unit_test 5, %f:vcc, %h:s[3]
-       auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
-                            Operand::c32(0x40018u));
-       auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
-                            Operand::c32(1u));
-       auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
-                            Operand::zero());
-       auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
-       writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
-    }
-
-    //; del d, e, f, g, h, x
-
-    {
-       /* SCC is overwritten in between, optimize by pulling down */
-
-       //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
-       //! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018
-       //! s2: %f:vcc = p_cbranch_z %g:scc
-       //! p_unit_test 5, %f:vcc, %h:s[3]
-       auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1,
-                            Operand::c32(0x40018u));
-       auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
-                            Operand::c32(1u));
-       auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s8),
-                            Operand::zero());
-       auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
-       writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
-    }
-
-    //; del d, e, f, g, h, x
-
-    {
-        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
-        //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
-        //! p_unit_test 6, %f:s[4]
-        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
-                             Operand::c32(0x40018u));
-        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
-                             Operand::zero());
-        auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
-        writeout(6, Operand(br, reg_s4));
-    }
-
-    //; del d, e, f
-
-    {
-        /* SCC is overwritten in between, don't optimize */
-
-        //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
-        //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
-        //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
-        //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
-        //! p_unit_test 7, %f:s[4], %h:s[3]
-        auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
-                             Operand::c32(0x40018u));
-        auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
-                             Operand::c32(1u));
-        auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
-                             Operand::zero());
-        auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0), Operand(op_in_2), bld.scc(scmp));
-        writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
-    }
-
-    //; del d, e, f, g, h, x
-
-    finish_optimizer_postRA_test();
+   //>> s1: %a, s2: %y, s1: %z = p_startpgm
+   ASSERTED bool setup_ok = setup_cs("s1 s2 s1", GFX6);
+   assert(setup_ok);
+
+   PhysReg reg_s0{0};
+   PhysReg reg_s2{2};
+   PhysReg reg_s3{3};
+   PhysReg reg_s4{4};
+   PhysReg reg_s6{6};
+   PhysReg reg_s8{8};
+
+   Temp in_0 = inputs[0];
+   Temp in_1 = inputs[1];
+   Temp in_2 = inputs[2];
+   Operand op_in_0(in_0);
+   op_in_0.setFixed(reg_s0);
+   Operand op_in_1(in_1);
+   op_in_1.setFixed(reg_s4);
+   Operand op_in_2(in_2);
+   op_in_2.setFixed(reg_s6);
+
+   {
+      //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+      //! s2: %f:vcc = p_cbranch_nz %e:scc
+      //! p_unit_test 0, %f:vcc
+      auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+                           Operand::c32(0x40018u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero());
+      auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
+      writeout(0, Operand(br, vcc));
+   }
+
+   //; del d, e, f
+
+   {
+      //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+      //! s2: %f:vcc = p_cbranch_z %e:scc
+      //! p_unit_test 1, %f:vcc
+      auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+                           Operand::c32(0x40018u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero());
+      auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
+      writeout(1, Operand(br, vcc));
+   }
+
+   //; del d, e, f
+
+   {
+      //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+      //! s2: %f:vcc = p_cbranch_z %e:scc
+      //! p_unit_test 2, %f:vcc
+      auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+                           Operand::c32(0x40018u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero());
+      auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
+      writeout(2, Operand(br, vcc));
+   }
+
+   //; del d, e, f
+
+   {
+      //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+      //! s2: %f:vcc = p_cbranch_nz %e:scc
+      //! p_unit_test 3, %f:vcc
+      auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+                           Operand::c32(0x40018u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero());
+      auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
+      writeout(3, Operand(br, vcc));
+   }
+
+   //; del d, e, f
+
+   {
+      //! s2: %d:s[2-3], s1: %e:scc = s_and_b64 %y:s[4-5], 0x12345
+      //! s2: %f:vcc = p_cbranch_z %e:scc
+      //! p_unit_test 4, %f:vcc
+      auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s2), bld.def(s1, scc), op_in_1,
+                           Operand::c32(0x12345u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u64, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero(8));
+      auto br = bld.branch(aco_opcode::p_cbranch_nz, bld.def(s2, vcc), bld.scc(scmp));
+      writeout(4, Operand(br, vcc));
+   }
+
+   //; del d, e, f
+
+   {
+      /* SCC is overwritten in between, don't optimize */
+
+      //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+      //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
+      //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
+      //! s2: %f:vcc = p_cbranch_z %g:scc
+      //! p_unit_test 5, %f:vcc, %h:s[3]
+      auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+                           Operand::c32(0x40018u));
+      auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
+                           Operand::c32(1u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero());
+      auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
+      writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
+   }
+
+   //; del d, e, f, g, h, x
+
+   {
+      /* SCC is overwritten in between, optimize by pulling down */
+
+      //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
+      //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+      //! s2: %f:vcc = p_cbranch_z %g:scc
+      //! p_unit_test 5, %f:vcc, %h:s[3]
+      auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+                           Operand::c32(0x40018u));
+      auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
+                           Operand::c32(1u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero());
+      auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
+      writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
+   }
+
+   //; del d, e, f, g, h, x
+
+   {
+      /* SCC is overwritten in between, optimize by pulling down */
+
+      //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
+      //! s2: %d:s[8-9], s1: %e:scc = s_and_b64 %b:s[4-5], 0x40018
+      //! s2: %f:vcc = p_cbranch_z %g:scc
+      //! p_unit_test 5, %f:vcc, %h:s[3]
+      auto salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), op_in_1,
+                           Operand::c32(0x40018u));
+      auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
+                           Operand::c32(1u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_lg_u32, bld.def(s1, scc), Operand(salu, reg_s8),
+                           Operand::zero());
+      auto br = bld.branch(aco_opcode::p_cbranch_z, bld.def(s2, vcc), bld.scc(scmp));
+      writeout(5, Operand(br, vcc), Operand(ovrw, reg_s3));
+   }
+
+   //; del d, e, f, g, h, x
+
+   {
+      //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+      //! s1: %f:s[4] = s_cselect_b32 %z:s[6], %a:s[0], %e:scc
+      //! p_unit_test 6, %f:s[4]
+      auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+                           Operand::c32(0x40018u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero());
+      auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
+                         Operand(op_in_2), bld.scc(scmp));
+      writeout(6, Operand(br, reg_s4));
+   }
+
+   //; del d, e, f
+
+   {
+      /* SCC is overwritten in between, don't optimize */
+
+      //! s1: %d:s[2], s1: %e:scc = s_bfe_u32 %a:s[0], 0x40018
+      //! s1: %h:s[3], s1: %x:scc = s_add_u32 %a:s[0], 1
+      //! s1: %g:scc = s_cmp_eq_u32 %d:s[2], 0
+      //! s1: %f:s[4] = s_cselect_b32 %a:s[0], %z:s[6], %g:scc
+      //! p_unit_test 7, %f:s[4], %h:s[3]
+      auto salu = bld.sop2(aco_opcode::s_bfe_u32, bld.def(s1, reg_s2), bld.def(s1, scc), op_in_0,
+                           Operand::c32(0x40018u));
+      auto ovrw = bld.sop2(aco_opcode::s_add_u32, bld.def(s1, reg_s3), bld.def(s1, scc), op_in_0,
+                           Operand::c32(1u));
+      auto scmp = bld.sopc(aco_opcode::s_cmp_eq_u32, bld.def(s1, scc), Operand(salu, reg_s2),
+                           Operand::zero());
+      auto br = bld.sop2(aco_opcode::s_cselect_b32, bld.def(s1, reg_s4), Operand(op_in_0),
+                         Operand(op_in_2), bld.scc(scmp));
+      writeout(7, Operand(br, reg_s4), Operand(ovrw, reg_s3));
+   }
+
+   //; del d, e, f, g, h, x
+
+   finish_optimizer_postRA_test();
 END_TEST
 
 BEGIN_TEST(optimizer_postRA.dpp)
@@ -368,7 +382,8 @@ BEGIN_TEST(optimizer_postRA.dpp)
    //! v1: %res2:v[2] = v_sub_f32 %b:v[1], %tmp2:v[2] row_half_mirror bound_ctrl:1
    //! p_unit_test 2, %res2:v[2]
    Temp tmp2 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
-   Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2), dpp_row_half_mirror);
+   Temp res2 = bld.vop2_dpp(aco_opcode::v_sub_f32, bld.def(v1, reg_v2), b, Operand(tmp2, reg_v2),
+                            dpp_row_half_mirror);
    writeout(2, Operand(res2, reg_v2));
 
    /* modifiers */
@@ -429,14 +444,16 @@ BEGIN_TEST(optimizer_postRA.dpp)
    //! v1: %res8:v[2] = v_cndmask_b32 %a:v[0], %b:v[1], %c:vcc row_mirror bound_ctrl:1
    //! p_unit_test 8, %res8:v[2]
    Temp tmp8 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
-   Temp res8 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
+   Temp res8 =
+      bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp8, reg_v2), b, c);
    writeout(8, Operand(res8, reg_v2));
 
    //! v1: %tmp9:v[2] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
    //! v1: %res9:v[2] = v_cndmask_b32 %tmp9:v[2], %b:v[1], %d:s[0-1]
    //! p_unit_test 9, %res9:v[2]
    Temp tmp9 = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v2), a, dpp_row_mirror);
-   Temp res9 = bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
+   Temp res9 =
+      bld.vop2(aco_opcode::v_cndmask_b32, bld.def(v1, reg_v2), Operand(tmp9, reg_v2), b, d);
    writeout(9, Operand(res9, reg_v2));
 
    /* control flow */
@@ -485,48 +502,53 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
    Operand c(inputs[2], PhysReg(258)); /* buffer store address */
    Operand d(inputs[3], PhysReg(259)); /* buffer store value */
    Operand e(inputs[4], PhysReg(0));   /* condition */
-   PhysReg reg_v12(268); /* temporary register */
+   PhysReg reg_v12(268);               /* temporary register */
 
    Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
 
    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
    //! s2: %0:vcc = p_cbranch_nz BB1, BB2
 
-   emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
-      /* --- logical then --- */
-      //! BB1
-      //! /* logical preds: BB0, / linear preds: BB0, / kind: */
-      //! p_logical_start
-
-      //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
-      bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
-
-      //! p_logical_end
-      //! s2: %0:vcc = p_branch BB3
-
-      /* --- linear then --- */
-      //! BB2
-      //! /* logical preds: / linear preds: BB0, / kind: */
-      //! s2: %0:vcc = p_branch BB3
-
-      /* --- invert --- */
-      //! BB3
-      //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
-      //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
-      //! s2: %0:vcc = p_cbranch_nz BB4, BB5
-   }, [&]() -> void {
-      /* --- logical else --- */
-      //! BB4
-      //! /* logical preds: BB0, / linear preds: BB3, / kind: */
-      //! p_logical_start
-      //! p_logical_end
-      //! s2: %0:vcc = p_branch BB6
-
-      /* --- linear else --- */
-      //! BB5
-      //! /* logical preds: / linear preds: BB3, / kind: */
-      //! s2: %0:vcc = p_branch BB6
-   });
+   emit_divergent_if_else(
+      program.get(), bld, e,
+      [&]() -> void
+      {
+         /* --- logical then --- */
+         //! BB1
+         //! /* logical preds: BB0, / linear preds: BB0, / kind: */
+         //! p_logical_start
+
+         //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
+         bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
+
+         //! p_logical_end
+         //! s2: %0:vcc = p_branch BB3
+
+         /* --- linear then --- */
+         //! BB2
+         //! /* logical preds: / linear preds: BB0, / kind: */
+         //! s2: %0:vcc = p_branch BB3
+
+         /* --- invert --- */
+         //! BB3
+         //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
+         //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
+         //! s2: %0:vcc = p_cbranch_nz BB4, BB5
+      },
+      [&]() -> void
+      {
+         /* --- logical else --- */
+         //! BB4
+         //! /* logical preds: BB0, / linear preds: BB3, / kind: */
+         //! p_logical_start
+         //! p_logical_end
+         //! s2: %0:vcc = p_branch BB6
+
+         /* --- linear else --- */
+         //! BB5
+         //! /* logical preds: / linear preds: BB3, / kind: */
+         //! s2: %0:vcc = p_branch BB6
+      });
 
    /* --- merge block --- */
    //! BB6
@@ -535,7 +557,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf)
 
    //! v1: %res10:v[12] = v_add_f32 %a:v[0], %b:v[1] row_mirror bound_ctrl:1
    //! p_unit_test 10, %res10:v[12]
-   Temp result = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
+   Temp result =
+      bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
    writeout(10, Operand(result, reg_v12));
 
    finish_optimizer_postRA_test();
@@ -560,7 +583,7 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
    Operand d(inputs[3], PhysReg(259)); /* buffer store value */
    Operand e(inputs[4], PhysReg(0));   /* condition */
    Operand f(inputs[5], PhysReg(2));   /* buffer store address (scalar) */
-   PhysReg reg_v12(268); /* temporary register */
+   PhysReg reg_v12(268);               /* temporary register */
 
    //! v1: %dpp_tmp:v[12] = v_mov_b32 %a:v[0] row_mirror bound_ctrl:1
    Temp dpp_tmp = bld.vop1_dpp(aco_opcode::v_mov_b32, bld.def(v1, reg_v12), a, dpp_row_mirror);
@@ -568,44 +591,50 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
    //! s2: %0:vcc = p_cbranch_nz BB1, BB2
 
-   emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
-      /* --- logical then --- */
-      //! BB1
-      //! /* logical preds: BB0, / linear preds: BB0, / kind: */
-      //! p_logical_start
-
-      //! v1: %addr:v[0] = p_parallelcopy %f:s[2]
-      Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f);
-
-      //! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen
-      bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d, Operand::zero(), 0, true);
-
-      //! p_logical_end
-      //! s2: %0:vcc = p_branch BB3
-
-      /* --- linear then --- */
-      //! BB2
-      //! /* logical preds: / linear preds: BB0, / kind: */
-      //! s2: %0:vcc = p_branch BB3
-
-      /* --- invert --- */
-      //! BB3
-      //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
-      //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
-      //! s2: %0:vcc = p_cbranch_nz BB4, BB5
-   }, [&]() -> void {
-      /* --- logical else --- */
-      //! BB4
-      //! /* logical preds: BB0, / linear preds: BB3, / kind: */
-      //! p_logical_start
-      //! p_logical_end
-      //! s2: %0:vcc = p_branch BB6
-
-      /* --- linear else --- */
-      //! BB5
-      //! /* logical preds: / linear preds: BB3, / kind: */
-      //! s2: %0:vcc = p_branch BB6
-   });
+   emit_divergent_if_else(
+      program.get(), bld, e,
+      [&]() -> void
+      {
+         /* --- logical then --- */
+         //! BB1
+         //! /* logical preds: BB0, / linear preds: BB0, / kind: */
+         //! p_logical_start
+
+         //! v1: %addr:v[0] = p_parallelcopy %f:s[2]
+         Temp addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(v1, a.physReg()), f);
+
+         //! buffer_store_dword %addr:v[0], 0, %d:v[3], 0 offen
+         bld.mubuf(aco_opcode::buffer_store_dword, Operand(addr, a.physReg()), Operand::zero(), d,
+                   Operand::zero(), 0, true);
+
+         //! p_logical_end
+         //! s2: %0:vcc = p_branch BB3
+
+         /* --- linear then --- */
+         //! BB2
+         //! /* logical preds: / linear preds: BB0, / kind: */
+         //! s2: %0:vcc = p_branch BB3
+
+         /* --- invert --- */
+         //! BB3
+         //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
+         //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
+         //! s2: %0:vcc = p_cbranch_nz BB4, BB5
+      },
+      [&]() -> void
+      {
+         /* --- logical else --- */
+         //! BB4
+         //! /* logical preds: BB0, / linear preds: BB3, / kind: */
+         //! p_logical_start
+         //! p_logical_end
+         //! s2: %0:vcc = p_branch BB6
+
+         /* --- linear else --- */
+         //! BB5
+         //! /* logical preds: / linear preds: BB3, / kind: */
+         //! s2: %0:vcc = p_branch BB6
+      });
 
    /* --- merge block --- */
    //! BB6
@@ -613,7 +642,8 @@ BEGIN_TEST(optimizer_postRA.dpp_across_cf_overwritten)
    //! s2: %0:exec = p_parallelcopy %saved_exec:s[84-85]
 
    //! v1: %result:v[12] = v_add_f32 %dpp_mov_tmp:v[12], %b:v[1]
-   Temp result = bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
+   Temp result =
+      bld.vop2(aco_opcode::v_add_f32, bld.def(v1, reg_v12), Operand(dpp_tmp, reg_v12), b);
    //! p_unit_test 10, %result:v[12]
    writeout(10, Operand(result, reg_v12));
 
@@ -631,53 +661,58 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
    startpgm->definitions[2].setFixed(PhysReg(259));
    startpgm->definitions[3].setFixed(PhysReg(0));
 
-   Operand a(inputs[0], PhysReg(2)); /* source for s_and */
+   Operand a(inputs[0], PhysReg(2));   /* source for s_and */
    Operand c(inputs[1], PhysReg(258)); /* buffer store address */
    Operand d(inputs[2], PhysReg(259)); /* buffer store value */
    Operand e(inputs[3], PhysReg(0));   /* condition */
-   PhysReg reg_s8(8); /* temporary register */
+   PhysReg reg_s8(8);                  /* temporary register */
 
    auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a,
-                        Operand::c32(0x40018u));
+                            Operand::c32(0x40018u));
 
    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
    //! s2: %0:vcc = p_cbranch_nz BB1, BB2
 
-   emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
-      /* --- logical then --- */
-      //! BB1
-      //! /* logical preds: BB0, / linear preds: BB0, / kind: */
-      //! p_logical_start
-
-      //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
-      bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
-
-      //! p_logical_end
-      //! s2: %0:vcc = p_branch BB3
-
-      /* --- linear then --- */
-      //! BB2
-      //! /* logical preds: / linear preds: BB0, / kind: */
-      //! s2: %0:vcc = p_branch BB3
-
-      /* --- invert --- */
-      //! BB3
-      //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
-      //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
-      //! s2: %0:vcc = p_cbranch_nz BB4, BB5
-   }, [&]() -> void {
-      /* --- logical else --- */
-      //! BB4
-      //! /* logical preds: BB0, / linear preds: BB3, / kind: */
-      //! p_logical_start
-      //! p_logical_end
-      //! s2: %0:vcc = p_branch BB6
-
-      /* --- linear else --- */
-      //! BB5
-      //! /* logical preds: / linear preds: BB3, / kind: */
-      //! s2: %0:vcc = p_branch BB6
-   });
+   emit_divergent_if_else(
+      program.get(), bld, e,
+      [&]() -> void
+      {
+         /* --- logical then --- */
+         //! BB1
+         //! /* logical preds: BB0, / linear preds: BB0, / kind: */
+         //! p_logical_start
+
+         //! buffer_store_dword %c:v[2], 0, %d:v[3], 0 offen
+         bld.mubuf(aco_opcode::buffer_store_dword, c, Operand::zero(), d, Operand::zero(), 0, true);
+
+         //! p_logical_end
+         //! s2: %0:vcc = p_branch BB3
+
+         /* --- linear then --- */
+         //! BB2
+         //! /* logical preds: / linear preds: BB0, / kind: */
+         //! s2: %0:vcc = p_branch BB3
+
+         /* --- invert --- */
+         //! BB3
+         //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
+         //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
+         //! s2: %0:vcc = p_cbranch_nz BB4, BB5
+      },
+      [&]() -> void
+      {
+         /* --- logical else --- */
+         //! BB4
+         //! /* logical preds: BB0, / linear preds: BB3, / kind: */
+         //! p_logical_start
+         //! p_logical_end
+         //! s2: %0:vcc = p_branch BB6
+
+         /* --- linear else --- */
+         //! BB5
+         //! /* logical preds: / linear preds: BB3, / kind: */
+         //! s2: %0:vcc = p_branch BB6
+      });
 
    /* --- merge block --- */
    //! BB6
@@ -695,7 +730,6 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf)
    finish_optimizer_postRA_test();
 END_TEST
 
-
 BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
    //>> s2: %a:s[2-3], v1: %c:v[2], v1: %d:v[3], s2: %e:s[0-1], s1: %f:s[4] = p_startpgm
    if (!setup_cs("s2 v1 v1 s2 s1", GFX10_3))
@@ -708,59 +742,65 @@ BEGIN_TEST(optimizer_postRA.scc_nocmp_across_cf_partially_overwritten)
    startpgm->definitions[3].setFixed(PhysReg(0));
    startpgm->definitions[4].setFixed(PhysReg(4));
 
-   Operand a(inputs[0], PhysReg(2)); /* source for s_and */
+   Operand a(inputs[0], PhysReg(2));   /* source for s_and */
    Operand c(inputs[1], PhysReg(258)); /* buffer store address */
    Operand d(inputs[2], PhysReg(259)); /* buffer store value */
    Operand e(inputs[3], PhysReg(0));   /* condition */
    Operand f(inputs[4], PhysReg(4));   /* overwrite value */
-   PhysReg reg_s3(3); /* temporary register */
-   PhysReg reg_s8(8); /* temporary register */
+   PhysReg reg_s3(3);                  /* temporary register */
+   PhysReg reg_s8(8);                  /* temporary register */
 
    //! s2: %tmp_salu:s[8-9], s1: %tmp_salu_scc:scc = s_and_b64 %a:s[2-3], 0x40018
    auto tmp_salu = bld.sop2(aco_opcode::s_and_b64, bld.def(s2, reg_s8), bld.def(s1, scc), a,
-                        Operand::c32(0x40018u));
+                            Operand::c32(0x40018u));
 
    //! s2: %saved_exec:s[84-85],  s1: %0:scc,  s2: %0:exec = s_and_saveexec_b64 %e:s[0-1], %0:exec
    //! s2: %0:vcc = p_cbranch_nz BB1, BB2
 
-   emit_divergent_if_else(program.get(), bld, e, [&]() -> void {
-      /* --- logical then --- */
-      //! BB1
-      //! /* logical preds: BB0, / linear preds: BB0, / kind: */
-      //! p_logical_start
-
-      //! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4]
-      Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f);
-
-      //! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen
-      bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(), 0, true);
-
-      //! p_logical_end
-      //! s2: %0:vcc = p_branch BB3
-
-      /* --- linear then --- */
-      //! BB2
-      //! /* logical preds: / linear preds: BB0, / kind: */
-      //! s2: %0:vcc = p_branch BB3
-
-      /* --- invert --- */
-      //! BB3
-      //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
-      //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
-      //! s2: %0:vcc = p_cbranch_nz BB4, BB5
-   }, [&]() -> void {
-      /* --- logical else --- */
-      //! BB4
-      //! /* logical preds: BB0, / linear preds: BB3, / kind: */
-      //! p_logical_start
-      //! p_logical_end
-      //! s2: %0:vcc = p_branch BB6
-
-      /* --- linear else --- */
-      //! BB5
-      //! /* logical preds: / linear preds: BB3, / kind: */
-      //! s2: %0:vcc = p_branch BB6
-   });
+   emit_divergent_if_else(
+      program.get(), bld, e,
+      [&]() -> void
+      {
+         /* --- logical then --- */
+         //! BB1
+         //! /* logical preds: BB0, / linear preds: BB0, / kind: */
+         //! p_logical_start
+
+         //! s1: %ovrwr:s[3] = p_parallelcopy %f:s[4]
+         Temp s_addr = bld.pseudo(aco_opcode::p_parallelcopy, bld.def(s1, reg_s3), f);
+
+         //! buffer_store_dword %c:v[2], %ovrwr:s[3], %d:v[3], 0 offen
+         bld.mubuf(aco_opcode::buffer_store_dword, c, Operand(s_addr, reg_s3), d, Operand::zero(),
+                   0, true);
+
+         //! p_logical_end
+         //! s2: %0:vcc = p_branch BB3
+
+         /* --- linear then --- */
+         //! BB2
+         //! /* logical preds: / linear preds: BB0, / kind: */
+         //! s2: %0:vcc = p_branch BB3
+
+         /* --- invert --- */
+         //! BB3
+         //! /* logical preds: / linear preds: BB1, BB2, / kind: invert, */
+         //! s2: %0:exec,  s1: %0:scc = s_andn2_b64 %saved_exec:s[84-85], %0:exec
+         //! s2: %0:vcc = p_cbranch_nz BB4, BB5
+      },
+      [&]() -> void
+      {
+         /* --- logical else --- */
+         //! BB4
+         //! /* logical preds: BB0, / linear preds: BB3, / kind: */
+         //! p_logical_start
+         //! p_logical_end
+         //! s2: %0:vcc = p_branch BB6
+
+         /* --- linear else --- */
+         //! BB5
+         //! /* logical preds: / linear preds: BB3, / kind: */
+         //! s2: %0:vcc = p_branch BB6
+      });
 
    /* --- merge block --- */
    //! BB6
index 93b0680..7f44e55 100644 (file)
@@ -35,22 +35,27 @@ BEGIN_TEST(setup_reduce_temp.divergent_if_phi)
     *    use_linear_vgpr(v0)
     * }
     * ... = phi ...
-   */
-   //TODO: fix the RA validator to spot this
+    */
+   // TODO: fix the RA validator to spot this
    //>> s2: %_, v1: %a = p_startpgm
    if (!setup_cs("s2 v1", GFX9))
       return;
 
    //>> lv1: %lv = p_start_linear_vgpr
-   emit_divergent_if_else(program.get(), bld, Operand(inputs[0]), [&]() -> void {
-      //>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
-      Instruction* reduce = bld.reduction(aco_opcode::p_reduce, bld.def(s1),
-                                          bld.def(bld.lm), bld.def(s1, scc), inputs[1],
-                                          Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
-      reduce->reduction().cluster_size = bld.lm.bytes() * 8;
-   }, [&]() -> void {
-      /* nothing */
-   });
+   emit_divergent_if_else(
+      program.get(), bld, Operand(inputs[0]),
+      [&]() -> void
+      {
+         //>> s1: %_, s2: %_, s1: %_:scc = p_reduce %a, %lv, lv1: undef op:umin32 cluster_size:64
+         Instruction* reduce =
+            bld.reduction(aco_opcode::p_reduce, bld.def(s1), bld.def(bld.lm), bld.def(s1, scc),
+                          inputs[1], Operand(v1.as_linear()), Operand(v1.as_linear()), umin32);
+         reduce->reduction().cluster_size = bld.lm.bytes() * 8;
+      },
+      [&]() -> void
+      {
+         /* nothing */
+      });
    bld.pseudo(aco_opcode::p_phi, bld.def(v1), Operand::c32(1), Operand::zero());
    //>> /* logical preds: BB1, BB4, / linear preds: BB4, BB5, / kind: uniform, top-level, merge, */
    //! p_end_linear_vgpr %lv
index 6b2b9d3..456c423 100644 (file)
@@ -37,7 +37,7 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
 
    /* TODO: is this possible to do on GFX11? */
    for (amd_gfx_level cc = GFX8; cc <= GFX10_3; cc = (amd_gfx_level)((unsigned)cc + 1)) {
-      for (bool pessimistic : { false, true }) {
+      for (bool pessimistic : {false, true}) {
          const char* subvariant = pessimistic ? "/pessimistic" : "/optimistic";
 
          //>> v1: %_:v[#a] = p_startpgm
@@ -45,7 +45,8 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
             return;
 
          //! v2b: %_:v[#a][0:16], v2b: %res1:v[#a][16:32] = p_split_vector %_:v[#a]
-         Builder::Result tmp = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
+         Builder::Result tmp =
+            bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]);
 
          //! v1: %_:v[#b] = v_cvt_f32_f16 %_:v[#a][16:32] dst_sel:dword src0_sel:uword1
          //! v1: %_:v[#a] = v_cvt_f32_f16 %_:v[#a][0:16]
@@ -55,7 +56,7 @@ BEGIN_TEST(regalloc.subdword_alloc.reuse_16bit_operands)
          writeout(0, result1);
          writeout(1, result2);
 
-         finish_ra_test(ra_test_policy { pessimistic });
+         finish_ra_test(ra_test_policy{pessimistic});
       }
    }
 END_TEST
@@ -67,7 +68,8 @@ BEGIN_TEST(regalloc._32bit_partial_write)
 
    /* ensure high 16 bits are occupied */
    //! v2b: %_:v[0][0:16], v2b: %_:v[0][16:32] = p_split_vector %_:v[0]
-   Temp hi = bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
+   Temp hi =
+      bld.pseudo(aco_opcode::p_split_vector, bld.def(v2b), bld.def(v2b), inputs[0]).def(1).getTemp();
 
    /* This test checks if this instruction uses SDWA. */
    //! v2b: %_:v[0][0:16] = v_not_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
@@ -168,9 +170,9 @@ BEGIN_TEST(regalloc.precolor.multiple_operands)
 
    //! v1: %tmp3_2:v[0], v1: %tmp0_2:v[1], v1: %tmp1_2:v[2], v1: %tmp2_2:v[3] = p_parallelcopy %tmp3:v[3], %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
    //! p_unit_test %tmp3_2:v[0], %tmp0_2:v[1], %tmp1_2:v[2], %tmp2_2:v[3]
-   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256+0)),
-              Operand(inputs[0], PhysReg(256+1)), Operand(inputs[1], PhysReg(256+2)),
-              Operand(inputs[2], PhysReg(256+3)));
+   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[3], PhysReg(256 + 0)),
+              Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[1], PhysReg(256 + 2)),
+              Operand(inputs[2], PhysReg(256 + 3)));
 
    finish_ra_test(ra_test_policy());
 END_TEST
@@ -182,8 +184,8 @@ BEGIN_TEST(regalloc.precolor.different_regs)
 
    //! v1: %tmp1:v[1], v1: %tmp2:v[2] = p_parallelcopy %tmp0:v[0], %tmp0:v[0]
    //! p_unit_test %tmp0:v[0], %tmp1:v[1], %tmp2:v[2]
-   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256+0)),
-              Operand(inputs[0], PhysReg(256+1)), Operand(inputs[0], PhysReg(256+2)));
+   bld.pseudo(aco_opcode::p_unit_test, Operand(inputs[0], PhysReg(256 + 0)),
+              Operand(inputs[0], PhysReg(256 + 1)), Operand(inputs[0], PhysReg(256 + 2)));
 
    finish_ra_test(ra_test_policy());
 END_TEST
@@ -256,7 +258,8 @@ BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
 
    //! s1: %scc_tmp:scc, s1: %1:s[0] = p_unit_test
    Temp s0_tmp = bld.tmp(s1);
-   Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc), Definition(s0_tmp.id(), PhysReg{0}, s1));
+   Temp scc_tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(s1, scc),
+                             Definition(s0_tmp.id(), PhysReg{0}, s1));
 
    //! lv1: %tmp1:v[1] = p_unit_test
    Temp tmp = bld.pseudo(aco_opcode::p_unit_test, bld.def(v1.as_linear(), reg_v1));
@@ -273,7 +276,8 @@ BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_reg_impl)
    //>> lv1: %5:v[2] = p_parallelcopy %3:v[1] scc:1 scratch:s1
    Pseudo_instruction& parallelcopy = program->blocks[0].instructions[3]->pseudo();
    aco_print_instr(program->gfx_level, &parallelcopy, output);
-   fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc, parallelcopy.scratch_sgpr.reg());
+   fprintf(output, " scc:%u scratch:s%u\n", parallelcopy.tmp_in_scc,
+           parallelcopy.scratch_sgpr.reg());
 END_TEST
 
 BEGIN_TEST(regalloc.linear_vgpr.live_range_split.get_regs_for_copies)
@@ -392,13 +396,15 @@ BEGIN_TEST(regalloc.vinterp_fp16)
 
    //! v1: %tmp0:v[1] = v_interp_p10_f16_f32_inreg %lo:v[3][0:16], %in1:v[1], hi(%hi:v[3][16:32])
    //! p_unit_test %tmp0:v[1]
-   Temp tmp0 = bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
+   Temp tmp0 =
+      bld.vinterp_inreg(aco_opcode::v_interp_p10_f16_f32_inreg, bld.def(v1), lo, inputs[1], hi);
    bld.pseudo(aco_opcode::p_unit_test, tmp0);
 
    //! v2b: %tmp1:v[0][16:32] = v_interp_p2_f16_f32_inreg %in0:v[0], %in2:v[2], %tmp0:v[1] opsel_hi
    //! v1: %tmp2:v[0] = p_create_vector 0, %tmp1:v[0][16:32]
    //! p_unit_test %tmp2:v[0]
-   Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0], inputs[2], tmp0);
+   Temp tmp1 = bld.vinterp_inreg(aco_opcode::v_interp_p2_f16_f32_inreg, bld.def(v2b), inputs[0],
+                                 inputs[2], tmp0);
    Temp tmp2 = bld.pseudo(aco_opcode::p_create_vector, bld.def(v1), Operand::zero(2), tmp1);
    bld.pseudo(aco_opcode::p_unit_test, tmp2);
 
index d398373..f544cc7 100644 (file)
@@ -34,7 +34,8 @@ BEGIN_TEST(validate.sdwa.allow)
       //>> Validation results:
       //! Validation passed
 
-      SDWA_instruction *sdwa = &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa();
+      SDWA_instruction* sdwa =
+         &bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], inputs[1])->sdwa();
       sdwa->neg[0] = sdwa->neg[1] = sdwa->abs[0] = sdwa->abs[1] = true;
 
       bld.vop2_sdwa(aco_opcode::v_mul_f32, bld.def(v1b), inputs[0], inputs[1]);
@@ -105,7 +106,9 @@ BEGIN_TEST(validate.sdwa.vopc)
       bld.vopc_sdwa(aco_opcode::v_cmp_lt_f32, bld.def(bld.lm), inputs[0], inputs[1]);
 
       //~gfx(9|10)! SDWA VOPC clamp only supported on GFX8: s2: %_:vcc = v_cmp_eq_f32 %vgpr0, %vgpr1 clamp src0_sel:dword src1_sel:dword
-      bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])->sdwa().clamp = true;
+      bld.vopc_sdwa(aco_opcode::v_cmp_eq_f32, bld.def(bld.lm, vcc), inputs[0], inputs[1])
+         ->sdwa()
+         .clamp = true;
 
       //! Validation failed
 
@@ -138,11 +141,13 @@ BEGIN_TEST(validate.sdwa.vcc)
 
       //! 3rd operand must be fixed to vcc with SDWA: v1: %_ = v_cndmask_b32 %vgpr0, %vgpr1, %_ dst_sel:dword src0_sel:dword src1_sel:dword
       bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], inputs[2]);
-      bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1], bld.vcc(inputs[2]));
+      bld.vop2_sdwa(aco_opcode::v_cndmask_b32, bld.def(v1), inputs[0], inputs[1],
+                    bld.vcc(inputs[2]));
 
       //! 2nd definition must be fixed to vcc with SDWA: v1: %_, s2: %_ = v_add_co_u32 %vgpr0, %vgpr1 dst_sel:dword src0_sel:dword src1_sel:dword
       bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm), inputs[0], inputs[1]);
-      bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0], inputs[1]);
+      bld.vop2_sdwa(aco_opcode::v_add_co_u32, bld.def(v1), bld.def(bld.lm, vcc), inputs[0],
+                    inputs[1]);
 
       //! Validation failed
 
@@ -152,125 +157,127 @@ END_TEST
 
 BEGIN_TEST(optimize.sdwa.extract)
    for (unsigned i = GFX7; i <= GFX10; i++) {
-   for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
-      //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
-      if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
-         continue;
-
-      //; def standard_test(index, sel):
-      //;    res = 'v1: %%res%s = v_mul_f32 %%a, %%b dst_sel:dword src0_sel:dword src1_sel:%c%s\n' % (index, 's' if variant.endswith('_signed') else 'u', sel)
-      //;    res += 'p_unit_test %s, %%res%s' % (index, index)
-      //;    return res
-      //; funcs['standard_test'] = lambda a: standard_test(*(v for v in a.split(',')))
-
-      aco_opcode ext = aco_opcode::p_extract;
-      aco_opcode ins = aco_opcode::p_insert;
-
-      {
-      //~gfx[^7].*! @standard_test(0,byte0)
-      Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
-                                    Operand::c32(is_signed));
-      writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
-
-      //~gfx[^7].*! @standard_test(1,byte1)
-      Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
-                                    Operand::c32(is_signed));
-      writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
-
-      //~gfx[^7].*! @standard_test(2,byte2)
-      Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
-                                    Operand::c32(is_signed));
-      writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
-
-      //~gfx[^7].*! @standard_test(3,byte3)
-      Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
-                                    Operand::c32(is_signed));
-      writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
-
-      //~gfx[^7].*! @standard_test(4,word0)
-      Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
-                                    Operand::c32(is_signed));
-      writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
-
-      //~gfx[^7].*! @standard_test(5,word1)
-      Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
-                                    Operand::c32(16u), Operand::c32(is_signed));
-      writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
-
-      //~gfx[^7]_unsigned! @standard_test(6,byte0)
-      Temp bfi_byte0_b = bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
-      writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
-
-      //~gfx[^7]_unsigned! @standard_test(7,word0)
-      Temp bfi_word0_b =
-         bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u));
-      writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b));
-      }
-
-      //>> p_unit_test 63
-      writeout(63);
-
-      {
-      //! v1: %tmp8 = p_insert %b, 1, 8
-      //! v1: %res8 = v_mul_f32 %a, %tmp8
-      //! p_unit_test 8, %res8
-      Temp bfi_byte1_b =
-         bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u));
-      writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b));
-
-      /* v_cvt_f32_ubyte[0-3] can be used instead of v_cvt_f32_u32+sdwa */
-      //~gfx7_signed! v1: %bfe_byte0_b = p_extract %b, 0, 8, 1
-      //~gfx7_signed! v1: %res9 = v_cvt_f32_u32 %bfe_byte0_b
-      //~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0
-      //~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
-      //! p_unit_test 9, %res9
-      Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u),
-                                    Operand::c32(is_signed));
-      writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
-
-      //~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
-      //~gfx7_signed! v1: %res10 = v_cvt_f32_u32 %bfe_byte1_b
-      //~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1
-      //~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
-      //! p_unit_test 10, %res10
-      Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u),
-                                    Operand::c32(is_signed));
-      writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
-
-      //~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
-      //~gfx7_signed! v1: %res11 = v_cvt_f32_u32 %bfe_byte2_b
-      //~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2
-      //~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
-      //! p_unit_test 11, %res11
-      Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u), Operand::c32(8u),
-                                    Operand::c32(is_signed));
-      writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
-
-      //~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
-      //~gfx7_signed! v1: %res12 = v_cvt_f32_u32 %bfe_byte3_b
-      //~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3
-      //~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
-      //! p_unit_test 12, %res12
-      Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u), Operand::c32(8u),
-                                    Operand::c32(is_signed));
-      writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
-
-      /* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
-      //~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
-      //~gfx(9|10).*! p_unit_test 13, %res13
-      Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u),
-                                    Operand::c32(is_signed));
-      writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
-
-      //~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
-      //~gfx(9|10).*! p_unit_test 14, %res14
-      Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
-                                    Operand::c32(16u), Operand::c32(is_signed));
-      writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b));
+      for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
+         //>> v1: %a, v1: %b, s1: %c, s1: %d = p_startpgm
+         if (!setup_cs("v1 v1 s1 s1", (amd_gfx_level)i, CHIP_UNKNOWN,
+                       is_signed ? "_signed" : "_unsigned"))
+            continue;
+
+         //; def standard_test(index, sel):
+         //;    res = 'v1: %%res%s = v_mul_f32 %%a, %%b dst_sel:dword src0_sel:dword src1_sel:%c%s\n' % (index, 's' if variant.endswith('_signed') else 'u', sel)
+         //;    res += 'p_unit_test %s, %%res%s' % (index, index)
+         //;    return res
+         //; funcs['standard_test'] = lambda a: standard_test(*(v for v in a.split(',')))
+
+         aco_opcode ext = aco_opcode::p_extract;
+         aco_opcode ins = aco_opcode::p_insert;
+
+         {
+            //~gfx[^7].*! @standard_test(0,byte0)
+            Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
+                                          Operand::c32(8u), Operand::c32(is_signed));
+            writeout(0, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte0_b));
+
+            //~gfx[^7].*! @standard_test(1,byte1)
+            Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
+                                          Operand::c32(8u), Operand::c32(is_signed));
+            writeout(1, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte1_b));
+
+            //~gfx[^7].*! @standard_test(2,byte2)
+            Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u),
+                                          Operand::c32(8u), Operand::c32(is_signed));
+            writeout(2, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte2_b));
+
+            //~gfx[^7].*! @standard_test(3,byte3)
+            Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u),
+                                          Operand::c32(8u), Operand::c32(is_signed));
+            writeout(3, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_byte3_b));
+
+            //~gfx[^7].*! @standard_test(4,word0)
+            Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
+                                          Operand::c32(16u), Operand::c32(is_signed));
+            writeout(4, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word0_b));
+
+            //~gfx[^7].*! @standard_test(5,word1)
+            Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
+                                          Operand::c32(16u), Operand::c32(is_signed));
+            writeout(5, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfe_word1_b));
+
+            //~gfx[^7]_unsigned! @standard_test(6,byte0)
+            Temp bfi_byte0_b =
+               bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(8u));
+            writeout(6, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte0_b));
+
+            //~gfx[^7]_unsigned! @standard_test(7,word0)
+            Temp bfi_word0_b =
+               bld.pseudo(ins, bld.def(v1), inputs[1], Operand::zero(), Operand::c32(16u));
+            writeout(7, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_word0_b));
+         }
+
+         //>> p_unit_test 63
+         writeout(63);
+
+         {
+            //! v1: %tmp8 = p_insert %b, 1, 8
+            //! v1: %res8 = v_mul_f32 %a, %tmp8
+            //! p_unit_test 8, %res8
+            Temp bfi_byte1_b =
+               bld.pseudo(ins, bld.def(v1), inputs[1], Operand::c32(1u), Operand::c32(8u));
+            writeout(8, bld.vop2(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], bfi_byte1_b));
+
+            /* v_cvt_f32_ubyte[0-3] can be used instead of v_cvt_f32_u32+sdwa */
+            //~gfx7_signed! v1: %bfe_byte0_b = p_extract %b, 0, 8, 1
+            //~gfx7_signed! v1: %res9 = v_cvt_f32_u32 %bfe_byte0_b
+            //~gfx[^7]+_signed! v1: %res9 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte0
+            //~gfx\d+_unsigned! v1: %res9 = v_cvt_f32_ubyte0 %b
+            //! p_unit_test 9, %res9
+            Temp bfe_byte0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
+                                          Operand::c32(8u), Operand::c32(is_signed));
+            writeout(9, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte0_b));
+
+            //~gfx7_signed! v1: %bfe_byte1_b = p_extract %b, 1, 8, 1
+            //~gfx7_signed! v1: %res10 = v_cvt_f32_u32 %bfe_byte1_b
+            //~gfx[^7]+_signed! v1: %res10 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte1
+            //~gfx\d+_unsigned! v1: %res10 = v_cvt_f32_ubyte1 %b
+            //! p_unit_test 10, %res10
+            Temp bfe_byte1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
+                                          Operand::c32(8u), Operand::c32(is_signed));
+            writeout(10, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte1_b));
+
+            //~gfx7_signed! v1: %bfe_byte2_b = p_extract %b, 2, 8, 1
+            //~gfx7_signed! v1: %res11 = v_cvt_f32_u32 %bfe_byte2_b
+            //~gfx[^7]+_signed! v1: %res11 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte2
+            //~gfx\d+_unsigned! v1: %res11 = v_cvt_f32_ubyte2 %b
+            //! p_unit_test 11, %res11
+            Temp bfe_byte2_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(2u),
+                                          Operand::c32(8u), Operand::c32(is_signed));
+            writeout(11, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte2_b));
+
+            //~gfx7_signed! v1: %bfe_byte3_b = p_extract %b, 3, 8, 1
+            //~gfx7_signed! v1: %res12 = v_cvt_f32_u32 %bfe_byte3_b
+            //~gfx[^7]+_signed! v1: %res12 = v_cvt_f32_u32 %b dst_sel:dword src0_sel:sbyte3
+            //~gfx\d+_unsigned! v1: %res12 = v_cvt_f32_ubyte3 %b
+            //! p_unit_test 12, %res12
+            Temp bfe_byte3_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(3u),
+                                          Operand::c32(8u), Operand::c32(is_signed));
+            writeout(12, bld.vop1(aco_opcode::v_cvt_f32_u32, bld.def(v1), bfe_byte3_b));
+
+            /* VOP3-only instructions can't use SDWA but they can use opsel on GFX9+ instead */
+            //~gfx(9|10).*! v1: %res13 = v_add_i16 %a, %b
+            //~gfx(9|10).*! p_unit_test 13, %res13
+            Temp bfe_word0_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::zero(),
+                                          Operand::c32(16u), Operand::c32(is_signed));
+            writeout(13, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word0_b));
+
+            //~gfx(9|10).*! v1: %res14 = v_add_i16 %a, hi(%b)
+            //~gfx(9|10).*! p_unit_test 14, %res14
+            Temp bfe_word1_b = bld.pseudo(ext, bld.def(v1), inputs[1], Operand::c32(1u),
+                                          Operand::c32(16u), Operand::c32(is_signed));
+            writeout(14, bld.vop3(aco_opcode::v_add_i16, bld.def(v1), inputs[0], bfe_word1_b));
+         }
+
+         finish_opt_test();
       }
-
-      finish_opt_test();
-   }
    }
 END_TEST
 
index f6d0892..c067c83 100644 (file)
@@ -52,8 +52,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v2b), Definition(v1_lo, v2b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v1_lo, v2b),
                  Operand(v1_lo, v2b), Operand(v0_lo, v2b));
 
       //~gfx[67]! p_unit_test 1
@@ -61,9 +60,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[1][0:16], %0:v[0][16:32], 2
       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
-      bld.pseudo(aco_opcode::p_create_vector,
-                 Definition(v0_lo, v1),
-                 Operand(v1_lo, v2b), Operand(v0_lo, v2b));
+      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v2b),
+                 Operand(v0_lo, v2b));
 
       //~gfx[67]! p_unit_test 2
       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
@@ -71,8 +69,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[2][0:16]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
-      bld.pseudo(aco_opcode::p_create_vector,
-                 Definition(v0_lo, v6b), Operand(v1_lo, v2b),
+      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v6b), Operand(v1_lo, v2b),
                  Operand(v0_lo, v2b), Operand(v2_lo, v2b));
 
       //~gfx[67]! p_unit_test 3
@@ -82,10 +79,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[2][0:16]
       //~gfx[67]! v1: %0:v[1] = v_alignbyte_b32 %0:v[3][0:16], %0:v[1][16:32], 2
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
-      bld.pseudo(aco_opcode::p_create_vector,
-                 Definition(v0_lo, v2),
-                 Operand(v1_lo, v2b), Operand(v0_lo, v2b),
-                 Operand(v2_lo, v2b), Operand(v3_lo, v2b));
+      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
+                 Operand(v0_lo, v2b), Operand(v2_lo, v2b), Operand(v3_lo, v2b));
 
       //~gfx[67]! p_unit_test 4
       //~gfx[67]! v2b: %0:v[1][16:32] = v_lshlrev_b32 16, %0:v[1][0:16]
@@ -96,17 +91,14 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
-      bld.pseudo(aco_opcode::p_create_vector,
-                 Definition(v0_lo, v2),
-                 Operand(v1_lo, v2b), Operand(v2_lo, v2b),
-                 Operand(v0_lo, v2b), Operand(v3_lo, v2b));
+      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2), Operand(v1_lo, v2b),
+                 Operand(v2_lo, v2b), Operand(v0_lo, v2b), Operand(v3_lo, v2b));
 
       //~gfx[67]! p_unit_test 5
       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
-      bld.pseudo(aco_opcode::p_split_vector,
-                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
+      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                  Operand(v0_lo, v1));
 
       //~gfx[67]! p_unit_test 6
@@ -114,8 +106,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v2b: %0:v[1][0:16] = v_mov_b32 %0:v[0][0:16]
       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
-      bld.pseudo(aco_opcode::p_split_vector,
-                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
+      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
                  Definition(v2_lo, v2b), Operand(v0_lo, v6b));
 
       //~gfx[67]! p_unit_test 7
@@ -124,10 +115,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v2b: %0:v[0][0:16] = v_lshrrev_b32 16, %0:v[1][16:32]
       //~gfx[67]! v2b: %0:v[3][0:16] = v_lshrrev_b32 16, %0:v[2][16:32]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
-      bld.pseudo(aco_opcode::p_split_vector,
-                 Definition(v1_lo, v2b), Definition(v0_lo, v2b),
-                 Definition(v2_lo, v2b), Definition(v3_lo, v2b),
-                 Operand(v0_lo, v2));
+      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v0_lo, v2b),
+                 Definition(v2_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));
 
       //~gfx[67]! p_unit_test 8
       //~gfx[67]! v2b: %0:v[2][0:16] = v_lshrrev_b32 16, %0:v[0][16:32]
@@ -136,18 +125,15 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
-      bld.pseudo(aco_opcode::p_split_vector,
-                 Definition(v1_lo, v2b), Definition(v2_lo, v2b),
-                 Definition(v0_lo, v2b), Definition(v3_lo, v2b),
-                 Operand(v0_lo, v2));
+      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v2b), Definition(v2_lo, v2b),
+                 Definition(v0_lo, v2b), Definition(v3_lo, v2b), Operand(v0_lo, v2));
 
       //~gfx[67]! p_unit_test 9
       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
       //~gfx[67]! v1: %0:v[0] = v_xor_b32 %0:v[1], %0:v[0]
       //~gfx[67]! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v1b), Definition(v1_lo, v1b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v1_lo, v1b),
                  Operand(v1_lo, v1b), Operand(v0_lo, v1b));
 
       //~gfx[67]! p_unit_test 10
@@ -155,9 +141,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v2b: %0:v[1][0:16] = v_alignbyte_b32 %0:v[0][0:8], %0:v[1][24:32], 3
       //~gfx[67]! v2b: %0:v[0][0:16] = v_mov_b32 %0:v[1][0:16]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
-      bld.pseudo(aco_opcode::p_create_vector,
-                 Definition(v0_lo, v2b),
-                 Operand(v1_lo, v1b), Operand(v0_lo, v1b));
+      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v2b), Operand(v1_lo, v1b),
+                 Operand(v0_lo, v1b));
 
       //~gfx[67]! p_unit_test 11
       //~gfx[67]! v1b: %0:v[1][24:32] = v_lshlrev_b32 24, %0:v[1][0:8]
@@ -166,8 +151,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v2b: %0:v[0][16:32] = v_lshlrev_b32 16, %0:v[0][0:16]
       //~gfx[67]! v3b: %0:v[0][0:24] = v_alignbyte_b32 %0:v[2][0:8], %0:v[0][16:32], 2
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(11u));
-      bld.pseudo(aco_opcode::p_create_vector,
-                 Definition(v0_lo, v3b), Operand(v1_lo, v1b),
+      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v3b), Operand(v1_lo, v1b),
                  Operand(v0_lo, v1b), Operand(v2_lo, v1b));
 
       //~gfx[67]! p_unit_test 12
@@ -179,10 +163,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v3b: %0:v[0][8:32] = v_lshlrev_b32 8, %0:v[0][0:24]
       //~gfx[67]! v1: %0:v[0] = v_alignbyte_b32 %0:v[3][0:8], %0:v[0][8:32], 1
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
-      bld.pseudo(aco_opcode::p_create_vector,
-                 Definition(v0_lo, v1),
-                 Operand(v1_lo, v1b), Operand(v0_lo, v1b),
-                 Operand(v2_lo, v1b), Operand(v3_lo, v1b));
+      bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v1_lo, v1b),
+                 Operand(v0_lo, v1b), Operand(v2_lo, v1b), Operand(v3_lo, v1b));
 
       //~gfx[67]! p_unit_test 13
       //~gfx[67]! v1b: %0:v[0][0:8] = v_and_b32 0xff, %0:v[0][0:8]
@@ -193,18 +175,16 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! s1: %0:m0 = s_mov_b32 0x1000001
       //~gfx[67]! v1: %0:v[0] = v_mul_lo_u32 %0:m0, %0:v[0][0:8]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
-      Instruction* pseudo = bld.pseudo(aco_opcode::p_create_vector,
-                                       Definition(v0_lo, v1),
-                                       Operand(v0_lo, v1b), Operand(v0_lo, v1b),
-                                       Operand(v0_lo, v1b), Operand(v0_lo, v1b));
+      Instruction* pseudo =
+         bld.pseudo(aco_opcode::p_create_vector, Definition(v0_lo, v1), Operand(v0_lo, v1b),
+                    Operand(v0_lo, v1b), Operand(v0_lo, v1b), Operand(v0_lo, v1b));
       pseudo->pseudo().scratch_sgpr = m0;
 
       //~gfx[67]! p_unit_test 14
       //~gfx[67]! v1b: %0:v[1][0:8] = v_mov_b32 %0:v[0][0:8]
       //~gfx[67]! v1b: %0:v[0][0:8] = v_lshrrev_b32 8, %0:v[1][8:16]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
-      bld.pseudo(aco_opcode::p_split_vector,
-                 Definition(v1_lo, v1b), Definition(v0_lo, v1b),
+      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
                  Operand(v0_lo, v2b));
 
       //~gfx[67]! p_unit_test 15
@@ -213,10 +193,8 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[67]! v1b: %0:v[2][0:8] = v_lshrrev_b32 16, %0:v[1][16:24]
       //~gfx[67]! v1b: %0:v[3][0:8] = v_lshrrev_b32 24, %0:v[1][24:32]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
-      bld.pseudo(aco_opcode::p_split_vector,
-                 Definition(v1_lo, v1b), Definition(v0_lo, v1b),
-                 Definition(v2_lo, v1b), Definition(v3_lo, v1b),
-                 Operand(v0_lo, v1));
+      bld.pseudo(aco_opcode::p_split_vector, Definition(v1_lo, v1b), Definition(v0_lo, v1b),
+                 Definition(v2_lo, v1b), Definition(v3_lo, v1b), Operand(v0_lo, v1));
 
       //~gfx[67]! s_endpgm
 
@@ -231,8 +209,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx8! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
       //~gfx(9|11)! v1: %0:v[0] = v_pack_b32_f16 hi(%0:v[0][16:32]), %0:v[0][0:16]
       bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
                  Operand(v0_hi, v2b), Operand(v0_lo, v2b));
 
       //~gfx(8|9|11)! p_unit_test 1
@@ -243,8 +220,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[89]! v2b: %0:v[1][16:32] = v_mov_b32 %0:v[0][16:32] dst_sel:uword1 dst_preserve src0_sel:uword1
       //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(1u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v1), Definition(v1_lo, v2b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
                  Operand(v1_lo, v1), Operand(v0_lo, v2b));
 
       //~gfx(8|9|11)! p_unit_test 2
@@ -259,9 +235,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx11! v2b: %0:v[1][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
       //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], %0:v[1][0:16]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v1), Definition(v1_lo, v2b), Definition(v1_hi, v2b),
-                 Operand(v1_lo, v1), Operand(v0_lo, v2b), Operand(v0_lo, v2b));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v2b),
+                 Definition(v1_hi, v2b), Operand(v1_lo, v1), Operand(v0_lo, v2b),
+                 Operand(v0_lo, v2b));
 
       //~gfx(8|9|11)! p_unit_test 3
       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@@ -273,8 +249,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx11! v2b: %0:v[1][0:16] = v_mov_b16 %0:v[0][0:16]
       //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7020504
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(3u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v1), Definition(v1_b3, v1b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_b3, v1b),
                  Operand(v1_lo, v1), Operand(v0_b3, v1b));
 
       //~gfx(8|9|11)! p_unit_test 4
@@ -287,8 +262,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x7060104
       //~gfx11! v2b: %0:v[1][16:32] = v_mov_b16 hi(%0:v[0][16:32]) opsel_hi
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v1), Definition(v1_lo, v1b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1), Definition(v1_lo, v1b),
                  Operand(v1_lo, v1), Operand(v0_lo, v1b));
 
       //~gfx(8|9|11)! p_unit_test 5
@@ -301,9 +275,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x7060104
       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], %0:v[1], 0x3060504
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(5u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v1b), Definition(v0_hi, v1b), Definition(v1_lo, v1),
-                 Operand(v1_lo, v1b), Operand(v1_hi, v1b), Operand(v0_lo, v1));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Definition(v0_hi, v1b),
+                 Definition(v1_lo, v1), Operand(v1_lo, v1b), Operand(v1_hi, v1b),
+                 Operand(v0_lo, v1));
 
       //~gfx(8|9|11)! p_unit_test 6
       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@@ -311,9 +285,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
       //~gfx(9|11)! v1: %0:v[0],  v1: %0:v[1] = v_swap_b32 %0:v[1], %0:v[0]
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(6u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
-                 Operand(v1_lo, v2b), Operand(v1_hi, v2b), Operand(v0_lo, v1));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+                 Definition(v1_lo, v1), Operand(v1_lo, v2b), Operand(v1_hi, v2b),
+                 Operand(v0_lo, v1));
 
       //~gfx(8|9|11)! p_unit_test 7
       //~gfx8! v1: %0:v[0] = v_xor_b32 %0:v[0], %0:v[1]
@@ -322,9 +296,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx(9|11)! v1: %0:v[1],  v1: %0:v[0] = v_swap_b32 %0:v[0], %0:v[1]
       //~gfx(8|9|11)! v1: %0:v[0] = v_alignbyte_b32 %0:v[0][0:16], %0:v[0][16:32], 2
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(7u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v2b), Definition(v0_hi, v2b), Definition(v1_lo, v1),
-                 Operand(v1_hi, v2b), Operand(v1_lo, v2b), Operand(v0_lo, v1));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Definition(v0_hi, v2b),
+                 Definition(v1_lo, v1), Operand(v1_hi, v2b), Operand(v1_lo, v2b),
+                 Operand(v0_lo, v1));
 
       //~gfx(8|9|11)! p_unit_test 8
       //~gfx8! v1: %0:v[1] = v_xor_b32 %0:v[1], %0:v[0]
@@ -342,8 +316,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
       //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(8u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v3b), Definition(v1_lo, v3b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
                  Operand(v1_lo, v3b), Operand(v0_lo, v3b));
 
       //~gfx(8|9|11)! p_unit_test 9
@@ -354,9 +327,9 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[89]! v1b: %0:v[1][24:32] = v_mov_b32 %0:v[0][24:32] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3
       //~gfx11! v1: %0:v[1] = v_perm_b32 %0:v[1], %0:v[0], 0x3060504
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(9u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_lo, v3b), Definition(v1_lo, v3b), Definition(v0_b3, v1b),
-                 Operand(v1_lo, v3b), Operand(v0_lo, v3b), Operand(v1_b3, v1b));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v3b), Definition(v1_lo, v3b),
+                 Definition(v0_b3, v1b), Operand(v1_lo, v3b), Operand(v0_lo, v3b),
+                 Operand(v1_b3, v1b));
 
       //~gfx(8|9|11)! p_unit_test 10
       //~gfx[89]! v1b: %0:v[1][8:16] = v_xor_b32 %0:v[1][8:16], %0:v[0][8:16] dst_sel:ubyte1 dst_preserve src0_sel:ubyte1 src1_sel:ubyte1
@@ -380,8 +353,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx11! v2b: %0:v[1][16:32] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32]) opsel_hi
       //~gfx11! v2b: %0:v[0][0:16] = v_sub_u16_e64 %0:v[0][0:16], hi(%0:v[1][16:32])
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(10u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_b1, v2b), Definition(v1_b1, v2b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v2b), Definition(v1_b1, v2b),
                  Operand(v1_b1, v2b), Operand(v0_b1, v2b));
 
       //~gfx(8|9|11)! p_unit_test 11
@@ -398,8 +370,7 @@ BEGIN_TEST(to_hw_instr.swap_subdword)
       //~gfx[89]! v1b: %0:v[0][24:32] = v_xor_b32 %0:v[0][24:32], %0:v[0][8:16] dst_sel:ubyte3 dst_preserve src0_sel:ubyte3 src1_sel:ubyte1
       //~gfx11! v1: %0:v[0] = v_perm_b32 %0:v[0], 0, 0x5060704
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(12u));
-      bld.pseudo(aco_opcode::p_parallelcopy,
-                 Definition(v0_b1, v1b), Definition(v0_b3, v1b),
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_b1, v1b), Definition(v0_b3, v1b),
                  Operand(v0_b3, v1b), Operand(v0_b1, v1b));
 
       //~gfx(8|9|11)! s_endpgm
@@ -535,8 +506,7 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
       //~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060c0d
       //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0xff
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(13u));
-      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
-                 Operand::c16(0x00ff));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::c16(0x00ff));
 
       //! p_unit_test 14
       //~gfx9! v1: %_:v[0] = v_and_b32 0xffff, %_:v[0]
@@ -544,29 +514,25 @@ BEGIN_TEST(to_hw_instr.subdword_constant)
       //~gfx10! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0xd0c0504
       //~gfx11! v2b: %0:v[0][16:32] = v_mov_b16 0xffffff00 opsel_hi
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(14u));
-      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b),
-                 Operand::c16(0xff00));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_hi, v2b), Operand::c16(0xff00));
 
       //! p_unit_test 15
       //~gfx(9|10)! v2b: %_:v[0][0:16] = v_mov_b32 0 dst_sel:uword0 dst_preserve src0_sel:dword
       //~gfx11! v2b: %0:v[0][0:16] = v_mov_b16 0
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(15u));
-      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b),
-                 Operand::zero(2));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v2b), Operand::zero(2));
 
       //! p_unit_test 16
       //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 -1 dst_sel:ubyte0 dst_preserve src0_sel:dword
       //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050d
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(16u));
-      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
-                 Operand::c8(0xff));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::c8(0xff));
 
       //! p_unit_test 17
       //~gfx(9|10)! v1b: %_:v[0][0:8] = v_mov_b32 0 dst_sel:ubyte0 dst_preserve src0_sel:dword
       //~gfx11! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x706050c
       bld.pseudo(aco_opcode::p_unit_test, Operand::c32(17u));
-      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b),
-                 Operand::zero(1));
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(v0_lo, v1b), Operand::zero(1));
 
       //! s_endpgm
 
@@ -589,12 +555,12 @@ BEGIN_TEST(to_hw_instr.self_intersecting_swap)
    //! v1: %0:v[3],  v1: %0:v[7] = v_swap_b32 %0:v[7], %0:v[3]
    //! s_endpgm
    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
-   //v[1:2] = v[2:3]
-   //v3 = v7
-   //v7 = v1
-   bld.pseudo(aco_opcode::p_parallelcopy,
-              Definition(reg_v1, v2), Definition(reg_v3, v1), Definition(reg_v7, v1),
-              Operand(reg_v2, v2), Operand(reg_v7, v1), Operand(reg_v1, v1));
+   // v[1:2] = v[2:3]
+   // v3 = v7
+   // v7 = v1
+   bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v1, v2), Definition(reg_v3, v1),
+              Definition(reg_v7, v1), Operand(reg_v2, v2), Operand(reg_v7, v1),
+              Operand(reg_v1, v1));
 
    finish_to_hw_instr_test();
 END_TEST
@@ -606,98 +572,98 @@ BEGIN_TEST(to_hw_instr.extract)
    PhysReg v1_lo{257};
 
    for (amd_gfx_level lvl : {GFX7, GFX8, GFX9, GFX11}) {
-   for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
-      if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
-         continue;
+      for (unsigned is_signed = 0; is_signed <= 1; is_signed++) {
+         if (!setup_cs(NULL, lvl, CHIP_UNKNOWN, is_signed ? "_signed" : "_unsigned"))
+            continue;
 
 #define EXT(idx, size)                                                                             \
    bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v1), Operand(v1_lo, v1), Operand::c32(idx), \
               Operand::c32(size), Operand::c32(is_signed));
 
-      //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
-      //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
-      //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
-      //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
-      //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)
-
-      //>> p_unit_test 0
-      bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
-      //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
-      EXT(0, 8)
-      //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
-      EXT(1, 8)
-      //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
-      EXT(2, 8)
-      //! v1: %_:v[0] = @v_shr 24, %_:v[1]
-      EXT(3, 8)
-      //~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
-      //~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
-      //~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
-      EXT(0, 16)
-      //! v1: %_:v[0] = @v_shr 16, %_:v[1]
-      EXT(1, 16)
-
-      #undef EXT
+         //; funcs['v_bfe'] = lambda _: 'v_bfe_i32' if variant.endswith('_signed') else 'v_bfe_u32'
+         //; funcs['v_shr'] = lambda _: 'v_ashrrev_i32' if variant.endswith('_signed') else 'v_lshrrev_b32'
+         //; funcs['s_bfe'] = lambda _: 's_bfe_i32' if variant.endswith('_signed') else 's_bfe_u32'
+         //; funcs['s_shr'] = lambda _: 's_ashr_i32' if variant.endswith('_signed') else 's_lshr_b32'
+         //; funcs['byte'] = lambda n: '%cbyte%s' % ('s' if variant.endswith('_signed') else 'u', n)
+
+         //>> p_unit_test 0
+         bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
+         //! v1: %_:v[0] = @v_bfe %_:v[1], 0, 8
+         EXT(0, 8)
+         //! v1: %_:v[0] = @v_bfe %_:v[1], 8, 8
+         EXT(1, 8)
+         //! v1: %_:v[0] = @v_bfe %_:v[1], 16, 8
+         EXT(2, 8)
+         //! v1: %_:v[0] = @v_shr 24, %_:v[1]
+         EXT(3, 8)
+         //~gfx(7|8|9)_.*! v1: %_:v[0] = @v_bfe %_:v[1], 0, 16
+         //~gfx11_unsigned! v1: %_:v[0] = v_cvt_u32_u16 %_:v[1]
+         //~gfx11_signed! v1: %_:v[0] = v_cvt_i32_i16 %_:v[1]
+         EXT(0, 16)
+         //! v1: %_:v[0] = @v_shr 16, %_:v[1]
+         EXT(1, 16)
+
+#undef EXT
 
 #define EXT(idx, size)                                                                             \
    bld.pseudo(aco_opcode::p_extract, Definition(s0_lo, s1), Definition(scc, s1),                   \
               Operand(s1_lo, s1), Operand::c32(idx), Operand::c32(size), Operand::c32(is_signed));
 
-      //>> p_unit_test 2
-      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
-      //~gfx.*_unsigned! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80000
-      //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
-      EXT(0, 8)
-      //! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80008
-      EXT(1, 8)
-      //! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80010
-      EXT(2, 8)
-      //! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 24
-      EXT(3, 8)
-      //~gfx(7|8)_unsigned! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x100000
-      //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
-      //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
-      EXT(0, 16)
-      //! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 16
-      EXT(1, 16)
-
-      #undef EXT
+         //>> p_unit_test 2
+         bld.pseudo(aco_opcode::p_unit_test, Operand::c32(2u));
+         //~gfx.*_unsigned! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80000
+         //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i8 %_:s[1]
+         EXT(0, 8)
+         //! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80008
+         EXT(1, 8)
+         //! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x80010
+         EXT(2, 8)
+         //! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 24
+         EXT(3, 8)
+         //~gfx(7|8)_unsigned! s1: %_:s[0],  s1: %_:scc = @s_bfe %_:s[1], 0x100000
+         //~gfx(9|11)_unsigned! s1: %_:s[0] = s_pack_ll_b32_b16 %_:s[1], 0
+         //~gfx.*_signed! s1: %_:s[0] = s_sext_i32_i16 %_:s[1]
+         EXT(0, 16)
+         //! s1: %_:s[0],  s1: %_:scc = @s_shr %_:s[1], 16
+         EXT(1, 16)
+
+#undef EXT
 
 #define EXT(idx, src_b)                                                                            \
    bld.pseudo(aco_opcode::p_extract, Definition(v0_lo, v2b), Operand(v1_lo.advance(src_b), v2b),   \
               Operand::c32(idx), Operand::c32(8u), Operand::c32(is_signed));
 
-      //>> p_unit_test 4
-      bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
-      //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
-      //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
-      //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
-      //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
-      //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
-      EXT(0, 0)
-      //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
-      //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
-      //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
-      //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
-      if (lvl != GFX7)
-         EXT(0, 2)
-      //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
-      //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
-      //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
-      //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
-      EXT(1, 0)
-      //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
-      //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
-      //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
-      if (lvl != GFX7)
-         EXT(1, 2)
-
-      #undef EXT
-
-      finish_to_hw_instr_test();
-
-      //! s_endpgm
-   }
+         //>> p_unit_test 4
+         bld.pseudo(aco_opcode::p_unit_test, Operand::c32(4u));
+         //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 0, 8
+         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(0)
+         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c00
+         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060000
+         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
+         EXT(0, 0)
+         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(2)
+         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c02
+         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060202
+         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], 0, 0x7060a04
+         if (lvl != GFX7)
+            EXT(0, 2)
+         //~gfx7.*! v2b: %_:v[0][0:16] = @v_bfe %_:v[1][0:16], 8, 8
+         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][0:16] dst_sel:uword0 dst_preserve src0_sel:@byte(1)
+         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c01
+         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060801
+         EXT(1, 0)
+         //~gfx(8|9).*! v2b: %_:v[0][0:16] = v_mov_b32 %_:v[1][16:32] dst_sel:uword0 dst_preserve src0_sel:@byte(3)
+         //~gfx11_unsigned! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060c03
+         //~gfx11_signed! v1: %_:v[0] = v_perm_b32 %_:v[0], %_:v[1], 0x7060903
+         if (lvl != GFX7)
+            EXT(1, 2)
+
+#undef EXT
+
+         finish_to_hw_instr_test();
+
+         //! s_endpgm
+      }
    }
 END_TEST
 
@@ -736,7 +702,7 @@ BEGIN_TEST(to_hw_instr.insert)
       //! v1: %0:v[0] = v_lshlrev_b32 16, %0:v[1]
       INS(1, 16)
 
-      #undef INS
+#undef INS
 
 #define INS(idx, size)                                                                             \
    bld.pseudo(aco_opcode::p_insert, Definition(s0_lo, s1), Definition(scc, s1),                    \
@@ -759,7 +725,7 @@ BEGIN_TEST(to_hw_instr.insert)
       //! s1: %_:s[0],  s1: %_:scc = s_lshl_b32 %_:s[1], 16
       INS(1, 16)
 
-      #undef INS
+#undef INS
 
 #define INS(idx, def_b)                                                                            \
    bld.pseudo(aco_opcode::p_insert, Definition(v0_lo.advance(def_b), v2b), Operand(v1_lo, v2b),    \
@@ -784,7 +750,7 @@ BEGIN_TEST(to_hw_instr.insert)
       if (lvl != GFX7)
          INS(1, 2)
 
-      #undef INS
+#undef INS
 
       finish_to_hw_instr_test();
 
@@ -816,10 +782,9 @@ BEGIN_TEST(to_hw_instr.copy_linear_vgpr_scc)
    //! lv1: %0:v[0] = v_mov_b32 %0:v[1]
    //! s2: %0:exec,  s1: %0:scc = s_not_b64 %0:exec
    //! s1: %0:scc = s_cmp_lg_i32 %0:m0, 0
-   Instruction *instr = bld.pseudo(
-      aco_opcode::p_parallelcopy,
-      Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
-      Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
+   Instruction* instr =
+      bld.pseudo(aco_opcode::p_parallelcopy, Definition(scc, s1), Definition(v0_lo, v1.as_linear()),
+                 Operand(reg_s0, s1), Operand(v1_lo, v1.as_linear()));
    instr->pseudo().scratch_sgpr = m0;
 
    finish_to_hw_instr_test();
@@ -836,10 +801,9 @@ BEGIN_TEST(to_hw_instr.swap_linear_vgpr)
    //>> p_unit_test 0
    bld.pseudo(aco_opcode::p_unit_test, Operand::zero());
 
-   Instruction *instr = bld.pseudo(
-      aco_opcode::p_parallelcopy,
-      Definition(reg_v0, v1_linear), Definition(reg_v1, v1_linear),
-      Operand(reg_v1, v1_linear), Operand(reg_v0, v1_linear));
+   Instruction* instr = bld.pseudo(aco_opcode::p_parallelcopy, Definition(reg_v0, v1_linear),
+                                   Definition(reg_v1, v1_linear), Operand(reg_v1, v1_linear),
+                                   Operand(reg_v0, v1_linear));
    instr->pseudo().scratch_sgpr = m0;
 
    finish_to_hw_instr_test();