index_op.setLateKill(true);
input_data.setLateKill(true);
- return bld.pseudo(aco_opcode::p_bpermute, bld.def(v1), bld.def(bld.lm), bld.def(bld.lm, vcc),
- index_op, input_data);
+ return bld.pseudo(aco_opcode::p_bpermute_gfx6, bld.def(v1), bld.def(bld.lm),
+ bld.def(bld.lm, vcc), index_op, input_data);
} else if (ctx->options->gfx_level >= GFX10 && ctx->program->wave_size == 64) {
/* GFX10 wave64 mode: emulate full-wave bpermute */
* Note, that these have twice the allocation granularity of normal VGPRs */
ctx->program->config->num_shared_vgprs = 2 * ctx->program->dev.vgpr_alloc_granule;
- return bld.pseudo(aco_opcode::p_bpermute, bld.def(v1), bld.def(s2), bld.def(s1, scc),
+ return bld.pseudo(aco_opcode::p_bpermute_gfx10w64, bld.def(v1), bld.def(s2), bld.def(s1, scc),
index_x4, input_data, same_half);
} else {
/* GFX8-9 or GFX10 wave32: bpermute works normally */
}
break;
}
- case aco_opcode::p_bpermute: {
- if (ctx.program->gfx_level <= GFX7)
- emit_gfx6_bpermute(program, instr, bld);
- else if (ctx.program->gfx_level >= GFX10 && ctx.program->wave_size == 64)
- emit_gfx10_wave64_bpermute(program, instr, bld);
- else
- unreachable("Current hardware supports ds_bpermute, don't emit p_bpermute.");
+ case aco_opcode::p_bpermute_gfx6: {
+ emit_gfx6_bpermute(program, instr, bld);
+ break;
+ }
+ case aco_opcode::p_bpermute_gfx10w64: {
+ emit_gfx10_wave64_bpermute(program, instr, bld);
break;
}
case aco_opcode::p_constaddr: {
opcode("p_is_helper")
opcode("p_exit_early_if")
-# simulates proper bpermute behavior when it's unsupported, eg. GFX10 wave64
-opcode("p_bpermute")
+# simulates proper bpermute behavior on GFX6-7
+# definitions: result VGPR, temp EXEC, clobbered VCC
+# operands: index, input data
+opcode("p_bpermute_gfx6")
+
+# simulates proper full-wave bpermute behavior on GFX10+ in wave64 mode
+# definitions: result VGPR, temp EXEC, clobbered SCC
+# operands: index * 4, input data, same half (bool)
+opcode("p_bpermute_gfx10w64")
# creates a lane mask where only the first active lane is selected
opcode("p_elect")
case aco_opcode::v_readfirstlane_b32:
case aco_opcode::p_extract:
case aco_opcode::p_insert: return operand != 0;
- case aco_opcode::p_bpermute:
+ case aco_opcode::p_bpermute_gfx6:
+ case aco_opcode::p_bpermute_gfx10w64:
case aco_opcode::p_interp_gfx11:
case aco_opcode::p_dual_src_export_gfx11: return false;
default: return true;