"DC untyped atomic float op",
[GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP] =
"DC A64 untyped atomic float op",
+ [GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP] =
+ "DC A64 untyped atomic half-integer op",
+ [GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP] =
+ "DC A64 untyped atomic half-float op",
};
static const char *const aop[16] = {
case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
case HSW_DATAPORT_DC_PORT1_ATOMIC_COUNTER_OP_SIMD4X2:
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
+ case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP:
control(file, "atomic op", aop, msg_ctrl & 0xf, &space);
break;
case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
}
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
+ case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP:
format(file, "SIMD%d,", (msg_ctrl & (1 << 4)) ? 8 : 16);
control(file, "atomic float op", aop_float, msg_ctrl & 0xf,
&space);
{
assert(exec_size == 8);
assert(devinfo->gen >= 8);
- assert(bit_size == 32 || bit_size == 64);
+ assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
+ assert(devinfo->gen >= 12 || bit_size >= 32);
- const unsigned msg_type = GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
+ const unsigned msg_type = bit_size == 16 ?
+ GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP :
+ GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP;
const unsigned msg_control =
SET_BITS(atomic_op, 3, 0) |
static inline uint32_t
brw_dp_a64_untyped_atomic_float_desc(const struct gen_device_info *devinfo,
ASSERTED unsigned exec_size,
+ unsigned bit_size,
unsigned atomic_op,
bool response_expected)
{
assert(exec_size == 8);
assert(devinfo->gen >= 9);
+ assert(bit_size == 16 || bit_size == 32);
+ assert(devinfo->gen >= 12 || bit_size == 32);
assert(exec_size > 0);
- const unsigned msg_type = GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP;
+ const unsigned msg_type = bit_size == 32 ?
+ GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP :
+ GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP;
const unsigned msg_control =
SET_BITS(atomic_op, 1, 0) |
SHADER_OPCODE_A64_UNALIGNED_OWORD_BLOCK_READ_LOGICAL,
SHADER_OPCODE_A64_OWORD_BLOCK_WRITE_LOGICAL,
SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
+ SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL,
SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
- SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
+ SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL,
+ SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL,
SHADER_OPCODE_TYPED_ATOMIC_LOGICAL,
SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL,
#define GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ 0x10
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ 0x11
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP 0x12
+#define GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP 0x13 /* chosen by the A64 atomic descriptor when bit_size == 16; asserted gen >= 12 */
#define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_READ 0x14
#define GEN9_DATAPORT_DC_PORT1_A64_OWORD_BLOCK_WRITE 0x15
#define GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE 0x19
#define GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE 0x1a
#define GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP 0x1b
#define GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP 0x1d
+#define GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP 0x1e /* chosen by the A64 float atomic descriptor when bit_size is 16 rather than 32; asserted gen >= 12 */
/* GEN9 */
#define GEN9_DATAPORT_RC_RENDER_TARGET_WRITE 12
return i == 1 ? src[2].ud : 1;
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
assert(src[2].file == IMM);
if (i == 1) {
return 1;
}
- case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
assert(src[2].file == IMM);
if (i == 1) {
/* Data source */
!inst->dst.is_null());
break;
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
+ desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 16,
+ arg, /* atomic_op */
+ !inst->dst.is_null());
+ break;
+
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
desc = brw_dp_a64_untyped_atomic_desc(devinfo, inst->exec_size, 64,
arg, /* atomic_op */
!inst->dst.is_null());
break;
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
+ desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
+ 16, /* bit_size */
+ arg, /* atomic_op */
+ !inst->dst.is_null());
+ break;
- case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
desc = brw_dp_a64_untyped_atomic_float_desc(devinfo, inst->exec_size,
+ 32, /* bit_size */
arg, /* atomic_op */
!inst->dst.is_null());
break;
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
- case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
lower_a64_logical_send(ibld, inst);
break;
return inst->exec_size;
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
- case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
return 8;
case SHADER_OPCODE_URB_READ_SIMD8:
dest, srcs, SURFACE_LOGICAL_NUM_SRCS);
}
+/**
+ * Return a register holding \p src in a 32-bit-wide form.
+ *
+ * When the type of \p src is two bytes wide, a new BRW_REGISTER_TYPE_UD
+ * VGRF is allocated from \p bld and a MOV is emitted into it from \p src
+ * retyped as BRW_REGISTER_TYPE_UW; that new register is returned.  A source
+ * of any other size is returned as-is and no instruction is emitted.
+ */
+static fs_reg
+expand_to_32bit(const fs_builder &bld, const fs_reg &src)
+{
+   /* Only two-byte sources need widening; pass everything else through. */
+   if (type_sz(src.type) != 2) {
+      return src;
+   }
+
+   fs_reg widened = bld.vgrf(BRW_REGISTER_TYPE_UD);
+   bld.MOV(widened, retype(src, BRW_REGISTER_TYPE_UW));
+   return widened;
+}
+
void
fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
int op, nir_intrinsic_instr *instr)
fs_reg data;
if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
- data = get_nir_src(instr->src[1]);
+ data = expand_to_32bit(bld, get_nir_src(instr->src[1]));
if (op == BRW_AOP_CMPWR) {
fs_reg tmp = bld.vgrf(data.type, 2);
- fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
+ fs_reg sources[2] = {
+ data,
+ expand_to_32bit(bld, get_nir_src(instr->src[2]))
+ };
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
data = tmp;
}
- if (nir_dest_bit_size(instr->dest) == 64) {
- bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
- dest, addr, data, brw_imm_ud(op));
- } else {
- assert(nir_dest_bit_size(instr->dest) == 32);
+ switch (nir_dest_bit_size(instr->dest)) {
+ case 16: {
+ fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL,
+ dest32, addr, data, brw_imm_ud(op));
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
+ break;
+ }
+ case 32:
bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
dest, addr, data, brw_imm_ud(op));
+ break;
+ case 64:
+ bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL,
+ dest, addr, data, brw_imm_ud(op));
+ break;
+ default:
+ unreachable("Unsupported bit size");
}
}
fs_reg addr = get_nir_src(instr->src[0]);
assert(op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC);
- fs_reg data = get_nir_src(instr->src[1]);
+ fs_reg data = expand_to_32bit(bld, get_nir_src(instr->src[1]));
if (op == BRW_AOP_FCMPWR) {
fs_reg tmp = bld.vgrf(data.type, 2);
- fs_reg sources[2] = { data, get_nir_src(instr->src[2]) };
+ fs_reg sources[2] = {
+ data,
+ expand_to_32bit(bld, get_nir_src(instr->src[2]))
+ };
bld.LOAD_PAYLOAD(tmp, sources, 2, 0);
data = tmp;
}
- bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
- dest, addr, data, brw_imm_ud(op));
+ switch (nir_dest_bit_size(instr->dest)) {
+ case 16: {
+ fs_reg dest32 = bld.vgrf(BRW_REGISTER_TYPE_UD);
+ bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL,
+ dest32, addr, data, brw_imm_ud(op));
+ bld.MOV(retype(dest, BRW_REGISTER_TYPE_UW), dest32);
+ break;
+ }
+ case 32:
+ bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL,
+ dest, addr, data, brw_imm_ud(op));
+ break;
+ default:
+ unreachable("Unsupported bit size");
+ }
}
void
case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
+ case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_INT_OP:
+ case GEN12_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_HALF_FLOAT_OP:
/* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
latency = 14000;
break;
return "a64_byte_scattered_write_logical";
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
return "a64_untyped_atomic_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
+ return "a64_untyped_atomic_int16_logical";
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
return "a64_untyped_atomic_int64_logical";
- case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
- return "a64_untyped_atomic_float_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
+ return "a64_untyped_atomic_float16_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
+ return "a64_untyped_atomic_float32_logical";
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
return "typed_atomic_logical";
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT16_LOGICAL:
case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
- case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT16_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT32_LOGICAL:
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: