}
static void
+emit_urb_direct_vec4_write_xe2(const fs_builder &bld,
+ unsigned offset_in_bytes,
+ const fs_reg &src,
+ fs_reg urb_handle,
+ unsigned comps,
+ unsigned mask)
+{
+ /* Write up to a vec4 of 32-bit components from `src` into the URB at a
+  * constant byte offset from `urb_handle`.  Xe2+ only: URB messages here
+  * are byte-addressed (the caller converts dwords to bytes).  `mask` is
+  * the per-component write mask, `comps` the number of components.
+  */
+ const struct intel_device_info *devinfo = bld.shader->devinfo;
+ const unsigned runit = reg_unit(devinfo);
+ /* Channels covered by one URB write: 8 per register unit (16 when
+  * reg_unit == 2 -- presumably the Xe2 case; confirm against devinfo).
+  */
+ const unsigned write_size = 8 * runit;
+
+ /* Fold the constant byte offset into the handle once, up front, so the
+  * per-group loop below uses the adjusted handle unchanged.
+  */
+ if (offset_in_bytes > 0) {
+ fs_builder bldall = bld.group(write_size, 0).exec_all();
+ fs_reg new_handle = bldall.vgrf(BRW_REGISTER_TYPE_UD);
+ bldall.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_bytes));
+ urb_handle = new_handle;
+ }
+
+ /* Split the dispatch into write_size-wide groups; one URB write each. */
+ for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
+ fs_builder hbld = bld.group(write_size, q);
+
+ fs_reg payload_srcs[comps];
+
+ /* Gather this group's horizontal slice of every component. */
+ for (unsigned c = 0; c < comps; c++)
+ payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);
+
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
+ /* Channel mask goes in the high 16 bits of this source -- NOTE(review):
+  * presumably matching the URB message descriptor layout consumed by the
+  * logical-send lowering; verify there.
+  */
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
+ /* One register unit per 32-bit component at write_size channels. */
+ int nr = bld.shader->alloc.allocate(comps * runit);
+ srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, nr, BRW_REGISTER_TYPE_F);
+ srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
+ hbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
+
+ hbld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
+ reg_undef, srcs, ARRAY_SIZE(srcs));
+ }
+}
+
+static void
+emit_urb_direct_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
+ const fs_reg &src, fs_reg urb_handle)
+{
+ /* Xe2+ direct (constant-offset) URB store: compute the total dword
+  * offset from the NIR store intrinsic and forward to the byte-addressed
+  * vec4 write helper.  Only 32-bit sources of up to 4 components are
+  * supported.
+  */
+ assert(nir_src_bit_size(instr->src[0]) == 32);
+
+ nir_src *offset_nir_src = nir_get_io_offset_src(instr);
+ /* Direct path: the IO offset must be a compile-time constant. */
+ assert(nir_src_is_const(*offset_nir_src));
+
+ const unsigned comps = nir_src_num_components(instr->src[0]);
+ assert(comps <= 4);
+
+ /* Dword offset = intrinsic base + constant IO offset + first component. */
+ const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
+ nir_src_as_uint(*offset_nir_src) +
+ component_from_intrinsic(instr);
+
+ const unsigned mask = nir_intrinsic_write_mask(instr);
+
+ /* The Xe2 write helper takes a byte offset, hence the * 4. */
+ emit_urb_direct_vec4_write_xe2(bld, offset_in_dwords * 4, src,
+ urb_handle, comps, mask);
+}
+
+static void
emit_urb_indirect_vec4_write(const fs_builder &bld,
const fs_reg &offset_src,
unsigned base,
}
static void
+emit_urb_indirect_writes_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
+ const fs_reg &src, const fs_reg &offset_src,
+ fs_reg urb_handle)
+{
+ /* Xe2+ indirect URB store: the per-channel dword offset lives in
+  * `offset_src`, so each write_size-wide group computes a per-channel
+  * byte address (offset * 4 + handle) and issues its own URB write.
+  */
+ assert(nir_src_bit_size(instr->src[0]) == 32);
+
+ const struct intel_device_info *devinfo = bld.shader->devinfo;
+ const unsigned runit = reg_unit(devinfo);
+ /* Channels per URB write: 8 per register unit. */
+ const unsigned write_size = 8 * runit;
+
+ const unsigned comps = nir_src_num_components(instr->src[0]);
+ assert(comps <= 4);
+
+ /* Constant part of the offset (in dwords); the variable part comes from
+  * offset_src inside the loop.
+  */
+ const unsigned base_in_dwords = nir_intrinsic_base(instr) +
+ component_from_intrinsic(instr);
+
+ /* Fold the constant part into the handle once, as bytes. */
+ if (base_in_dwords > 0) {
+ fs_builder bldall = bld.group(write_size, 0).exec_all();
+ fs_reg new_handle = bldall.vgrf(BRW_REGISTER_TYPE_UD);
+ bldall.ADD(new_handle, urb_handle, brw_imm_ud(base_in_dwords * 4));
+ urb_handle = new_handle;
+ }
+
+ const unsigned mask = nir_intrinsic_write_mask(instr);
+
+ for (unsigned q = 0; q < bld.dispatch_width() / write_size; q++) {
+ fs_builder wbld = bld.group(write_size, q);
+
+ fs_reg payload_srcs[comps];
+
+ /* This group's horizontal slice of each source component. */
+ for (unsigned c = 0; c < comps; c++)
+ payload_srcs[c] = horiz_offset(offset(src, bld, c), write_size * q);
+
+ /* Per-channel byte address: (dword offset << 2) + adjusted handle. */
+ fs_reg addr = wbld.vgrf(BRW_REGISTER_TYPE_UD);
+ wbld.SHL(addr, horiz_offset(offset_src, write_size * q), brw_imm_ud(2));
+ wbld.ADD(addr, addr, urb_handle);
+
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = addr;
+ /* Channel mask occupies the high 16 bits of this source. */
+ srcs[URB_LOGICAL_SRC_CHANNEL_MASK] = brw_imm_ud(mask << 16);
+ int nr = bld.shader->alloc.allocate(comps * runit);
+ srcs[URB_LOGICAL_SRC_DATA] = fs_reg(VGRF, nr, BRW_REGISTER_TYPE_F);
+ srcs[URB_LOGICAL_SRC_COMPONENTS] = brw_imm_ud(comps);
+ wbld.LOAD_PAYLOAD(srcs[URB_LOGICAL_SRC_DATA], payload_srcs, comps, 0);
+
+ wbld.emit(SHADER_OPCODE_URB_WRITE_LOGICAL,
+ reg_undef, srcs, ARRAY_SIZE(srcs));
+ }
+}
+
+static void
emit_urb_indirect_writes(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &src, const fs_reg &offset_src,
fs_reg urb_handle)
}
static void
+emit_urb_direct_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
+ const fs_reg &dest, fs_reg urb_handle)
+{
+ /* Xe2+ direct (constant-offset) URB load: issue a single SIMD16
+  * exec_all read at the constant byte offset, then broadcast each
+  * component into the full-width `dest`.
+  */
+ assert(instr->def.bit_size == 32);
+
+ unsigned comps = instr->def.num_components;
+ if (comps == 0)
+ return;
+
+ nir_src *offset_nir_src = nir_get_io_offset_src(instr);
+ /* Direct path: the IO offset must be a compile-time constant. */
+ assert(nir_src_is_const(*offset_nir_src));
+
+ fs_builder ubld16 = bld.group(16, 0).exec_all();
+
+ /* Dword offset = intrinsic base + constant IO offset + first component. */
+ const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
+ nir_src_as_uint(*offset_nir_src) +
+ component_from_intrinsic(instr);
+
+ /* Fold the constant offset into the handle, in bytes. */
+ if (offset_in_dwords > 0) {
+ fs_reg new_handle = ubld16.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld16.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_dwords * 4));
+ urb_handle = new_handle;
+ }
+
+ fs_reg data = ubld16.vgrf(BRW_REGISTER_TYPE_UD, comps);
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = urb_handle;
+
+ /* 16 channels x 4 bytes per component = 2 registers per component. */
+ fs_inst *inst = ubld16.emit(SHADER_OPCODE_URB_READ_LOGICAL,
+ data, srcs, ARRAY_SIZE(srcs));
+ inst->size_written = 2 * comps * REG_SIZE;
+
+ for (unsigned c = 0; c < comps; c++) {
+ fs_reg dest_comp = offset(dest, bld, c);
+ /* Stride 0 replicates the component's first element across the full
+  * dispatch width -- NOTE(review): this presumes the read returns the
+  * same value in every channel (handle and offset are uniform here);
+  * confirm against the URB read message semantics.
+  */
+ fs_reg data_comp = horiz_stride(offset(data, ubld16, c), 0);
+ bld.MOV(retype(dest_comp, BRW_REGISTER_TYPE_UD), data_comp);
+ }
+}
+
+static void
emit_urb_indirect_reads(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &dest, const fs_reg &offset_src, fs_reg urb_handle)
{
}
}
+static void
+emit_urb_indirect_reads_xe2(const fs_builder &bld, nir_intrinsic_instr *instr,
+ const fs_reg &dest, const fs_reg &offset_src,
+ fs_reg urb_handle)
+{
+ /* Xe2+ indirect URB load: the per-channel dword offset lives in
+  * `offset_src`, so each SIMD16 group computes per-channel byte
+  * addresses, reads, and copies its slice of the result into `dest`.
+  */
+ assert(instr->def.bit_size == 32);
+
+ unsigned comps = instr->def.num_components;
+ if (comps == 0)
+ return;
+
+ fs_builder ubld16 = bld.group(16, 0).exec_all();
+
+ /* Constant part of the offset (in dwords); the variable part comes from
+  * offset_src inside the loop.
+  */
+ const unsigned offset_in_dwords = nir_intrinsic_base(instr) +
+ component_from_intrinsic(instr);
+
+ /* Fold the constant part into the handle, in bytes. */
+ if (offset_in_dwords > 0) {
+ fs_reg new_handle = ubld16.vgrf(BRW_REGISTER_TYPE_UD);
+ ubld16.ADD(new_handle, urb_handle, brw_imm_ud(offset_in_dwords * 4));
+ urb_handle = new_handle;
+ }
+
+ /* Shared landing area for each group's read; copied out per group below. */
+ fs_reg data = ubld16.vgrf(BRW_REGISTER_TYPE_UD, comps);
+
+
+ for (unsigned q = 0; q < bld.dispatch_width() / 16; q++) {
+ fs_builder wbld = bld.group(16, q);
+
+ /* Per-channel byte address: (dword offset << 2) + adjusted handle. */
+ fs_reg addr = wbld.vgrf(BRW_REGISTER_TYPE_UD);
+ wbld.SHL(addr, horiz_offset(offset_src, 16 * q), brw_imm_ud(2));
+ wbld.ADD(addr, addr, urb_handle);
+
+ fs_reg srcs[URB_LOGICAL_NUM_SRCS];
+ srcs[URB_LOGICAL_SRC_HANDLE] = addr;
+
+ /* 16 channels x 4 bytes per component = 2 registers per component. */
+ fs_inst *inst = wbld.emit(SHADER_OPCODE_URB_READ_LOGICAL,
+ data, srcs, ARRAY_SIZE(srcs));
+ inst->size_written = 2 * comps * REG_SIZE;
+
+ /* Copy this group's slice of each component into the destination. */
+ for (unsigned c = 0; c < comps; c++) {
+ fs_reg dest_comp = horiz_offset(offset(dest, bld, c), 16 * q);
+ fs_reg data_comp = offset(data, wbld, c);
+ wbld.MOV(retype(dest_comp, BRW_REGISTER_TYPE_UD), data_comp);
+ }
+ }
+}
+
void
fs_visitor::emit_task_mesh_store(const fs_builder &bld, nir_intrinsic_instr *instr,
const fs_reg &urb_handle)
nir_src *offset_nir_src = nir_get_io_offset_src(instr);
if (nir_src_is_const(*offset_nir_src)) {
- emit_urb_direct_writes(bld, instr, src, urb_handle);
+ if (bld.shader->devinfo->ver >= 20)
+ emit_urb_direct_writes_xe2(bld, instr, src, urb_handle);
+ else
+ emit_urb_direct_writes(bld, instr, src, urb_handle);
} else {
+ if (bld.shader->devinfo->ver >= 20) {
+ emit_urb_indirect_writes_xe2(bld, instr, src, get_nir_src(*offset_nir_src), urb_handle);
+ return;
+ }
bool use_mod = false;
unsigned mod;
* a single large aligned read instead one per component.
*/
- if (nir_src_is_const(*offset_nir_src))
- emit_urb_direct_reads(bld, instr, dest, urb_handle);
- else
- emit_urb_indirect_reads(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
+ if (nir_src_is_const(*offset_nir_src)) {
+ if (bld.shader->devinfo->ver >= 20)
+ emit_urb_direct_reads_xe2(bld, instr, dest, urb_handle);
+ else
+ emit_urb_direct_reads(bld, instr, dest, urb_handle);
+ } else {
+ if (bld.shader->devinfo->ver >= 20)
+ emit_urb_indirect_reads_xe2(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
+ else
+ emit_urb_indirect_reads(bld, instr, dest, get_nir_src(*offset_nir_src), urb_handle);
+ }
}
void