struct brw_reg desc,
unsigned desc_imm);
+/**
+ * Emit a split send (SENDS) instruction with two separate payloads.
+ *
+ * \p payload0 and \p payload1 become src0 and src1 of the instruction and
+ * \p dst its destination.  \p desc and \p ex_desc may each be either an
+ * immediate or a register; \p desc_imm and \p ex_desc_imm are extra bits
+ * that get ORed into the respective descriptor.  \p sfid is written to the
+ * instruction's SFID field.
+ */
+void
+brw_send_indirect_split_message(struct brw_codegen *p,
+ unsigned sfid,
+ struct brw_reg dst,
+ struct brw_reg payload0,
+ struct brw_reg payload1,
+ struct brw_reg desc,
+ unsigned desc_imm,
+ struct brw_reg ex_desc,
+ unsigned ex_desc_imm);
+
void brw_ff_sync(struct brw_codegen *p,
struct brw_reg dest,
unsigned msg_reg_nr,
gen7_convert_mrf_to_grf(p, &dest);
- {
+ if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+ assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+ dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+ assert(dest.subnr % 16 == 0);
+ assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ dest.vstride == dest.width + 1);
+ assert(!dest.negate && !dest.abs);
+ brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
+ brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
+ brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
+ } else {
brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
gen7_convert_mrf_to_grf(p, &reg);
- if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
- brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+ if (devinfo->gen >= 6 &&
+ (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
/* Any source modifiers or regions will be ignored, since this just
* identifies the MRF/GRF to start reading the message contents from.
* Check for some likely failures.
assert(reg.address_mode == BRW_ADDRESS_DIRECT);
}
- {
+ if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+ assert(reg.file == BRW_GENERAL_REGISTER_FILE);
+ assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+ assert(reg.subnr % 16 == 0);
+ assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1);
+ assert(!reg.negate && !reg.abs);
+ brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+ brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
+ } else {
brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
brw_inst_set_src0_abs(devinfo, inst, reg.abs);
brw_inst_set_src0_negate(devinfo, inst, reg.negate);
if (reg.file == BRW_GENERAL_REGISTER_FILE)
assert(reg.nr < 128);
- {
+ if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+ assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
+ reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+ assert(reg.subnr == 0);
+ assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+ reg.vstride == reg.width + 1);
+ assert(!reg.negate && !reg.abs);
+ brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
+ brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
+ } else {
/* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
*
* "Accumulator registers may be accessed explicitly as src0
brw_inst_set_sfid(devinfo, send, sfid);
}
+/**
+ * Emit a split send (BRW_OPCODE_SENDS) instruction.
+ *
+ * The destination is retyped to UW and both payloads are retyped to UD
+ * before being set as dst, src0 and src1 of the new instruction.
+ *
+ * \p desc must have type UD.  If it is an immediate, \p desc_imm is ORed
+ * into it and the result is encoded directly in the instruction.
+ * Otherwise an OR instruction is emitted first that writes
+ * (desc | desc_imm) to address register subregister 0, and the send is
+ * flagged as taking its descriptor from a register.
+ *
+ * \p ex_desc is handled the same way, except that the register form goes
+ * through brw_address_reg(2) and additionally has \p sfid ORed in.
+ *
+ * The helper OR instructions are emitted with Align1 access mode, mask
+ * control disabled, an execution size of 1 and no predication; the
+ * caller's default instruction state is saved and restored around each.
+ *
+ * Finally \p sfid is written to the SFID field of the send itself.
+ */
+void
+brw_send_indirect_split_message(struct brw_codegen *p,
+ unsigned sfid,
+ struct brw_reg dst,
+ struct brw_reg payload0,
+ struct brw_reg payload1,
+ struct brw_reg desc,
+ unsigned desc_imm,
+ struct brw_reg ex_desc,
+ unsigned ex_desc_imm)
+{
+ const struct gen_device_info *devinfo = p->devinfo;
+ struct brw_inst *send;
+
+ dst = retype(dst, BRW_REGISTER_TYPE_UW);
+
+ assert(desc.type == BRW_REGISTER_TYPE_UD);
+
+ if (desc.file == BRW_IMMEDIATE_VALUE) {
+ /* Immediate descriptor: fold the extra bits in at compile time. */
+ desc.ud |= desc_imm;
+ } else {
+ struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* Load the indirect descriptor to an address register using OR so the
+ * caller can specify additional descriptor bits with the desc_imm
+ * immediate.
+ */
+ brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
+
+ brw_pop_insn_state(p);
+ /* From here on the descriptor is the address register just written. */
+ desc = addr;
+ }
+
+ if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+ /* Immediate extended descriptor: fold the extra bits in directly. */
+ ex_desc.ud |= ex_desc_imm;
+ } else {
+ struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
+
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_exec_size(p, BRW_EXECUTE_1);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+ /* Load the indirect extended descriptor to an address register using OR
+ * so the caller can specify additional descriptor bits with the
+ * ex_desc_imm immediate.
+ *
+ * Even though the instruction dispatcher always pulls the SFID from the
+ * instruction itself, the extended descriptor sent to the actual unit
+ * gets the SFID from the extended descriptor which comes from the
+ * address register. If we don't OR it in, the external unit gets
+ * confused and hangs the GPU.
+ */
+ brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid));
+
+ brw_pop_insn_state(p);
+ /* From here on the extended descriptor is the address register. */
+ ex_desc = addr;
+ }
+
+ send = next_insn(p, BRW_OPCODE_SENDS);
+ brw_set_dest(p, send, dst);
+ brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
+ brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
+
+ if (desc.file == BRW_IMMEDIATE_VALUE) {
+ /* Immediate form: the descriptor bits live in the instruction. */
+ brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
+ brw_inst_set_send_desc(devinfo, send, desc.ud);
+ } else {
+ /* Register form: only the address register at subregister 0 is
+ * accepted, so no register number needs to be encoded.
+ */
+ assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(desc.nr == BRW_ARF_ADDRESS);
+ assert(desc.subnr == 0);
+ brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
+ }
+
+ if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+ /* Immediate form: the extended descriptor lives in the instruction. */
+ brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
+ brw_inst_set_send_ex_desc(devinfo, send, ex_desc.ud);
+ } else {
+ /* Register form: must be the address register with a subregister
+ * number that is a multiple of 4; it is encoded divided by 4.
+ */
+ assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+ assert(ex_desc.nr == BRW_ARF_ADDRESS);
+ assert((ex_desc.subnr & 0x3) == 0);
+ brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
+ brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
+ }
+
+ brw_inst_set_sfid(devinfo, send, sfid);
+}
+
static void
brw_send_indirect_surface_message(struct brw_codegen *p,
unsigned sfid,
}
}
+/**
+ * Tell whether \p inst is a split send, i.e. whether its opcode is
+ * BRW_OPCODE_SENDS or BRW_OPCODE_SENDSC.
+ */
+static bool
+inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   return brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+          brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC;
+}
+
static unsigned
signed_type(unsigned type)
{
if (num_sources == 3)
return (struct string){};
+ /* Nothing to test. Split sends can only encode a file in sources that are
+ * allowed to be NULL.
+ */
+ if (inst_is_split_send(devinfo, inst))
+ return (struct string){};
+
if (num_sources >= 1)
ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
{
struct string error_msg = { .str = NULL, .len = 0 };
- if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
- brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
+ if (inst_is_split_send(devinfo, inst)) {
+ ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+ brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
+ "src1 of split send must be a GRF or NULL");
+
+ ERROR_IF(brw_inst_eot(devinfo, inst) &&
+ brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
+ "send with EOT must use g112-g127");
+ ERROR_IF(brw_inst_eot(devinfo, inst) &&
+ brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
+ brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
+ "send with EOT must use g112-g127");
+
+ if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
+ /* Assume minimums if we don't know */
+ unsigned mlen = 1;
+ if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
+ const uint32_t desc = brw_inst_send_desc(devinfo, inst);
+ mlen = brw_message_desc_mlen(devinfo, desc);
+ }
+
+ unsigned ex_mlen = 1;
+ if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
+ const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst);
+ ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
+ }
+ const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
+ const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
+ ERROR_IF((src0_reg_nr <= src1_reg_nr &&
+ src1_reg_nr < src0_reg_nr + mlen) ||
+ (src1_reg_nr <= src0_reg_nr &&
+ src0_reg_nr < src1_reg_nr + ex_mlen),
+ "split send payloads must not overlap");
+ }
+ } else if (inst_is_send(devinfo, inst)) {
ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
"send must use direct addressing");
if (num_sources == 3)
return (struct string){};
+ /* Split sends don't have the bits in the instruction to encode regions so
+ * there's nothing to check.
+ */
+ if (inst_is_split_send(devinfo, inst))
+ return (struct string){};
+
if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
if (num_sources == 3 || num_sources == 0)
return (struct string){};
+ /* Split sends don't have types so there's no doubles there. */
+ if (inst_is_split_send(devinfo, inst))
+ return (struct string){};
+
enum brw_reg_type exec_type = execution_type(devinfo, inst);
unsigned exec_type_size = brw_reg_type_to_size(exec_type);
FC(gen4_pop_count, 115, 112, devinfo->gen < 6)
/** @} */
+/**
+ * SEND instructions:
+ *
+ * Fields specific to the send encodings; every one of them is guarded by
+ * devinfo->gen >= 9.  They cover the register file of dst and src1, the
+ * src1 register number, the bits selecting a register (rather than an
+ * immediate) for the descriptor and the extended descriptor, and the
+ * address subregister holding an indirect extended descriptor.
+ *
+ * NOTE(review): the FC() arguments look like (field name, high bit,
+ * low bit, validity assertion) -- confirm against the FC macro definition.
+ * @{
+ */
+FC(send_ex_desc_ia_subreg_nr, 82, 80, devinfo->gen >= 9)
+FC(send_src0_address_mode, 79, 79, devinfo->gen >= 9)
+FC(send_sel_reg32_desc, 77, 77, devinfo->gen >= 9)
+FC(send_sel_reg32_ex_desc, 61, 61, devinfo->gen >= 9)
+FC(send_src1_reg_nr, 51, 44, devinfo->gen >= 9)
+FC(send_src1_reg_file, 36, 36, devinfo->gen >= 9)
+FC(send_dst_reg_file, 35, 35, devinfo->gen >= 9)
+/** @} */
+
/* Message descriptor bits */
#define MD(x) ((x) + 96)
brw_inst *inst, uint32_t value)
{
assert(devinfo->gen >= 9);
- brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28));
- brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24));
- brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20));
- brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16));
- assert(GET_BITS(value, 15, 0) == 0);
+ if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
+ brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28));
+ brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24));
+ brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20));
+ brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16));
+ assert(GET_BITS(value, 15, 0) == 0);
+ } else {
+ assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC);
+ brw_inst_set_bits(inst, 95, 80, GET_BITS(value, 31, 16));
+ assert(GET_BITS(value, 15, 10) == 0);
+ brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 9, 6));
+ assert(GET_BITS(value, 5, 0) == 0);
+ }
}
/**
const brw_inst *inst)
{
assert(devinfo->gen >= 9);
- return (brw_inst_bits(inst, 94, 91) << 28 |
- brw_inst_bits(inst, 88, 85) << 24 |
- brw_inst_bits(inst, 83, 80) << 20 |
- brw_inst_bits(inst, 67, 64) << 16);
+ if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
+ return (brw_inst_bits(inst, 94, 91) << 28 |
+ brw_inst_bits(inst, 88, 85) << 24 |
+ brw_inst_bits(inst, 83, 80) << 20 |
+ brw_inst_bits(inst, 67, 64) << 16);
+ } else {
+ assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+ brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC);
+ return (brw_inst_bits(inst, 95, 80) << 16 |
+ brw_inst_bits(inst, 67, 64) << 6);
+ }
}
/**
* Compared to Align1, these are missing the low 4 bits.
* -Gen 4- ----Gen8----
*/
-BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100)
-BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68)
-BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52)
+BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100)
+BRW_IA16_ADDR_IMM(src0, 73, 64, 95, 72, 68)
+BRW_IA16_ADDR_IMM(dst, 57, 52, 47, 56, 52)
+/* NOTE(review): the -1 values in the Gen 4 columns below presumably mark
+ * the send_* variants as having no pre-Gen8 encoding -- confirm against the
+ * BRW_IA16_ADDR_IMM macro definition.
+ */
+BRW_IA16_ADDR_IMM(send_src0, -1, -1, 78, 72, 68)
+BRW_IA16_ADDR_IMM(send_dst, -1, -1, 62, 56, 52)
/**
* Fetch a set of contiguous bits from the instruction.