intel/eu: Add support for the SENDS[C] messages
author Jason Ekstrand <jason.ekstrand@intel.com>
Thu, 15 Nov 2018 21:17:06 +0000 (15:17 -0600)
committer Jason Ekstrand <jason@jlekstrand.net>
Tue, 29 Jan 2019 18:43:55 +0000 (18:43 +0000)
Reviewed-by: Iago Toral Quiroga <itoral@igalia.com>
src/intel/compiler/brw_eu.h
src/intel/compiler/brw_eu_emit.c
src/intel/compiler/brw_eu_validate.c
src/intel/compiler/brw_inst.h

index 2c4ccae..a7041ea 100644 (file)
@@ -811,6 +811,17 @@ brw_send_indirect_message(struct brw_codegen *p,
                           struct brw_reg desc,
                           unsigned desc_imm);
 
+/**
+ * Emit a SENDS (split send) instruction that reads its message from two
+ * payload registers.  \p desc and \p ex_desc may each be either an immediate
+ * or a register; \p desc_imm and \p ex_desc_imm are ORed into the respective
+ * descriptor before it is used.
+ */
+void
+brw_send_indirect_split_message(struct brw_codegen *p,
+                                unsigned sfid,
+                                struct brw_reg dst,
+                                struct brw_reg payload0,
+                                struct brw_reg payload1,
+                                struct brw_reg desc,
+                                unsigned desc_imm,
+                                struct brw_reg ex_desc,
+                                unsigned ex_desc_imm);
+
 void brw_ff_sync(struct brw_codegen *p,
                   struct brw_reg dest,
                   unsigned msg_reg_nr,
index 06084dc..9be82d1 100644 (file)
@@ -96,7 +96,19 @@ brw_set_dest(struct brw_codegen *p, brw_inst *inst, struct brw_reg dest)
 
    gen7_convert_mrf_to_grf(p, &dest);
 
-   {
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+      assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
+             dest.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(dest.address_mode == BRW_ADDRESS_DIRECT);
+      assert(dest.subnr % 16 == 0);
+      assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+             dest.vstride == dest.width + 1);
+      assert(!dest.negate && !dest.abs);
+      brw_inst_set_dst_da_reg_nr(devinfo, inst, dest.nr);
+      brw_inst_set_dst_da16_subreg_nr(devinfo, inst, dest.subnr / 16);
+      brw_inst_set_send_dst_reg_file(devinfo, inst, dest.file);
+   } else {
       brw_inst_set_dst_file_type(devinfo, inst, dest.file, dest.type);
       brw_inst_set_dst_address_mode(devinfo, inst, dest.address_mode);
 
@@ -177,8 +189,11 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
 
    gen7_convert_mrf_to_grf(p, &reg);
 
-   if (devinfo->gen >= 6 && (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
-                             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC)) {
+   if (devinfo->gen >= 6 &&
+       (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC ||
+        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+        brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC)) {
       /* Any source modifiers or regions will be ignored, since this just
        * identifies the MRF/GRF to start reading the message contents from.
        * Check for some likely failures.
@@ -188,7 +203,17 @@ brw_set_src0(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
    }
 
-   {
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+      assert(reg.file == BRW_GENERAL_REGISTER_FILE);
+      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+      assert(reg.subnr % 16 == 0);
+      assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+             reg.vstride == reg.width + 1);
+      assert(!reg.negate && !reg.abs);
+      brw_inst_set_src0_da_reg_nr(devinfo, inst, reg.nr);
+      brw_inst_set_src0_da16_subreg_nr(devinfo, inst, reg.subnr / 16);
+   } else {
       brw_inst_set_src0_file_type(devinfo, inst, reg.file, reg.type);
       brw_inst_set_src0_abs(devinfo, inst, reg.abs);
       brw_inst_set_src0_negate(devinfo, inst, reg.negate);
@@ -282,7 +307,18 @@ brw_set_src1(struct brw_codegen *p, brw_inst *inst, struct brw_reg reg)
    if (reg.file == BRW_GENERAL_REGISTER_FILE)
       assert(reg.nr < 128);
 
-   {
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC) {
+      assert(reg.file == BRW_GENERAL_REGISTER_FILE ||
+             reg.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(reg.address_mode == BRW_ADDRESS_DIRECT);
+      assert(reg.subnr == 0);
+      assert(reg.hstride == BRW_HORIZONTAL_STRIDE_1 &&
+             reg.vstride == reg.width + 1);
+      assert(!reg.negate && !reg.abs);
+      brw_inst_set_send_src1_reg_nr(devinfo, inst, reg.nr);
+      brw_inst_set_send_src1_reg_file(devinfo, inst, reg.file);
+   } else {
       /* From the IVB PRM Vol. 4, Pt. 3, Section 3.3.3.5:
        *
        *    "Accumulator registers may be accessed explicitly as src0
@@ -2484,6 +2520,101 @@ brw_send_indirect_message(struct brw_codegen *p,
    brw_inst_set_sfid(devinfo, send, sfid);
 }
 
+/**
+ * Emit a SENDS (split send) instruction with two payload sources.
+ *
+ * The destination is retyped to UW and the two payloads to UD before being
+ * encoded as dst, src0 (payload0) and src1 (payload1).
+ *
+ * Both descriptors may be given either as an immediate or as a register:
+ *
+ *  - An immediate descriptor is ORed with its *_imm argument and written
+ *    directly into the instruction.
+ *  - A register descriptor is first combined with its *_imm argument by an
+ *    extra OR instruction that targets an address register
+ *    (brw_address_reg(0) for desc, brw_address_reg(2) for ex_desc); the SENDS
+ *    is then flagged to take that descriptor from the register instead.
+ *
+ * \p desc must have type UD.  \p sfid is always written into the instruction
+ * and, for a register ex_desc, is additionally ORed into the value loaded
+ * into the address register.
+ */
+void
+brw_send_indirect_split_message(struct brw_codegen *p,
+                                unsigned sfid,
+                                struct brw_reg dst,
+                                struct brw_reg payload0,
+                                struct brw_reg payload1,
+                                struct brw_reg desc,
+                                unsigned desc_imm,
+                                struct brw_reg ex_desc,
+                                unsigned ex_desc_imm)
+{
+   const struct gen_device_info *devinfo = p->devinfo;
+   struct brw_inst *send;
+
+   dst = retype(dst, BRW_REGISTER_TYPE_UW);
+
+   assert(desc.type == BRW_REGISTER_TYPE_UD);
+
+   if (desc.file == BRW_IMMEDIATE_VALUE) {
+      desc.ud |= desc_imm;
+   } else {
+      struct brw_reg addr = retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
+
+      /* The OR below is emitted with its own instruction state (align1,
+       * mask disabled, exec size 1, no predication); the caller's defaults
+       * are restored by the pop.
+       */
+      brw_push_insn_state(p);
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_exec_size(p, BRW_EXECUTE_1);
+      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+      /* Load the indirect descriptor to an address register using OR so the
+       * caller can specify additional descriptor bits with the desc_imm
+       * immediate.
+       */
+      brw_OR(p, addr, desc, brw_imm_ud(desc_imm));
+
+      brw_pop_insn_state(p);
+      desc = addr;
+   }
+
+   if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+      /* NOTE(review): unlike the register path below, sfid is not ORed in
+       * here.  brw_inst_set_send_ex_desc() asserts that bits 5:0 of the
+       * value are zero for SENDS/SENDSC, so presumably the immediate form
+       * relies on the SFID field set at the end of this function -- confirm.
+       */
+      ex_desc.ud |= ex_desc_imm;
+   } else {
+      struct brw_reg addr = retype(brw_address_reg(2), BRW_REGISTER_TYPE_UD);
+
+      /* Same temporary instruction state as for the descriptor OR above. */
+      brw_push_insn_state(p);
+      brw_set_default_access_mode(p, BRW_ALIGN_1);
+      brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+      brw_set_default_exec_size(p, BRW_EXECUTE_1);
+      brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
+
+      /* Load the indirect extended descriptor to an address register using OR
+       * so the caller can specify additional descriptor bits with the
+       * ex_desc_imm immediate.
+       *
+       * Even though the instruction dispatcher always pulls the SFID from the
+       * instruction itself, the extended descriptor sent to the actual unit
+       * gets the SFID from the extended descriptor which comes from the
+       * address register.  If we don't OR it in, the external unit gets
+       * confused and hangs the GPU.
+       */
+      brw_OR(p, addr, ex_desc, brw_imm_ud(ex_desc_imm | sfid));
+
+      brw_pop_insn_state(p);
+      ex_desc = addr;
+   }
+
+   send = next_insn(p, BRW_OPCODE_SENDS);
+   brw_set_dest(p, send, dst);
+   brw_set_src0(p, send, retype(payload0, BRW_REGISTER_TYPE_UD));
+   brw_set_src1(p, send, retype(payload1, BRW_REGISTER_TYPE_UD));
+
+   /* sel_reg32_desc selects between the immediate descriptor stored in the
+    * instruction (0) and the descriptor held in the address register (1).
+    */
+   if (desc.file == BRW_IMMEDIATE_VALUE) {
+      brw_inst_set_send_sel_reg32_desc(devinfo, send, 0);
+      brw_inst_set_send_desc(devinfo, send, desc.ud);
+   } else {
+      assert(desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(desc.nr == BRW_ARF_ADDRESS);
+      assert(desc.subnr == 0);
+      brw_inst_set_send_sel_reg32_desc(devinfo, send, 1);
+   }
+
+   /* Same selection for the extended descriptor.  In the register case the
+    * subregister number is encoded divided by four, hence the requirement
+    * that its low two bits are clear.
+    */
+   if (ex_desc.file == BRW_IMMEDIATE_VALUE) {
+      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 0);
+      brw_inst_set_send_ex_desc(devinfo, send, ex_desc.ud);
+   } else {
+      assert(ex_desc.file == BRW_ARCHITECTURE_REGISTER_FILE);
+      assert(ex_desc.nr == BRW_ARF_ADDRESS);
+      assert((ex_desc.subnr & 0x3) == 0);
+      brw_inst_set_send_sel_reg32_ex_desc(devinfo, send, 1);
+      brw_inst_set_send_ex_desc_ia_subreg_nr(devinfo, send, ex_desc.subnr >> 2);
+   }
+
+   brw_inst_set_sfid(devinfo, send, sfid);
+}
+
 static void
 brw_send_indirect_surface_message(struct brw_codegen *p,
                                   unsigned sfid,
index 5e50599..358a034 100644 (file)
@@ -102,6 +102,18 @@ inst_is_send(const struct gen_device_info *devinfo, const brw_inst *inst)
    }
 }
 
+/**
+ * Return true if \p inst is a split send, i.e. its opcode is
+ * BRW_OPCODE_SENDS or BRW_OPCODE_SENDSC, and false for any other opcode.
+ */
+static bool
+inst_is_split_send(const struct gen_device_info *devinfo, const brw_inst *inst)
+{
+   switch (brw_inst_opcode(devinfo, inst)) {
+   case BRW_OPCODE_SENDS:
+   case BRW_OPCODE_SENDSC:
+      return true;
+   default:
+      return false;
+   }
+}
+
 static unsigned
 signed_type(unsigned type)
 {
@@ -248,6 +260,12 @@ sources_not_null(const struct gen_device_info *devinfo,
    if (num_sources == 3)
       return (struct string){};
 
+   /* Nothing to test.  Split sends can only encode a file in sources that are
+    * allowed to be NULL.
+    */
+   if (inst_is_split_send(devinfo, inst))
+      return (struct string){};
+
    if (num_sources >= 1)
       ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
 
@@ -263,8 +281,41 @@ send_restrictions(const struct gen_device_info *devinfo,
 {
    struct string error_msg = { .str = NULL, .len = 0 };
 
-   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
-       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
+   if (inst_is_split_send(devinfo, inst)) {
+      ERROR_IF(brw_inst_send_src1_reg_file(devinfo, inst) == BRW_ARCHITECTURE_REGISTER_FILE &&
+               brw_inst_send_src1_reg_nr(devinfo, inst) != BRW_ARF_NULL,
+               "src1 of split send must be a GRF or NULL");
+
+      ERROR_IF(brw_inst_eot(devinfo, inst) &&
+               brw_inst_src0_da_reg_nr(devinfo, inst) < 112,
+               "send with EOT must use g112-g127");
+      ERROR_IF(brw_inst_eot(devinfo, inst) &&
+               brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE &&
+               brw_inst_send_src1_reg_nr(devinfo, inst) < 112,
+               "send with EOT must use g112-g127");
+
+      if (brw_inst_send_src1_reg_file(devinfo, inst) == BRW_GENERAL_REGISTER_FILE) {
+         /* Assume minimums if we don't know */
+         unsigned mlen = 1;
+         if (!brw_inst_send_sel_reg32_desc(devinfo, inst)) {
+            const uint32_t desc = brw_inst_send_desc(devinfo, inst);
+            mlen = brw_message_desc_mlen(devinfo, desc);
+         }
+
+         unsigned ex_mlen = 1;
+         if (!brw_inst_send_sel_reg32_ex_desc(devinfo, inst)) {
+            const uint32_t ex_desc = brw_inst_send_ex_desc(devinfo, inst);
+            ex_mlen = brw_message_ex_desc_ex_mlen(devinfo, ex_desc);
+         }
+         const unsigned src0_reg_nr = brw_inst_src0_da_reg_nr(devinfo, inst);
+         const unsigned src1_reg_nr = brw_inst_send_src1_reg_nr(devinfo, inst);
+         ERROR_IF((src0_reg_nr <= src1_reg_nr &&
+                   src1_reg_nr < src0_reg_nr + mlen) ||
+                  (src1_reg_nr <= src0_reg_nr &&
+                   src0_reg_nr < src1_reg_nr + ex_mlen),
+                   "split send payloads must not overlap");
+      }
+   } else if (inst_is_send(devinfo, inst)) {
       ERROR_IF(brw_inst_src0_address_mode(devinfo, inst) != BRW_ADDRESS_DIRECT,
                "send must use direct addressing");
 
@@ -534,6 +585,12 @@ general_restrictions_on_region_parameters(const struct gen_device_info *devinfo,
    if (num_sources == 3)
       return (struct string){};
 
+   /* Split sends don't have the bits in the instruction to encode regions so
+    * there's nothing to check.
+    */
+   if (inst_is_split_send(devinfo, inst))
+      return (struct string){};
+
    if (brw_inst_access_mode(devinfo, inst) == BRW_ALIGN_16) {
       if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
          ERROR_IF(brw_inst_dst_hstride(devinfo, inst) != BRW_HORIZONTAL_STRIDE_1,
@@ -1124,6 +1181,10 @@ special_requirements_for_handling_double_precision_data_types(
    if (num_sources == 3 || num_sources == 0)
       return (struct string){};
 
+   /* Split sends don't have types so there's no doubles there. */
+   if (inst_is_split_send(devinfo, inst))
+      return (struct string){};
+
    enum brw_reg_type exec_type = execution_type(devinfo, inst);
    unsigned exec_type_size = brw_reg_type_to_size(exec_type);
 
index f60325c..71316f1 100644 (file)
@@ -455,6 +455,19 @@ FJ(gen4_jump_count, 111,  96, devinfo->gen < 6)
 FC(gen4_pop_count,  115, 112, devinfo->gen < 6)
 /** @} */
 
+/**
+ * SEND instructions:
+ *
+ * Fields introduced alongside SENDS/SENDSC support.  As with the other FC()
+ * entries in this file, the arguments are the field name, the high and low
+ * bit positions, and a devinfo condition (gen >= 9 for every field here).
+ *  @{
+ */
+FC(send_ex_desc_ia_subreg_nr, 82, 80, devinfo->gen >= 9)
+FC(send_src0_address_mode,    79, 79, devinfo->gen >= 9)
+FC(send_sel_reg32_desc,       77, 77, devinfo->gen >= 9)
+FC(send_sel_reg32_ex_desc,    61, 61, devinfo->gen >= 9)
+FC(send_src1_reg_nr,          51, 44, devinfo->gen >= 9)
+FC(send_src1_reg_file,        36, 36, devinfo->gen >= 9)
+FC(send_dst_reg_file,         35, 35, devinfo->gen >= 9)
+/** @} */
+
 /* Message descriptor bits */
 #define MD(x) ((x) + 96)
 
@@ -513,11 +526,21 @@ brw_inst_set_send_ex_desc(const struct gen_device_info *devinfo,
                           brw_inst *inst, uint32_t value)
 {
    assert(devinfo->gen >= 9);
-   brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28));
-   brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24));
-   brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20));
-   brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16));
-   assert(GET_BITS(value, 15, 0) == 0);
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
+      brw_inst_set_bits(inst, 94, 91, GET_BITS(value, 31, 28));
+      brw_inst_set_bits(inst, 88, 85, GET_BITS(value, 27, 24));
+      brw_inst_set_bits(inst, 83, 80, GET_BITS(value, 23, 20));
+      brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 19, 16));
+      assert(GET_BITS(value, 15, 0) == 0);
+   } else {
+      assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC);
+      brw_inst_set_bits(inst, 95, 80, GET_BITS(value, 31, 16));
+      assert(GET_BITS(value, 15, 10) == 0);
+      brw_inst_set_bits(inst, 67, 64, GET_BITS(value, 9, 6));
+      assert(GET_BITS(value, 5, 0) == 0);
+   }
 }
 
 /**
@@ -530,10 +553,18 @@ brw_inst_send_ex_desc(const struct gen_device_info *devinfo,
                       const brw_inst *inst)
 {
    assert(devinfo->gen >= 9);
-   return (brw_inst_bits(inst, 94, 91) << 28 |
-           brw_inst_bits(inst, 88, 85) << 24 |
-           brw_inst_bits(inst, 83, 80) << 20 |
-           brw_inst_bits(inst, 67, 64) << 16);
+   if (brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SEND ||
+       brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDC) {
+      return (brw_inst_bits(inst, 94, 91) << 28 |
+              brw_inst_bits(inst, 88, 85) << 24 |
+              brw_inst_bits(inst, 83, 80) << 20 |
+              brw_inst_bits(inst, 67, 64) << 16);
+   } else {
+      assert(brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDS ||
+             brw_inst_opcode(devinfo, inst) == BRW_OPCODE_SENDSC);
+      return (brw_inst_bits(inst, 95, 80) << 16 |
+              brw_inst_bits(inst, 67, 64) << 6);
+   }
 }
 
 /**
@@ -956,9 +987,11 @@ brw_inst_##reg##_ia16_addr_imm(const struct gen_device_info *devinfo,     \
  * Compared to Align1, these are missing the low 4 bits.
  *                     -Gen 4-  ----Gen8----
  */
-BRW_IA16_ADDR_IMM(src1, 105, 96, 121, 104, 100)
-BRW_IA16_ADDR_IMM(src0,  73, 64,  95,  72,  68)
-BRW_IA16_ADDR_IMM(dst,   57, 52,  47,  56,  52)
+BRW_IA16_ADDR_IMM(src1,       105, 96, 121, 104, 100)
+BRW_IA16_ADDR_IMM(src0,        73, 64,  95,  72,  68)
+BRW_IA16_ADDR_IMM(dst,         57, 52,  47,  56,  52)
+BRW_IA16_ADDR_IMM(send_src0,   -1, -1,  78,  72,  68)
+BRW_IA16_ADDR_IMM(send_dst,    -1, -1,  62,  56,  52)
 
 /**
  * Fetch a set of contiguous bits from the instruction.