intel/fs: Use split sends for surface writes on gen9+

author Jason Ekstrand <jason.ekstrand@intel.com>

Fri, 16 Nov 2018 16:46:27 +0000 (10:46 -0600)

committer Jason Ekstrand <jason@jlekstrand.net>

Tue, 29 Jan 2019 18:43:55 +0000 (18:43 +0000)
author Jason Ekstrand <jason.ekstrand@intel.com>
Fri, 16 Nov 2018 16:46:27 +0000 (10:46 -0600)
committer Jason Ekstrand <jason@jlekstrand.net>
Tue, 29 Jan 2019 18:43:55 +0000 (18:43 +0000)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp

index d82572a..8dd3b94 100644 (file)
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -5016,28 +5016,42 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
      * Gen11+ the header has been removed so we can only use predication.
      */
     const unsigned header_sz = devinfo->gen < 9 && is_typed_access ? 1 : 0;
-   const unsigned sz = header_sz + addr_sz + src_sz;
-
-   /* Allocate space for the payload. */
-   fs_reg *const components = new fs_reg[sz];
-   const fs_reg payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
-   unsigned n = 0;
  
     const bool has_side_effects = inst->has_side_effects();
     fs_reg sample_mask = has_side_effects ? bld.sample_mask_reg() :
                                             fs_reg(brw_imm_d(0xffff));
  
-   /* Construct the payload. */
-   if (header_sz)
-      components[n++] = emit_surface_header(bld, sample_mask);
+   fs_reg payload, payload2;
+   unsigned mlen, ex_mlen = 0;
+   if (devinfo->gen >= 9) {
+      /* We have split sends on gen9 and above */
+      assert(header_sz == 0);
+      payload = bld.move_to_vgrf(addr, addr_sz);
+      payload2 = bld.move_to_vgrf(src, src_sz);
+      mlen = addr_sz * (inst->exec_size / 8);
+      ex_mlen = src_sz * (inst->exec_size / 8);
+   } else {
+      /* Allocate space for the payload. */
+      const unsigned sz = header_sz + addr_sz + src_sz;
+      payload = bld.vgrf(BRW_REGISTER_TYPE_UD, sz);
+      fs_reg *const components = new fs_reg[sz];
+      unsigned n = 0;
+
+      /* Construct the payload. */
+      if (header_sz)
+         components[n++] = emit_surface_header(bld, sample_mask);
+
+      for (unsigned i = 0; i < addr_sz; i++)
+         components[n++] = offset(addr, bld, i);
  
-   for (unsigned i = 0; i < addr_sz; i++)
-      components[n++] = offset(addr, bld, i);
+      for (unsigned i = 0; i < src_sz; i++)
+         components[n++] = offset(src, bld, i);
  
-   for (unsigned i = 0; i < src_sz; i++)
-      components[n++] = offset(src, bld, i);
+      bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+      mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
  
-   bld.LOAD_PAYLOAD(payload, components, sz, header_sz);
+      delete[] components;
+   }
  
     /* Predicate the instruction on the sample mask if no header is
      * provided.
@@ -5162,7 +5176,8 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
  
     /* Update the original instruction. */
     inst->opcode = SHADER_OPCODE_SEND;
-   inst->mlen = header_sz + (addr_sz + src_sz) * inst->exec_size / 8;
+   inst->mlen = mlen;
+   inst->ex_mlen = ex_mlen;
     inst->header_size = header_sz;
     inst->send_has_side_effects = has_side_effects;
     inst->send_is_volatile = !has_side_effects;
@@ -5183,10 +5198,9 @@ lower_surface_logical_send(const fs_builder &bld, fs_inst *inst)
  
     /* Finally, the payload */
     inst->src[2] = payload;
+   inst->src[3] = payload2;
  
-   inst->resize_sources(3);
-
-   delete[] components;
+   inst->resize_sources(4);
  }
  
  static void
diff --git a/src/intel/compiler/brw_fs_builder.h b/src/intel/compiler/brw_fs_builder.h

index c50af4c..3fc6e5f 100644 (file)
--- a/src/intel/compiler/brw_fs_builder.h
+++ b/src/intel/compiler/brw_fs_builder.h
@@ -426,6 +426,21 @@ namespace brw {
           return src_reg(component(dst, 0));
        }
  
+      src_reg
+      move_to_vgrf(const src_reg &src, unsigned num_components) const
+      {
+         src_reg *const src_comps = new src_reg[num_components];
+         for (unsigned i = 0; i < num_components; i++)
+            src_comps[i] = offset(src, dispatch_width(), i);
+
+         const dst_reg dst = vgrf(src.type, num_components);
+         LOAD_PAYLOAD(dst, src_comps, num_components, 0);
+
+         delete[] src_comps;
+
+         return src_reg(dst);
+      }
+
        void
        emit_scan(enum opcode opcode, const dst_reg &tmp,
                  unsigned cluster_size, brw_conditional_mod mod) const
author	Jason Ekstrand <jason.ekstrand@intel.com>
	Fri, 16 Nov 2018 16:46:27 +0000 (10:46 -0600)
committer	Jason Ekstrand <jason@jlekstrand.net>
	Tue, 29 Jan 2019 18:43:55 +0000 (18:43 +0000)
src/intel/compiler/brw_fs.cpp		patch \| blob \| history
src/intel/compiler/brw_fs_builder.h		patch \| blob \| history