intel/fs: Lower Byte scattered r/w messages to LSC when available
authorSagar Ghuge <sagar.ghuge@intel.com>
Fri, 30 Apr 2021 06:58:26 +0000 (23:58 -0700)
committerMarge Bot <eric+marge@anholt.net>
Wed, 30 Jun 2021 16:17:18 +0000 (16:17 +0000)
v2 (Jason Ekstrand):
 - Squash in brw_scheduler changes
 - Update brw_ir_performance

Co-authored-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Sagar Ghuge <sagar.ghuge@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11600>

src/intel/compiler/brw_fs.cpp
src/intel/compiler/brw_ir_performance.cpp
src/intel/compiler/brw_schedule_instructions.cpp

index da894a8..dff3a81 100644 (file)
@@ -5893,6 +5893,19 @@ brw_atomic_op_to_lsc_fatomic_op(uint32_t aop)
    }
 }
 
+static enum lsc_data_size
+lsc_bits_to_data_size(unsigned bit_size)
+{
+   switch (bit_size / 8) {
+   case 1:  return LSC_DATA_SIZE_D8U32;
+   case 2:  return LSC_DATA_SIZE_D16U32;
+   case 4:  return LSC_DATA_SIZE_D32;
+   case 8:  return LSC_DATA_SIZE_D64;
+   default:
+      unreachable("Unsupported data size.");
+   }
+}
+
 static void
 lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
 {
@@ -5986,6 +5999,26 @@ lower_lsc_surface_logical_send(const fs_builder &bld, fs_inst *inst)
                                 !inst->dst.is_null());
       break;
    }
+   case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+      inst->desc = lsc_msg_desc(devinfo, LSC_OP_LOAD, inst->exec_size,
+                                surf_type, LSC_ADDR_SIZE_A32,
+                                1 /* num_coordinates */,
+                                lsc_bits_to_data_size(arg.ud),
+                                1 /* num_channels */,
+                                false /* transpose */,
+                                LSC_CACHE_LOAD_L1STATE_L3MOCS,
+                                true /* has_dest */);
+      break;
+   case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
+      inst->desc = lsc_msg_desc(devinfo, LSC_OP_STORE, inst->exec_size,
+                                surf_type, LSC_ADDR_SIZE_A32,
+                                1 /* num_coordinates */,
+                                lsc_bits_to_data_size(arg.ud),
+                                1 /* num_channels */,
+                                false /* transpose */,
+                                LSC_CACHE_STORE_L1STATE_L3MOCS,
+                                false /* has_dest */);
+      break;
    default:
       unreachable("Unknown surface logical instruction");
    }
@@ -6603,12 +6636,12 @@ fs_visitor::lower_logical_sends()
       case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
       case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
       case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+      case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+      case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
          if (devinfo->has_lsc) {
             lower_lsc_surface_logical_send(ibld, inst);
             break;
          }
-      case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
-      case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
       case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
       case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
       case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
index 9ae0932..d4a0a87 100644 (file)
@@ -1097,6 +1097,8 @@ namespace {
          case GFX12_SFID_TGM:
          case GFX12_SFID_SLM:
             switch (lsc_msg_desc_opcode(devinfo, info.desc)) {
+            case LSC_OP_LOAD:
+            case LSC_OP_STORE:
             case LSC_OP_LOAD_CMASK:
             case LSC_OP_STORE_CMASK:
                return calculate_desc(info, unit_dp_dc, 2, 0, 0,
index b7cd4d8..797c209 100644 (file)
@@ -532,6 +532,8 @@ schedule_node::set_latency_gfx7(bool is_haswell)
       case GFX12_SFID_TGM:
       case GFX12_SFID_SLM:
          switch (lsc_msg_desc_opcode(devinfo, inst->desc)) {
+         case LSC_OP_LOAD:
+         case LSC_OP_STORE:
          case LSC_OP_LOAD_CMASK:
          case LSC_OP_STORE_CMASK:
             latency = 300;