intel/fs: rework dss_id opcode into generic opcode
author Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fri, 18 Jun 2021 11:10:06 +0000 (14:10 +0300)
committer Marge Bot <emma+marge@anholt.net>
Tue, 8 Feb 2022 12:55:24 +0000 (12:55 +0000)
We'll want different types of IDs based on topology. Let's make this
more flexible and also move the bit-shifting code a layer above, where
it's easier to do bit-shifting operations, especially if you need to
stash things into temporary registers.

v2: Keep previous comment.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Caio Oliveira <caio.oliveira@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13719>

src/intel/compiler/brw_eu_defines.h
src/intel/compiler/brw_fs_generator.cpp
src/intel/compiler/brw_fs_nir.cpp
src/intel/compiler/brw_ir_performance.cpp
src/intel/compiler/brw_shader.cpp

index 4d3c832..d7deb60 100644 (file)
@@ -806,10 +806,11 @@ enum opcode {
    TES_OPCODE_CREATE_INPUT_READ_HEADER,
    TES_OPCODE_ADD_INDIRECT_URB_OFFSET,
 
-   SHADER_OPCODE_GET_DSS_ID,
    SHADER_OPCODE_BTD_SPAWN_LOGICAL,
    SHADER_OPCODE_BTD_RETIRE_LOGICAL,
 
+   SHADER_OPCODE_READ_SR_REG,
+
    RT_OPCODE_TRACE_RAY_LOGICAL,
 };
 
index aab50a1..55f302a 100644 (file)
@@ -2561,32 +2561,21 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width,
          brw_float_controls_mode(p, src[0].d, src[1].d);
          break;
 
-      case SHADER_OPCODE_GET_DSS_ID:
-         /* The Slice, Dual-SubSlice, SubSlice, EU, and Thread IDs are all
-          * stored in sr0.0.  Normally, for reading from HW regs, we'd just do
-          * this in the IR and let the back-end generate some code but these
-          * live in the state register which tends to have special rules.
-          *
-          * For convenience, we combine Slice ID and Dual-SubSlice ID into a
-          * single ID.
-          */
-         if (devinfo->ver == 12) {
+      case SHADER_OPCODE_READ_SR_REG:
+         if (devinfo->ver >= 12) {
             /* There is a SWSB restriction that requires that any time sr0 is
              * accessed both the instruction doing the access and the next one
              * have SWSB set to RegDist(1).
              */
             if (brw_get_default_swsb(p).mode != TGL_SBID_NULL)
                brw_SYNC(p, TGL_SYNC_NOP);
+            assert(src[0].file == BRW_IMMEDIATE_VALUE);
             brw_set_default_swsb(p, tgl_swsb_regdist(1));
-            brw_SHR(p, dst, brw_sr0_reg(0), brw_imm_ud(9));
+            brw_MOV(p, dst, brw_sr0_reg(src[0].ud));
             brw_set_default_swsb(p, tgl_swsb_regdist(1));
-            brw_AND(p, dst, dst, brw_imm_ud(0x1f));
+            brw_AND(p, dst, dst, brw_imm_ud(0xffffffff));
          } else {
-            /* These move around basically every hardware generation, so don't
-             * do any >= checks and fail if the platform hasn't explicitly
-             * been enabled here.
-             */
-            unreachable("Unsupported platform");
+            brw_MOV(p, dst, brw_sr0_reg(src[0].ud));
          }
          break;
 
index 4050c57..96012ee 100644 (file)
@@ -5727,11 +5727,35 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
-   case nir_intrinsic_load_topology_id_intel:
-      assert(nir_intrinsic_base(instr) == BRW_TOPOLOGY_ID_DSS);
-      bld.emit(SHADER_OPCODE_GET_DSS_ID,
-               retype(dest, BRW_REGISTER_TYPE_UD));
+   case nir_intrinsic_load_topology_id_intel: {
+       /* These move around basically every hardware generation, so don't
+        * do any >= checks and fail if the platform hasn't explicitly
+        * been enabled here.
+        */
+      assert(devinfo->ver == 12);
+
+      /* Here is what the layout of SR0 looks like on Gfx12 :
+       *   [13:11] : Slice ID.
+       *   [10:9]  : Dual-SubSlice ID
+       *   [8]     : SubSlice ID
+       *   [7]     : EUID[2] (aka EU Row ID)
+       *   [6]     : Reserved
+       *   [5:4]   : EUID[1:0]
+       *   [2:0]   : Thread ID
+       */
+      fs_reg raw_id = bld.vgrf(BRW_REGISTER_TYPE_UD);
+      bld.emit(SHADER_OPCODE_READ_SR_REG, raw_id, brw_imm_ud(0));
+      switch (nir_intrinsic_base(instr)) {
+      case BRW_TOPOLOGY_ID_DSS:
+         bld.AND(raw_id, raw_id, brw_imm_ud(0x3fff));
+         /* Get rid of anything below dualsubslice */
+         bld.SHR(retype(dest, BRW_REGISTER_TYPE_UD), raw_id, brw_imm_ud(9));
+         break;
+      default:
+         unreachable("Invalid topology id type");
+      }
       break;
+   }
 
    case nir_intrinsic_load_btd_stack_id_intel:
       if (stage == MESA_SHADER_COMPUTE) {
index de5357b..a211889 100644 (file)
@@ -357,7 +357,7 @@ namespace {
       case TCS_OPCODE_SRC0_010_IS_ZERO:
       case TCS_OPCODE_GET_PRIMITIVE_ID:
       case TES_OPCODE_GET_PRIMITIVE_ID:
-      case SHADER_OPCODE_GET_DSS_ID:
+      case SHADER_OPCODE_READ_SR_REG:
          if (devinfo->ver >= 11) {
             return calculate_desc(info, EU_UNIT_FPU, 0, 2, 0, 0, 2,
                                   0, 10, 6 /* XXX */, 14, 0, 0);
index c278b38..c41152f 100644 (file)
@@ -557,12 +557,12 @@ brw_instruction_name(const struct intel_device_info *devinfo, enum opcode op)
       return "rnd_mode";
    case SHADER_OPCODE_FLOAT_CONTROL_MODE:
       return "float_control_mode";
-   case SHADER_OPCODE_GET_DSS_ID:
-      return "get_dss_id";
    case SHADER_OPCODE_BTD_SPAWN_LOGICAL:
       return "btd_spawn_logical";
    case SHADER_OPCODE_BTD_RETIRE_LOGICAL:
       return "btd_retire_logical";
+   case SHADER_OPCODE_READ_SR_REG:
+      return "read_sr_reg";
    }
 
    unreachable("not reached");