aco: update sendmsg enum from LLVM

author Rhys Perry <pendingchaos02@gmail.com>
Thu, 21 Jul 2022 14:45:11 +0000 (15:45 +0100)
committer Marge Bot <emma+marge@anholt.net>
Fri, 30 Sep 2022 20:57:02 +0000 (20:57 +0000)
diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py

index db0c4e3..8445940 100644 (file)
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -85,15 +85,26 @@ aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);
  
  enum sendmsg {
     sendmsg_none = 0,
-   _sendmsg_gs = 2,
-   _sendmsg_gs_done = 3,
-   sendmsg_save_wave = 4,
-   sendmsg_stall_wave_gen = 5,
-   sendmsg_halt_waves = 6,
-   sendmsg_ordered_ps_done = 7,
-   sendmsg_early_prim_dealloc = 8,
-   sendmsg_gs_alloc_req = 9,
-   sendmsg_id_mask = 0xf,
+   _sendmsg_gs = 2, /* gfx6 to gfx10.3 */
+   _sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
+   sendmsg_hs_tessfactor = 2, /* gfx11+ */
+   sendmsg_dealloc_vgprs = 3, /* gfx11+ */
+   sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */
+   sendmsg_stall_wave_gen = 5, /* gfx9+ */
+   sendmsg_halt_waves = 6, /* gfx9+ */
+   sendmsg_ordered_ps_done = 7, /* gfx9+ */
+   sendmsg_early_prim_dealloc = 8, /* gfx9 to gfx10 */
+   sendmsg_gs_alloc_req = 9, /* gfx9+ */
+   sendmsg_get_doorbell = 10, /* gfx9 to gfx10.3 */
+   sendmsg_get_ddid = 11, /* gfx10 to gfx10.3 */
+   sendmsg_rtn_get_doorbell = 128, /* gfx11+ */
+   sendmsg_rtn_get_ddid = 129, /* gfx11+ */
+   sendmsg_rtn_get_tma = 130, /* gfx11+ */
+   sendmsg_rtn_get_realtime = 131, /* gfx11+ */
+   sendmsg_rtn_save_wave = 132, /* gfx11+ */
+   sendmsg_rtn_get_tba = 133, /* gfx11+ */
+   sendmsg_id_mask_gfx6 = 0xf,
+   sendmsg_id_mask_gfx11 = 0xff,
  };
  
  inline sendmsg
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp

index dee0f89..9b77a69 100644 (file)
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -333,16 +333,25 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
           break;
        }
        case aco_opcode::s_sendmsg: {
-         unsigned id = imm & sendmsg_id_mask;
+         unsigned id =
+            gfx_level >= GFX11 ? (imm & sendmsg_id_mask_gfx11) : (imm & sendmsg_id_mask_gfx6);
+         static_assert(_sendmsg_gs == sendmsg_hs_tessfactor);
+         static_assert(_sendmsg_gs_done == sendmsg_dealloc_vgprs);
           switch (id) {
           case sendmsg_none: fprintf(output, " sendmsg(MSG_NONE)"); break;
           case _sendmsg_gs:
-            fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
-                    imm & 0x20 ? ", emit" : "", imm >> 8);
+            if (gfx_level >= GFX11)
+               fprintf(output, " sendmsg(hs_tessfactor)");
+            else
+               fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
+                       imm & 0x20 ? ", emit" : "", imm >> 8);
              break;
           case _sendmsg_gs_done:
-            fprintf(output, " sendmsg(gs_done%s%s, %u)", imm & 0x10 ? ", cut" : "",
-                    imm & 0x20 ? ", emit" : "", imm >> 8);
+            if (gfx_level >= GFX11)
+               fprintf(output, " sendmsg(dealloc_vgprs)");
+            else
+               fprintf(output, " sendmsg(gs_done%s%s, %u)", imm & 0x10 ? ", cut" : "",
+                       imm & 0x20 ? ", emit" : "", imm >> 8);
              break;
           case sendmsg_save_wave: fprintf(output, " sendmsg(save_wave)"); break;
           case sendmsg_stall_wave_gen: fprintf(output, " sendmsg(stall_wave_gen)"); break;
@@ -350,6 +359,15 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
           case sendmsg_ordered_ps_done: fprintf(output, " sendmsg(ordered_ps_done)"); break;
           case sendmsg_early_prim_dealloc: fprintf(output, " sendmsg(early_prim_dealloc)"); break;
           case sendmsg_gs_alloc_req: fprintf(output, " sendmsg(gs_alloc_req)"); break;
+         case sendmsg_get_doorbell: fprintf(output, " sendmsg(get_doorbell)"); break;
+         case sendmsg_get_ddid: fprintf(output, " sendmsg(get_ddid)"); break;
+         case sendmsg_rtn_get_doorbell: fprintf(output, " sendmsg(rtn_get_doorbell)"); break;
+         case sendmsg_rtn_get_ddid: fprintf(output, " sendmsg(rtn_get_ddid)"); break;
+         case sendmsg_rtn_get_tma: fprintf(output, " sendmsg(rtn_get_Tma)"); break;
+         case sendmsg_rtn_get_realtime: fprintf(output, " sendmsg(rtn_get_realtime)"); break;
+         case sendmsg_rtn_save_wave: fprintf(output, " sendmsg(rtn_save_wave)"); break;
+         case sendmsg_rtn_get_tba: fprintf(output, " sendmsg(rtn_get_Tba)"); break;
+         default: fprintf(output, " imm:%u", imm);
           }
           break;
        }
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp

index 6cebcf9..88498ef 100644 (file)
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -122,6 +122,7 @@ struct MoveState {
  };
  
  struct sched_ctx {
+   amd_gfx_level gfx_level;
     int16_t num_waves;
     int16_t last_SMEM_stall;
     int last_SMEM_dep_idx;
@@ -420,20 +421,10 @@ MoveState::upwards_skip(UpwardsCursor& cursor)
  }
  
  bool
-is_gs_or_done_sendmsg(const Instruction* instr)
+is_done_sendmsg(amd_gfx_level gfx_level, const Instruction* instr)
  {
-   if (instr->opcode == aco_opcode::s_sendmsg) {
-      uint16_t imm = instr->sopp().imm;
-      return (imm & sendmsg_id_mask) == _sendmsg_gs || (imm & sendmsg_id_mask) == _sendmsg_gs_done;
-   }
-   return false;
-}
-
-bool
-is_done_sendmsg(const Instruction* instr)
-{
-   if (instr->opcode == aco_opcode::s_sendmsg)
-      return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done;
+   if (gfx_level <= GFX10_3 && instr->opcode == aco_opcode::s_sendmsg)
+      return (instr->sopp().imm & sendmsg_id_mask_gfx6) == _sendmsg_gs_done;
     return false;
  }
  
@@ -464,6 +455,7 @@ struct memory_event_set {
  };
  
  struct hazard_query {
+   amd_gfx_level gfx_level;
     bool contains_spill;
     bool contains_sendmsg;
     bool uses_exec;
@@ -473,8 +465,9 @@ struct hazard_query {
  };
  
  void
-init_hazard_query(hazard_query* query)
+init_hazard_query(const sched_ctx& ctx, hazard_query* query)
  {
+   query->gfx_level = ctx.gfx_level;
     query->contains_spill = false;
     query->contains_sendmsg = false;
     query->uses_exec = false;
@@ -484,9 +477,10 @@ init_hazard_query(hazard_query* query)
  }
  
  void
-add_memory_event(memory_event_set* set, Instruction* instr, memory_sync_info* sync)
+add_memory_event(amd_gfx_level gfx_level, memory_event_set* set, Instruction* instr,
+                 memory_sync_info* sync)
  {
-   set->has_control_barrier |= is_done_sendmsg(instr);
+   set->has_control_barrier |= is_done_sendmsg(gfx_level, instr);
     if (instr->opcode == aco_opcode::p_barrier) {
        Pseudo_barrier_instruction& bar = instr->barrier();
        if (bar.sync.semantics & semantic_acquire)
@@ -524,7 +518,7 @@ add_to_hazard_query(hazard_query* query, Instruction* instr)
  
     memory_sync_info sync = get_sync_info_with_hack(instr);
  
-   add_memory_event(&query->mem_events, instr, &sync);
+   add_memory_event(query->gfx_level, &query->mem_events, instr, &sync);
  
     if (!(sync.semantics & semantic_can_reorder)) {
        unsigned storage = sync.storage;
@@ -580,7 +574,7 @@ perform_hazard_query(hazard_query* query, Instruction* instr, bool upwards)
     memory_event_set instr_set;
     memset(&instr_set, 0, sizeof(instr_set));
     memory_sync_info sync = get_sync_info_with_hack(instr);
-   add_memory_event(&instr_set, instr, &sync);
+   add_memory_event(query->gfx_level, &instr_set, instr, &sync);
  
     memory_event_set* first = &instr_set;
     memory_event_set* second = &query->mem_events;
@@ -655,7 +649,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
  
     /* first, check if we have instructions before current to move down */
     hazard_query hq;
-   init_hazard_query(&hq);
+   init_hazard_query(ctx, &hq);
     add_to_hazard_query(&hq, current);
  
     DownwardsCursor cursor = ctx.mv.downwards_init(idx, false, false);
@@ -751,7 +745,7 @@ schedule_SMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
        if (is_dependency) {
           if (!found_dependency) {
              ctx.mv.upwards_update_insert_idx(up_cursor);
-            init_hazard_query(&hq);
+            init_hazard_query(ctx, &hq);
              found_dependency = true;
           }
        }
@@ -797,8 +791,8 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
     /* first, check if we have instructions before current to move down */
     hazard_query indep_hq;
     hazard_query clause_hq;
-   init_hazard_query(&indep_hq);
-   init_hazard_query(&clause_hq);
+   init_hazard_query(ctx, &indep_hq);
+   init_hazard_query(ctx, &clause_hq);
     add_to_hazard_query(&indep_hq, current);
  
     DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, true);
@@ -923,7 +917,7 @@ schedule_VMEM(sched_ctx& ctx, Block* block, std::vector<RegisterDemand>& registe
        if (is_dependency) {
           if (!found_dependency) {
              ctx.mv.upwards_update_insert_idx(up_cursor);
-            init_hazard_query(&indep_hq);
+            init_hazard_query(ctx, &indep_hq);
              found_dependency = true;
           }
        } else if (is_vmem) {
@@ -967,7 +961,7 @@ schedule_position_export(sched_ctx& ctx, Block* block, std::vector<RegisterDeman
     DownwardsCursor cursor = ctx.mv.downwards_init(idx, true, false);
  
     hazard_query hq;
-   init_hazard_query(&hq);
+   init_hazard_query(ctx, &hq);
     add_to_hazard_query(&hq, current);
  
     for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int)idx - window_size;
@@ -1054,6 +1048,7 @@ schedule_program(Program* program, live& live_vars)
     demand.vgpr += program->config->num_shared_vgprs / 2;
  
     sched_ctx ctx;
+   ctx.gfx_level = program->gfx_level;
     ctx.mv.depends_on.resize(program->peekAllocationId());
     ctx.mv.RAR_dependencies.resize(program->peekAllocationId());
     ctx.mv.RAR_dependencies_clause.resize(program->peekAllocationId());
author	Rhys Perry <pendingchaos02@gmail.com>
	Thu, 21 Jul 2022 14:45:11 +0000 (15:45 +0100)
committer	Marge Bot <emma+marge@anholt.net>
	Fri, 30 Sep 2022 20:57:02 +0000 (20:57 +0000)
src/amd/compiler/aco_builder_h.py		patch | blob | history
src/amd/compiler/aco_print_ir.cpp		patch | blob | history
src/amd/compiler/aco_scheduler.cpp		patch | blob | history