aco: Use ac_hw_stage instead of aco-specific HWStage.

author Timur Kristóf <timur.kristof@gmail.com>

Sat, 13 May 2023 15:55:54 +0000 (17:55 +0200)

committer Marge Bot <emma+marge@anholt.net>

Fri, 23 Jun 2023 12:49:04 +0000 (12:49 +0000)
author Timur Kristóf <timur.kristof@gmail.com>
Sat, 13 May 2023 15:55:54 +0000 (17:55 +0200)
committer Marge Bot <emma+marge@anholt.net>
Fri, 23 Jun 2023 12:49:04 +0000 (12:49 +0000)
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp

index 300ebdf..45065b1 100644 (file)
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -994,7 +994,8 @@ fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
        while (it != block.instructions.rend()) {
           if ((*it)->isEXP()) {
              Export_instruction& exp = (*it)->exp();
-            if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG) {
+            if (program->stage.hw == AC_HW_VERTEX_SHADER ||
+                program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
                 if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= (V_008DFC_SQ_EXP_POS + 3)) {
                    exp.done = true;
                    exported = true;
@@ -1014,19 +1015,19 @@ fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
              /* Do not abort if the main FS has an epilog because it only
               * exports MRTZ (if present) and the epilog exports colors.
               */
-            exported |= program->stage.hw == HWStage::FS && program->info.ps.has_epilog;
+            exported |= program->stage.hw == AC_HW_PIXEL_SHADER && program->info.ps.has_epilog;
           }
           ++it;
        }
     }
  
     /* GFX10+ FS may not export anything if no discard is used. */
-   bool may_skip_export = program->stage.hw == HWStage::FS && program->gfx_level >= GFX10;
+   bool may_skip_export = program->stage.hw == AC_HW_PIXEL_SHADER && program->gfx_level >= GFX10;
  
     if (!exported && !may_skip_export) {
        /* Abort in order to avoid a GPU hang. */
-      bool is_vertex_or_ngg =
-         (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::NGG);
+      bool is_vertex_or_ngg = (program->stage.hw == AC_HW_VERTEX_SHADER ||
+                               program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER);
        aco_err(program,
                "Missing export in %s shader:", is_vertex_or_ngg ? "vertex or NGG" : "fragment");
        aco_print_program(program, stderr);
@@ -1221,8 +1222,8 @@ emit_program(Program* program, std::vector<uint32_t>& code, std::vector<struct a
  {
     asm_context ctx(program, symbols);
  
-   if (program->stage.hw == HWStage::VS || program->stage.hw == HWStage::FS ||
-       program->stage.hw == HWStage::NGG)
+   if (program->stage.hw == AC_HW_VERTEX_SHADER || program->stage.hw == AC_HW_PIXEL_SHADER ||
+       program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)
        fix_exports(ctx, code, program);
  
     for (Block& block : program->blocks) {
diff --git a/src/amd/compiler/aco_insert_exec_mask.cpp b/src/amd/compiler/aco_insert_exec_mask.cpp

index 2008270..859a3cd 100644 (file)
--- a/src/amd/compiler/aco_insert_exec_mask.cpp
+++ b/src/amd/compiler/aco_insert_exec_mask.cpp
@@ -261,7 +261,8 @@ add_coupling_code(exec_ctx& ctx, Block* block, std::vector<aco_ptr<Instruction>>
        Operand start_exec(bld.lm);
  
        /* exec seems to need to be manually initialized with combined shaders */
-      if (ctx.program->stage.num_sw_stages() > 1 || ctx.program->stage.hw == HWStage::NGG) {
+      if (ctx.program->stage.num_sw_stages() > 1 ||
+          ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
           start_exec = Operand::c32_or_c64(-1u, bld.lm == s2);
           bld.copy(Definition(exec, bld.lm), start_exec);
        }
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp

index c3d3a97..9037480 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -7239,10 +7239,11 @@ emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
      * - when GS is used on GFX9+, VS->GS and TES->GS I/O is lowered to shared memory
      * - additionally, when NGG is used on GFX10+, shared memory is used for certain features
      */
-   bool shared_storage_used = ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::LS ||
-                              ctx->stage.hw == HWStage::HS ||
-                              (ctx->stage.hw == HWStage::GS && ctx->program->gfx_level >= GFX9) ||
-                              ctx->stage.hw == HWStage::NGG;
+   bool shared_storage_used =
+      ctx->stage.hw == AC_HW_COMPUTE_SHADER || ctx->stage.hw == AC_HW_LOCAL_SHADER ||
+      ctx->stage.hw == AC_HW_HULL_SHADER ||
+      (ctx->stage.hw == AC_HW_LEGACY_GEOMETRY_SHADER && ctx->program->gfx_level >= GFX9) ||
+      ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER;
  
     if (shared_storage_used)
        storage_allowed |= storage_shared;
@@ -7252,15 +7253,16 @@ emit_scoped_barrier(isel_context* ctx, nir_intrinsic_instr* instr)
        storage_allowed |= storage_task_payload;
  
     /* Allow VMEM output for all stages that can have outputs. */
-   if ((ctx->stage.hw != HWStage::CS && ctx->stage.hw != HWStage::FS) ||
+   if ((ctx->stage.hw != AC_HW_COMPUTE_SHADER && ctx->stage.hw != AC_HW_PIXEL_SHADER) ||
         ctx->stage.has(SWStage::TS))
        storage_allowed |= storage_vmem_output;
  
     /* Workgroup barriers can hang merged shaders that can potentially have 0 threads in either half.
      * They are allowed in CS, TCS, and in any NGG shader.
      */
-   ASSERTED bool workgroup_scope_allowed =
-      ctx->stage.hw == HWStage::CS || ctx->stage.hw == HWStage::HS || ctx->stage.hw == HWStage::NGG;
+   ASSERTED bool workgroup_scope_allowed = ctx->stage.hw == AC_HW_COMPUTE_SHADER ||
+                                           ctx->stage.hw == AC_HW_HULL_SHADER ||
+                                           ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER;
  
     unsigned nir_storage = nir_intrinsic_memory_modes(instr);
     unsigned storage = aco_storage_mode_from_nir_mem_mode(nir_storage);
@@ -7510,7 +7512,7 @@ get_scratch_resource(isel_context* ctx)
        Temp addr_hi =
           bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
        scratch_addr = bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
-   } else if (ctx->stage.hw != HWStage::CS) {
+   } else if (ctx->stage.hw != AC_HW_COMPUTE_SHADER) {
        scratch_addr =
           bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), scratch_addr, Operand::zero());
     }
@@ -8284,7 +8286,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
     }
     case nir_intrinsic_load_workgroup_id: {
        Temp dst = get_ssa_temp(ctx, &instr->dest.ssa);
-      if (ctx->stage.hw == HWStage::CS) {
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER) {
           const struct ac_arg* ids = ctx->args->workgroup_ids;
           bld.pseudo(aco_opcode::p_create_vector, Definition(dst),
                      ids[0].used ? Operand(get_arg(ctx, ids[0])) : Operand::zero(),
@@ -8297,7 +8299,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
        break;
     }
     case nir_intrinsic_load_local_invocation_index: {
-      if (ctx->stage.hw == HWStage::LS || ctx->stage.hw == HWStage::HS) {
+      if (ctx->stage.hw == AC_HW_LOCAL_SHADER || ctx->stage.hw == AC_HW_HULL_SHADER) {
           if (ctx->options->gfx_level >= GFX11) {
              /* On GFX11, RelAutoIndex is WaveID * WaveSize + ThreadID. */
              Temp wave_id =
@@ -8312,7 +8314,8 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
                       get_arg(ctx, ctx->args->vs_rel_patch_id));
           }
           break;
-      } else if (ctx->stage.hw == HWStage::GS || ctx->stage.hw == HWStage::NGG) {
+      } else if (ctx->stage.hw == AC_HW_LEGACY_GEOMETRY_SHADER ||
+                 ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
           bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), thread_id_in_threadgroup(ctx));
           break;
        } else if (ctx->program->workgroup_size <= ctx->program->wave_size) {
@@ -8343,11 +8346,11 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
        break;
     }
     case nir_intrinsic_load_subgroup_id: {
-      if (ctx->stage.hw == HWStage::CS) {
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER) {
           bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                    bld.def(s1, scc), get_arg(ctx, ctx->args->tg_size),
                    Operand::c32(0x6u | (0x6u << 16)));
-      } else if (ctx->stage.hw == HWStage::NGG) {
+      } else if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) {
           /* Get the id of the current wave within the threadgroup (workgroup) */
           bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                    bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info),
@@ -8362,10 +8365,10 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
        break;
     }
     case nir_intrinsic_load_num_subgroups: {
-      if (ctx->stage.hw == HWStage::CS)
+      if (ctx->stage.hw == AC_HW_COMPUTE_SHADER)
           bld.sop2(aco_opcode::s_and_b32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                    bld.def(s1, scc), Operand::c32(0x3fu), get_arg(ctx, ctx->args->tg_size));
-      else if (ctx->stage.hw == HWStage::NGG)
+      else if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER)
           bld.sop2(aco_opcode::s_bfe_u32, Definition(get_ssa_temp(ctx, &instr->dest.ssa)),
                    bld.def(s1, scc), get_arg(ctx, ctx->args->merged_wave_info),
                    Operand::c32(28u | (4u << 16)));
@@ -8922,7 +8925,7 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
           bld.copy(Definition(dst), get_arg(ctx, ctx->args->tes_patch_id));
           break;
        default:
-         if (ctx->stage.hw == HWStage::NGG && !ctx->stage.has(SWStage::GS)) {
+         if (ctx->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && !ctx->stage.has(SWStage::GS)) {
              /* In case of NGG, the GS threads always have the primitive ID
               * even if there is no SW GS. */
              bld.copy(Definition(dst), get_arg(ctx, ctx->args->gs_prim_id));
@@ -11249,7 +11252,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
        return select_program_rt(ctx, shader_count, shaders, args);
  
     if_context ic_merged_wave_info;
-   bool ngg_gs = ctx.stage.hw == HWStage::NGG && ctx.stage.has(SWStage::GS);
+   bool ngg_gs = ctx.stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER && ctx.stage.has(SWStage::GS);
  
     for (unsigned i = 0; i < shader_count; i++) {
        nir_shader* nir = shaders[i];
@@ -11286,7 +11289,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
        bool endif_merged_wave_info =
           ctx.tcs_in_out_eq ? i == 1 : (check_merged_wave_info && !(ngg_gs && i == 1));
  
-      if (program->gfx_level == GFX10 && program->stage.hw == HWStage::NGG &&
+      if (program->gfx_level == GFX10 && program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER &&
            program->stage.num_sw_stages() == 1) {
           /* Workaround for Navi1x HW bug to ensure that all NGG waves launch before
            * s_sendmsg(GS_ALLOC_REQ). */
diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp

index 0108b2c..a707bc3 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -691,45 +691,45 @@ setup_isel_context(Program* program, unsigned shader_count, struct nir_shader* c
  
     bool gfx9_plus = options->gfx_level >= GFX9;
     bool ngg = info->is_ngg && options->gfx_level >= GFX10;
-   HWStage hw_stage{};
+   ac_hw_stage hw_stage;
     if (sw_stage == SWStage::VS && info->vs.as_es && !ngg)
-      hw_stage = HWStage::ES;
+      hw_stage = AC_HW_EXPORT_SHADER;
     else if (sw_stage == SWStage::VS && !info->vs.as_ls && !ngg)
-      hw_stage = HWStage::VS;
+      hw_stage = AC_HW_VERTEX_SHADER;
     else if (sw_stage == SWStage::VS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10/NGG: VS without GS uses the HW GS stage */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
     else if (sw_stage == SWStage::GS)
-      hw_stage = HWStage::GS;
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
     else if (sw_stage == SWStage::FS)
-      hw_stage = HWStage::FS;
+      hw_stage = AC_HW_PIXEL_SHADER;
     else if (sw_stage == SWStage::CS)
-      hw_stage = HWStage::CS;
+      hw_stage = AC_HW_COMPUTE_SHADER;
     else if (sw_stage == SWStage::TS)
-      hw_stage = HWStage::CS; /* Task shaders are implemented with compute shaders. */
+      hw_stage = AC_HW_COMPUTE_SHADER;
     else if (sw_stage == SWStage::MS)
-      hw_stage = HWStage::NGG; /* Mesh shaders only work on NGG and on GFX10.3+. */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
     else if (sw_stage == SWStage::VS_GS && gfx9_plus && !ngg)
-      hw_stage = HWStage::GS; /* GFX6-9: VS+GS merged into a GS (and GFX10/legacy) */
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
     else if (sw_stage == SWStage::VS_GS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10+: VS+GS merged into an NGG GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
     else if (sw_stage == SWStage::VS && info->vs.as_ls)
-      hw_stage = HWStage::LS; /* GFX6-8: VS is a Local Shader, when tessellation is used */
+      hw_stage = AC_HW_LOCAL_SHADER;
     else if (sw_stage == SWStage::TCS)
-      hw_stage = HWStage::HS; /* GFX6-8: TCS is a Hull Shader */
+      hw_stage = AC_HW_HULL_SHADER;
     else if (sw_stage == SWStage::VS_TCS)
-      hw_stage = HWStage::HS; /* GFX9-10: VS+TCS merged into a Hull Shader */
+      hw_stage = AC_HW_HULL_SHADER;
     else if (sw_stage == SWStage::TES && !info->tes.as_es && !ngg)
-      hw_stage = HWStage::VS; /* GFX6-9: TES without GS uses the HW VS stage (and GFX10/legacy) */
+      hw_stage = AC_HW_VERTEX_SHADER;
     else if (sw_stage == SWStage::TES && !info->tes.as_es && ngg)
-      hw_stage = HWStage::NGG; /* GFX10/NGG: TES without GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
     else if (sw_stage == SWStage::TES && info->tes.as_es && !ngg)
-      hw_stage = HWStage::ES; /* GFX6-8: TES is an Export Shader */
+      hw_stage = AC_HW_EXPORT_SHADER;
     else if (sw_stage == SWStage::TES_GS && gfx9_plus && !ngg)
-      hw_stage = HWStage::GS; /* GFX9: TES+GS merged into a GS (and GFX10/legacy) */
+      hw_stage = AC_HW_LEGACY_GEOMETRY_SHADER;
     else if (sw_stage == SWStage::TES_GS && ngg)
-      hw_stage = HWStage::NGG; /* GFX10+: TES+GS merged into an NGG GS */
+      hw_stage = AC_HW_NEXT_GEN_GEOMETRY_SHADER;
     else if (sw_stage == SWStage::RT)
-      hw_stage = HWStage::CS; /* Raytracing shaders run as CS */
+      hw_stage = AC_HW_COMPUTE_SHADER;
     else
        unreachable("Shader stage not implemented");
  
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h

index 08a9a2a..e04fcae 100644 (file)
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -32,6 +32,7 @@
  #include "util/compiler.h"
  
  #include "ac_binary.h"
+#include "ac_shader_util.h"
  #include "amd_family.h"
  #include <algorithm>
  #include <bitset>
@@ -2004,31 +2005,13 @@ operator|(SWStage a, SWStage b)
  }
  
  /*
- * Shader stages as running on the AMD GPU.
- *
- * The relation between HWStages and SWStages is not a one-to-one mapping:
- * Some SWStages are merged by ACO to run on a single HWStage.
- * See README.md for details.
- */
-enum class HWStage : uint8_t {
-   VS,
-   ES, /* Export shader: pre-GS (VS or TES) on GFX6-8. Combined into GS on GFX9 (and GFX10/legacy). */
-   GS,  /* Geometry shader on GFX10/legacy and GFX6-9. */
-   NGG, /* Primitive shader, used to implement VS, TES, GS. */
-   LS,  /* Local shader: pre-TCS (VS) on GFX6-8. Combined into HS on GFX9 (and GFX10/legacy). */
-   HS,  /* Hull shader: TCS on GFX6-8. Merged VS and TCS on GFX9-10. */
-   FS,
-   CS,
-};
-
-/*
   * Set of SWStages to be merged into a single shader paired with the
   * HWStage it will run on.
   */
  struct Stage {
     constexpr Stage() = default;
  
-   explicit constexpr Stage(HWStage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
+   explicit constexpr Stage(ac_hw_stage hw_, SWStage sw_) : sw(sw_), hw(hw_) {}
  
     /* Check if the given SWStage is included */
     constexpr bool has(SWStage stage) const
@@ -2046,35 +2029,36 @@ struct Stage {
     SWStage sw = SWStage::None;
  
     /* Active hardware stage */
-   HWStage hw{};
+   ac_hw_stage hw{};
  };
  
  /* possible settings of Program::stage */
-static constexpr Stage vertex_vs(HWStage::VS, SWStage::VS);
-static constexpr Stage fragment_fs(HWStage::FS, SWStage::FS);
-static constexpr Stage compute_cs(HWStage::CS, SWStage::CS);
-static constexpr Stage tess_eval_vs(HWStage::VS, SWStage::TES);
+static constexpr Stage vertex_vs(AC_HW_VERTEX_SHADER, SWStage::VS);
+static constexpr Stage fragment_fs(AC_HW_PIXEL_SHADER, SWStage::FS);
+static constexpr Stage compute_cs(AC_HW_COMPUTE_SHADER, SWStage::CS);
+static constexpr Stage tess_eval_vs(AC_HW_VERTEX_SHADER, SWStage::TES);
  /* Mesh shading pipeline */
-static constexpr Stage task_cs(HWStage::CS, SWStage::TS);
-static constexpr Stage mesh_ngg(HWStage::NGG, SWStage::MS);
+static constexpr Stage task_cs(AC_HW_COMPUTE_SHADER, SWStage::TS);
+static constexpr Stage mesh_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::MS);
  /* GFX10/NGG */
-static constexpr Stage vertex_ngg(HWStage::NGG, SWStage::VS);
-static constexpr Stage vertex_geometry_ngg(HWStage::NGG, SWStage::VS_GS);
-static constexpr Stage tess_eval_ngg(HWStage::NGG, SWStage::TES);
-static constexpr Stage tess_eval_geometry_ngg(HWStage::NGG, SWStage::TES_GS);
+static constexpr Stage vertex_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS);
+static constexpr Stage vertex_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::VS_GS);
+static constexpr Stage tess_eval_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES);
+static constexpr Stage tess_eval_geometry_ngg(AC_HW_NEXT_GEN_GEOMETRY_SHADER, SWStage::TES_GS);
  /* GFX9 (and GFX10 if NGG isn't used) */
-static constexpr Stage vertex_geometry_gs(HWStage::GS, SWStage::VS_GS);
-static constexpr Stage vertex_tess_control_hs(HWStage::HS, SWStage::VS_TCS);
-static constexpr Stage tess_eval_geometry_gs(HWStage::GS, SWStage::TES_GS);
+static constexpr Stage vertex_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::VS_GS);
+static constexpr Stage vertex_tess_control_hs(AC_HW_HULL_SHADER, SWStage::VS_TCS);
+static constexpr Stage tess_eval_geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::TES_GS);
  /* pre-GFX9 */
-static constexpr Stage vertex_ls(HWStage::LS, SWStage::VS); /* vertex before tessellation control */
-static constexpr Stage vertex_es(HWStage::ES, SWStage::VS); /* vertex before geometry */
-static constexpr Stage tess_control_hs(HWStage::HS, SWStage::TCS);
-static constexpr Stage tess_eval_es(HWStage::ES,
+static constexpr Stage vertex_ls(AC_HW_LOCAL_SHADER,
+                                 SWStage::VS); /* vertex before tessellation control */
+static constexpr Stage vertex_es(AC_HW_EXPORT_SHADER, SWStage::VS); /* vertex before geometry */
+static constexpr Stage tess_control_hs(AC_HW_HULL_SHADER, SWStage::TCS);
+static constexpr Stage tess_eval_es(AC_HW_EXPORT_SHADER,
                                      SWStage::TES); /* tessellation evaluation before geometry */
-static constexpr Stage geometry_gs(HWStage::GS, SWStage::GS);
+static constexpr Stage geometry_gs(AC_HW_LEGACY_GEOMETRY_SHADER, SWStage::GS);
  /* Raytracing */
-static constexpr Stage raytracing_cs(HWStage::CS, SWStage::RT);
+static constexpr Stage raytracing_cs(AC_HW_COMPUTE_SHADER, SWStage::RT);
  
  struct DeviceInfo {
     uint16_t lds_encoding_granule;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp

index a971e45..8ddb2c7 100644 (file)
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -2595,7 +2595,7 @@ lower_to_hw_instr(Program* program)
                    bld.sop1(aco_opcode::p_load_symbol, Definition(reg.advance(4), s1),
                             Operand::c32(aco_symbol_scratch_addr_hi));
                    scratch_addr.setFixed(reg);
-               } else if (program->stage.hw != HWStage::CS) {
+               } else if (program->stage.hw != AC_HW_COMPUTE_SHADER) {
                    bld.smem(aco_opcode::s_load_dwordx2, instr->definitions[0], scratch_addr,
                             Operand::zero());
                    scratch_addr.setFixed(instr->definitions[0].physReg());
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp

index ad7dc02..c60b681 100644 (file)
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -2005,7 +2005,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
                 ctx.info[instr->operands[0].tempId()].instr->definitions[1].getTemp());
              break;
           } else if ((ctx.program->stage.num_sw_stages() > 1 ||
-                     ctx.program->stage.hw == HWStage::NGG) &&
+                     ctx.program->stage.hw == AC_HW_NEXT_GEN_GEOMETRY_SHADER) &&
                      instr->pass_flags == 1) {
              /* In case of merged shaders, pass_flags=1 means that all lanes are active (exec=-1), so
               * s_and is unnecessary. */
diff --git a/src/amd/compiler/aco_spill.cpp b/src/amd/compiler/aco_spill.cpp

index 1b64d60..93b7c97 100644 (file)
--- a/src/amd/compiler/aco_spill.cpp
+++ b/src/amd/compiler/aco_spill.cpp
@@ -1431,7 +1431,7 @@ load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset, Block& block,
           bld.sop1(aco_opcode::p_load_symbol, bld.def(s1), Operand::c32(aco_symbol_scratch_addr_hi));
        private_segment_buffer =
           bld.pseudo(aco_opcode::p_create_vector, bld.def(s2), addr_lo, addr_hi);
-   } else if (ctx.program->stage.hw != HWStage::CS) {
+   } else if (ctx.program->stage.hw != AC_HW_COMPUTE_SHADER) {
        private_segment_buffer =
           bld.smem(aco_opcode::s_load_dwordx2, bld.def(s2), private_segment_buffer, Operand::zero());
     }
author	Timur Kristóf <timur.kristof@gmail.com>
	Sat, 13 May 2023 15:55:54 +0000 (17:55 +0200)
committer	Marge Bot <emma+marge@anholt.net>
	Fri, 23 Jun 2023 12:49:04 +0000 (12:49 +0000)
src/amd/compiler/aco_assembler.cpp		patch \| blob \| history
src/amd/compiler/aco_insert_exec_mask.cpp		patch \| blob \| history
src/amd/compiler/aco_instruction_selection.cpp		patch \| blob \| history
src/amd/compiler/aco_instruction_selection_setup.cpp		patch \| blob \| history
src/amd/compiler/aco_ir.h		patch \| blob \| history
src/amd/compiler/aco_lower_to_hw_instr.cpp		patch \| blob \| history
src/amd/compiler/aco_optimizer.cpp		patch \| blob \| history
src/amd/compiler/aco_spill.cpp		patch \| blob \| history