amd: Cleanup old GS intrinsics code.

author Timur Kristóf <timur.kristof@gmail.com>
Tue, 25 Apr 2023 16:40:18 +0000 (18:40 +0200)
committer Marge Bot <emma+marge@anholt.net>
Thu, 4 May 2023 19:08:59 +0000 (19:08 +0000)
diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py

index 9a23867..ea4065d 100644 (file)
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -101,8 +101,8 @@ aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);
  
  enum sendmsg {
     sendmsg_none = 0,
-   _sendmsg_gs = 2, /* gfx6 to gfx10.3 */
-   _sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
+   sendmsg_gs = 2, /* gfx6 to gfx10.3 */
+   sendmsg_gs_done = 3, /* gfx6 to gfx10.3 */
     sendmsg_hs_tessfactor = 2, /* gfx11+ */
     sendmsg_dealloc_vgprs = 3, /* gfx11+ */
     sendmsg_save_wave = 4, /* gfx8 to gfx10.3 */
@@ -127,20 +127,6 @@ enum sendmsg_rtn {
     sendmsg_rtn_mask = 0xff,
  };
  
-inline sendmsg
-sendmsg_gs(bool cut, bool emit, unsigned stream)
-{
-    assert(stream < 4);
-    return (sendmsg)((unsigned)_sendmsg_gs | (cut << 4) | (emit << 5) | (stream << 8));
-}
-
-inline sendmsg
-sendmsg_gs_done(bool cut, bool emit, unsigned stream)
-{
-    assert(stream < 4);
-    return (sendmsg)((unsigned)_sendmsg_gs_done | (cut << 4) | (emit << 5) | (stream << 8));
-}
-
  enum bperm_swiz {
     bperm_b1_sign = 8,
     bperm_b3_sign = 9,
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp

index db33b83..2c9b19e 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -8998,20 +8998,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
  
        break;
     }
-   case nir_intrinsic_emit_vertex_with_counter: {
-      assert(ctx->stage.hw == HWStage::GS);
-      unsigned stream = nir_intrinsic_stream_id(instr);
-      bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1, sendmsg_gs(false, true, stream));
-      break;
-   }
-   case nir_intrinsic_end_primitive_with_counter: {
-      if (ctx->stage.hw != HWStage::NGG) {
-         unsigned stream = nir_intrinsic_stream_id(instr);
-         bld.sopp(aco_opcode::s_sendmsg, bld.m0(ctx->gs_wave_id), -1,
-                  sendmsg_gs(true, false, stream));
-      }
-      break;
-   }
     case nir_intrinsic_sendmsg_amd: {
        unsigned imm = nir_intrinsic_base(instr);
        Temp m0_content = bld.as_uniform(get_ssa_temp(ctx, instr->src[0].ssa));
@@ -9035,24 +9021,6 @@ visit_intrinsic(isel_context* ctx, nir_intrinsic_instr* instr)
        bld.copy(Definition(get_ssa_temp(ctx, &instr->dest.ssa)), lanecount_to_mask(ctx, src));
        break;
     }
-   case nir_intrinsic_alloc_vertices_and_primitives_amd: {
-      assert(ctx->stage.hw == HWStage::NGG);
-      Temp num_vertices = get_ssa_temp(ctx, instr->src[0].ssa);
-      Temp num_primitives = get_ssa_temp(ctx, instr->src[1].ssa);
-
-      /* Put the number of vertices and primitives into m0 for the GS_ALLOC_REQ */
-      Temp tmp =
-         bld.sop2(aco_opcode::s_lshl_b32, bld.def(s1), bld.def(s1, scc),
-                  num_primitives, Operand::c32(12u));
-      tmp = bld.sop2(aco_opcode::s_or_b32, bld.m0(bld.def(s1)), bld.def(s1, scc),
-                     tmp, num_vertices);
-
-      /* Request the SPI to allocate space for the primitives and vertices
-       * that will be exported by the threadgroup.
-       */
-      bld.sopp(aco_opcode::s_sendmsg, bld.m0(tmp), -1, sendmsg_gs_alloc_req);
-      break;
-   }
     case nir_intrinsic_gds_atomic_add_amd: {
        Temp store_val = get_ssa_temp(ctx, instr->src[0].ssa);
        Temp gds_addr = get_ssa_temp(ctx, instr->src[1].ssa);
@@ -11410,14 +11378,7 @@ select_program(Program* program, unsigned shader_count, struct nir_shader* const
              bld.barrier(aco_opcode::p_barrier,
                          memory_sync_info(storage_shared, semantic_acqrel, scope), scope);
           }
-
-         if (ctx.stage == vertex_geometry_gs || ctx.stage == tess_eval_geometry_gs) {
-            ctx.gs_wave_id = bld.pseudo(aco_opcode::p_extract, bld.def(s1, m0), bld.def(s1, scc),
-                                        get_arg(&ctx, args->merged_wave_info), Operand::c32(2u),
-                                        Operand::c32(8u), Operand::zero());
-         }
-      } else if (ctx.stage == geometry_gs)
-         ctx.gs_wave_id = get_arg(&ctx, args->gs_wave_id);
+      }
  
        visit_cf_list(&ctx, &func->body);
  
diff --git a/src/amd/compiler/aco_instruction_selection.h b/src/amd/compiler/aco_instruction_selection.h

index 65a32a9..771c608 100644 (file)
--- a/src/amd/compiler/aco_instruction_selection.h
+++ b/src/amd/compiler/aco_instruction_selection.h
@@ -91,9 +91,6 @@ struct isel_context {
  
     Temp arg_temps[AC_MAX_ARGS];
  
-   /* GS inputs */
-   Temp gs_wave_id;
-
     /* VS output information */
     bool export_clip_dists;
     unsigned num_clip_distances;
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp

index d5e24b1..c069baa 100644 (file)
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -334,18 +334,18 @@ print_instr_format_specific(enum amd_gfx_level gfx_level, const Instruction* ins
        }
        case aco_opcode::s_sendmsg: {
           unsigned id = imm & sendmsg_id_mask;
-         static_assert(_sendmsg_gs == sendmsg_hs_tessfactor);
-         static_assert(_sendmsg_gs_done == sendmsg_dealloc_vgprs);
+         static_assert(sendmsg_gs == sendmsg_hs_tessfactor);
+         static_assert(sendmsg_gs_done == sendmsg_dealloc_vgprs);
           switch (id) {
           case sendmsg_none: fprintf(output, " sendmsg(MSG_NONE)"); break;
-         case _sendmsg_gs:
+         case sendmsg_gs:
              if (gfx_level >= GFX11)
                 fprintf(output, " sendmsg(hs_tessfactor)");
              else
                 fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",
                         imm & 0x20 ? ", emit" : "", imm >> 8);
              break;
-         case _sendmsg_gs_done:
+         case sendmsg_gs_done:
              if (gfx_level >= GFX11)
                 fprintf(output, " sendmsg(dealloc_vgprs)");
              else
diff --git a/src/amd/compiler/aco_scheduler.cpp b/src/amd/compiler/aco_scheduler.cpp

index 69225ee..3531c05 100644 (file)
--- a/src/amd/compiler/aco_scheduler.cpp
+++ b/src/amd/compiler/aco_scheduler.cpp
@@ -424,7 +424,7 @@ bool
  is_done_sendmsg(amd_gfx_level gfx_level, const Instruction* instr)
  {
     if (gfx_level <= GFX10_3 && instr->opcode == aco_opcode::s_sendmsg)
-      return (instr->sopp().imm & sendmsg_id_mask) == _sendmsg_gs_done;
+      return (instr->sopp().imm & sendmsg_id_mask) == sendmsg_gs_done;
     return false;
  }
  
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c

index ce6a97e..55a178e 100644 (file)
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -3832,16 +3832,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
                                         nir_intrinsic_io_semantics(instr).high_16bits);
        break;
     }
-   case nir_intrinsic_emit_vertex_with_counter: {
-      unsigned stream = nir_intrinsic_stream_id(instr);
-      LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
-      ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
-      break;
-   }
-   case nir_intrinsic_end_primitive:
-   case nir_intrinsic_end_primitive_with_counter:
-      ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
-      break;
     case nir_intrinsic_sendmsg_amd: {
        unsigned imm = nir_intrinsic_base(instr);
        LLVMValueRef m0_content = get_src(ctx, instr->src[0]);
@@ -4122,22 +4112,6 @@ static bool visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *ins
     case nir_intrinsic_load_workgroup_num_input_primitives_amd:
        result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->gs_tg_info), 22, 9);
        break;
-   case nir_intrinsic_alloc_vertices_and_primitives_amd: {
-      /* The caller should only call this conditionally for wave 0.
-       *
-       * Send GS Alloc Req message from the first wave of the group to SPI.
-       * Message payload is:
-       * - bits 0..10: vertices in group
-       * - bits 12..22: primitives in group
-       */
-      LLVMValueRef vtx_cnt = get_src(ctx, instr->src[0]);
-      LLVMValueRef prim_cnt = get_src(ctx, instr->src[1]);
-      LLVMValueRef msg = LLVMBuildShl(ctx->ac.builder, prim_cnt,
-                                      LLVMConstInt(ctx->ac.i32, 12, false), "");
-      msg = LLVMBuildOr(ctx->ac.builder, msg, vtx_cnt, "");
-      ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_ALLOC_REQ, msg);
-      break;
-   }
     case nir_intrinsic_overwrite_vs_arguments_amd:
        ctx->abi->vertex_id_replaced = get_src(ctx, instr->src[0]);
        ctx->abi->instance_id_replaced = get_src(ctx, instr->src[1]);
diff --git a/src/amd/llvm/ac_shader_abi.h b/src/amd/llvm/ac_shader_abi.h

index ec37a00..a17ad6c 100644 (file)
--- a/src/amd/llvm/ac_shader_abi.h
+++ b/src/amd/llvm/ac_shader_abi.h
@@ -59,11 +59,6 @@ struct ac_shader_abi {
     /* Varying -> attribute number mapping. Also NIR-only */
     unsigned fs_input_attr_indices[MAX_VARYING];
  
-   void (*emit_primitive)(struct ac_shader_abi *abi, unsigned stream);
-
-   void (*emit_vertex_with_counter)(struct ac_shader_abi *abi, unsigned stream,
-                                    LLVMValueRef vertexidx, LLVMValueRef *addrs);
-
     LLVMValueRef (*load_tess_varyings)(struct ac_shader_abi *abi, LLVMTypeRef type,
                                        LLVMValueRef vertex_index, LLVMValueRef param_index,
                                        unsigned driver_location, unsigned component,
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c

index ffb4141..81b1931 100644 (file)
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -168,23 +168,6 @@ create_function(struct radv_shader_context *ctx, gl_shader_stage stage, bool has
     }
  }
  
-static void
-visit_emit_vertex_with_counter(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef vertexidx,
-                               LLVMValueRef *addrs)
-{
-   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-   ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
-                    ctx->gs_wave_id);
-}
-
-static void
-visit_end_primitive(struct ac_shader_abi *abi, unsigned stream)
-{
-   struct radv_shader_context *ctx = radv_shader_context_from_abi(abi);
-   ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
-                    ctx->gs_wave_id);
-}
-
  static LLVMValueRef
  radv_load_base_vertex(struct ac_shader_abi *abi, bool non_indexed_is_zero)
  {
@@ -438,11 +421,6 @@ ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
        ctx.shader = shaders[shader_idx];
        ctx.output_mask = 0;
  
-      if (shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && !ctx.shader_info->is_ngg) {
-         ctx.abi.emit_vertex_with_counter = visit_emit_vertex_with_counter;
-         ctx.abi.emit_primitive = visit_end_primitive;
-      }
-
        if (shader_idx && !(shaders[shader_idx]->info.stage == MESA_SHADER_GEOMETRY && info->is_ngg)) {
           /* Execute a barrier before the second shader in
            * a merged shader.
diff --git a/src/compiler/nir/nir_intrinsics.py b/src/compiler/nir/nir_intrinsics.py

index f7dc238..57645cb 100644 (file)
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -1478,8 +1478,6 @@ intrinsic("load_cull_any_enabled_amd", dest_comp=1, bit_sizes=[1], flags=[CAN_EL
  intrinsic("load_cull_small_prim_precision_amd", dest_comp=1, bit_sizes=[32], flags=[CAN_ELIMINATE, CAN_REORDER])
  # Initial edge flags in a Vertex Shader, packed into the format the HW needs for primitive export.
  intrinsic("load_initial_edgeflags_amd", src_comp=[], dest_comp=1, bit_sizes=[32], indices=[])
-# Allocates export space for vertices and primitives. src[] = {num_vertices, num_primitives}.
-intrinsic("alloc_vertices_and_primitives_amd", src_comp=[1, 1], indices=[])
  # Corresponds to s_sendmsg in the GCN/RDNA ISA, src[] = { m0_content }, BASE = imm
  intrinsic("sendmsg_amd", src_comp=[1], indices=[BASE])
  # Overwrites VS input registers, for use with vertex compaction after culling. src = {vertex_id, instance_id}.
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h

index f57cab7..94ab629 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -218,7 +218,6 @@ void si_llvm_es_build_end(struct si_shader_context *ctx);
  void si_preload_esgs_ring(struct si_shader_context *ctx);
  void si_preload_gs_rings(struct si_shader_context *ctx);
  void si_llvm_gs_build_end(struct si_shader_context *ctx);
-void si_llvm_init_gs_callbacks(struct si_shader_context *ctx);
  
  /* si_shader_llvm_tess.c */
  LLVMValueRef si_get_rel_patch_id(struct si_shader_context *ctx);
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm.c b/src/gallium/drivers/radeonsi/si_shader_llvm.c

index 9625aea..e3ee71d 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_llvm.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm.c
@@ -808,8 +808,6 @@ static bool si_llvm_translate_nir(struct si_shader_context *ctx, struct si_shade
        break;
  
     case MESA_SHADER_GEOMETRY:
-      si_llvm_init_gs_callbacks(ctx);
-
        if (ctx->shader->key.ge.as_ngg) {
           LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
           ctx->gs_ngg_scratch = (struct ac_llvm_pointer) {
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c

index 1b94a3e..58aaf94 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -88,45 +88,12 @@ void si_llvm_es_build_end(struct si_shader_context *ctx)
        si_set_es_return_value_for_gs(ctx);
  }
  
-static LLVMValueRef si_get_gs_wave_id(struct si_shader_context *ctx)
-{
-   if (ctx->screen->info.gfx_level >= GFX9)
-      return si_unpack_param(ctx, ctx->args->ac.merged_wave_info, 16, 8);
-   else
-      return ac_get_arg(&ctx->ac, ctx->args->ac.gs_wave_id);
-}
-
  void si_llvm_gs_build_end(struct si_shader_context *ctx)
  {
     if (ctx->screen->info.gfx_level >= GFX9)
        ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
  }
  
-/* Emit one vertex from the geometry shader */
-static void si_llvm_emit_vertex(struct ac_shader_abi *abi, unsigned stream,
-                                LLVMValueRef vertexidx, LLVMValueRef *addrs)
-{
-   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
-   assert(!ctx->shader->key.ge.as_ngg);
-
-   /* Signal vertex emission */
-   ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (stream << 8),
-                    si_get_gs_wave_id(ctx));
-}
-
-/* Cut one primitive from the geometry shader */
-static void si_llvm_emit_primitive(struct ac_shader_abi *abi, unsigned stream)
-{
-   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
-
-   assert(!ctx->shader->key.ge.as_ngg);
-
-   /* Signal primitive cut */
-   ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8),
-                    si_get_gs_wave_id(ctx));
-}
-
  void si_preload_esgs_ring(struct si_shader_context *ctx)
  {
     LLVMBuilderRef builder = ctx->ac.builder;
@@ -247,9 +214,3 @@ void si_preload_gs_rings(struct si_shader_context *ctx)
        ctx->gsvs_ring[stream] = ring;
     }
  }
-
-void si_llvm_init_gs_callbacks(struct si_shader_context *ctx)
-{
-   ctx->abi.emit_vertex_with_counter = si_llvm_emit_vertex;
-   ctx->abi.emit_primitive = si_llvm_emit_primitive;
-}
author	Timur Kristóf <timur.kristof@gmail.com>
	Tue, 25 Apr 2023 16:40:18 +0000 (18:40 +0200)
committer	Marge Bot <emma+marge@anholt.net>
	Thu, 4 May 2023 19:08:59 +0000 (19:08 +0000)
src/amd/compiler/aco_builder_h.py		patch \| blob \| history
src/amd/compiler/aco_instruction_selection.cpp		patch \| blob \| history
src/amd/compiler/aco_instruction_selection.h		patch \| blob \| history
src/amd/compiler/aco_print_ir.cpp		patch \| blob \| history
src/amd/compiler/aco_scheduler.cpp		patch \| blob \| history
src/amd/llvm/ac_nir_to_llvm.c		patch \| blob \| history
src/amd/llvm/ac_shader_abi.h		patch \| blob \| history
src/amd/vulkan/radv_nir_to_llvm.c		patch \| blob \| history
src/compiler/nir/nir_intrinsics.py		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader_internal.h		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader_llvm.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader_llvm_gs.c		patch \| blob \| history