radeonsi: change si_shader_output_values::vertex_stream to a bitmask

author Marek Olšák <marek.olsak@amd.com>

Mon, 13 Dec 2021 06:14:44 +0000 (01:14 -0500)

committer Marge Bot <emma+marge@anholt.net>

Wed, 5 Jan 2022 12:46:31 +0000 (12:46 +0000)
author Marek Olšák <marek.olsak@amd.com>
Mon, 13 Dec 2021 06:14:44 +0000 (01:14 -0500)
committer Marge Bot <emma+marge@anholt.net>
Wed, 5 Jan 2022 12:46:31 +0000 (12:46 +0000)
diff --git a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c

index 0dcb62d..6c56722 100644 (file)
--- a/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
+++ b/src/gallium/drivers/radeonsi/gfx10_shader_ngg.c
@@ -244,7 +244,7 @@ static void build_streamout_vertex(struct si_shader_context *ctx, LLVMValueRef *
        for (unsigned comp = 0; comp < 4; comp++) {
           tmp = ac_build_gep0(&ctx->ac, vertexptr, LLVMConstInt(ctx->ac.i32, 4 * reg + comp, false));
           out.values[comp] = LLVMBuildLoad(builder, tmp, "");
-         out.vertex_stream[comp] = (info->output_streams[reg] >> (2 * comp)) & 3;
+         out.vertex_streams = info->output_streams[reg];
        }
  
        si_llvm_streamout_store_output(ctx, so_buffer, offset, &so->output[i], &out);
@@ -1425,7 +1425,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
        outputs[i].semantic = info->output_semantic[i];
  
        for (unsigned j = 0; j < 4; j++) {
-         outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
+         outputs[i].vertex_streams = info->output_streams[i];
  
           /* TODO: we may store more outputs than streamout needs,
            * but streamout performance isn't that important.
@@ -1613,6 +1613,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
  
        if (ctx->shader->key.ge.mono.u.vs_export_prim_id) {
           outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
+         outputs[i].vertex_streams = 0;
  
           if (ctx->stage == MESA_SHADER_VERTEX) {
              /* Wait for GS stores to finish. */
@@ -1629,8 +1630,6 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi)
           outputs[i].values[0] = ac_to_float(&ctx->ac, outputs[i].values[0]);
           for (unsigned j = 1; j < 4; j++)
              outputs[i].values[j] = LLVMGetUndef(ctx->ac.f32);
-
-         memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream));
           i++;
        }
  
@@ -2148,7 +2147,7 @@ void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx)
              tmp = ngg_gs_get_emit_output_ptr(ctx, vertexptr, out_idx);
              tmp = LLVMBuildLoad(builder, tmp, "");
              outputs[i].values[j] = ac_to_float(&ctx->ac, tmp);
-            outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
+            outputs[i].vertex_streams = info->output_streams[i];
           }
        }
  
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h

index fc923df..70f23d8 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -32,7 +32,7 @@ struct pipe_debug_callback;
  
  struct si_shader_output_values {
     LLVMValueRef values[4];
-   ubyte vertex_stream[4];
+   ubyte vertex_streams;
     ubyte semantic;
  };
  
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c

index 44ac3a5..5a980a5 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_gs.c
@@ -469,10 +469,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
     /* Fill in output information. */
     for (i = 0; i < gsinfo->num_outputs; ++i) {
        outputs[i].semantic = gsinfo->output_semantic[i];
-
-      for (int chan = 0; chan < 4; chan++) {
-         outputs[i].vertex_stream[chan] = (gsinfo->output_streams[i] >> (2 * chan)) & 3;
-      }
+      outputs[i].vertex_streams = gsinfo->output_streams[i];
     }
  
     LLVMBasicBlockRef end_bb;
@@ -500,7 +497,7 @@ struct si_shader *si_generate_gs_copy_shader(struct si_screen *sscreen,
        for (i = 0; i < gsinfo->num_outputs; ++i) {
           for (unsigned chan = 0; chan < 4; chan++) {
              if (!(gsinfo->output_usagemask[i] & (1 << chan)) ||
-                outputs[i].vertex_stream[chan] != stream) {
+                ((outputs[i].vertex_streams >> (chan * 2)) & 0x3) != stream) {
                 outputs[i].values[chan] = LLVMGetUndef(ctx.ac.f32);
                 continue;
              }
diff --git a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c

index 9b77c8b..e3a5a0e 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
+++ b/src/gallium/drivers/radeonsi/si_shader_llvm_vs.c
@@ -284,7 +284,7 @@ void si_llvm_streamout_store_output(struct si_shader_context *ctx, LLVMValueRef
  
     /* Load the output as int. */
     for (int j = 0; j < num_comps; j++) {
-      assert(stream_out->stream == shader_out->vertex_stream[start + j]);
+      assert(stream_out->stream == ((shader_out->vertex_streams >> ((start + j) * 2)) & 0x3));
  
        out[j] = ac_to_integer(&ctx->ac, shader_out->values[start + j]);
     }
@@ -452,8 +452,11 @@ static void si_prepare_param_exports(struct si_shader_context *ctx,
     for (unsigned i = 0; i < noutput; i++) {
        unsigned semantic = outputs[i].semantic;
  
-      if (outputs[i].vertex_stream[0] != 0 && outputs[i].vertex_stream[1] != 0 &&
-          outputs[i].vertex_stream[2] != 0 && outputs[i].vertex_stream[3] != 0)
+      /* Skip if no channel writes to stream 0 (all four 2-bit stream fields are non-zero). */
+      if (outputs[i].vertex_streams & 0x03 &&
+          outputs[i].vertex_streams & 0x0c &&
+          outputs[i].vertex_streams & 0x30 &&
+          outputs[i].vertex_streams & 0xc0)
           continue;
  
        switch (semantic) {
@@ -768,7 +771,7 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi)
  
        for (j = 0; j < 4; j++) {
           outputs[i].values[j] = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + j], "");
-         outputs[i].vertex_stream[j] = (info->output_streams[i] >> (2 * j)) & 3;
+         outputs[i].vertex_streams = info->output_streams[i];
        }
     }
  
@@ -778,11 +781,10 @@ void si_llvm_emit_vs_epilogue(struct ac_shader_abi *abi)
     /* Export PrimitiveID. */
     if (ctx->shader->key.ge.mono.u.vs_export_prim_id) {
        outputs[i].semantic = VARYING_SLOT_PRIMITIVE_ID;
+      outputs[i].vertex_streams = 0;
        outputs[i].values[0] = ac_to_float(&ctx->ac, si_get_primitive_id(ctx, 0));
        for (j = 1; j < 4; j++)
           outputs[i].values[j] = LLVMConstReal(ctx->ac.f32, 0);
-
-      memset(outputs[i].vertex_stream, 0, sizeof(outputs[i].vertex_stream));
        i++;
     }
author	Marek Olšák <marek.olsak@amd.com>
	Mon, 13 Dec 2021 06:14:44 +0000 (01:14 -0500)
committer	Marge Bot <emma+marge@anholt.net>
	Wed, 5 Jan 2022 12:46:31 +0000 (12:46 +0000)
src/gallium/drivers/radeonsi/gfx10_shader_ngg.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader_internal.h		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader_llvm_gs.c		patch \| blob \| history
src/gallium/drivers/radeonsi/si_shader_llvm_vs.c		patch \| blob \| history