zink: implement passthrough tcs shader injection
author Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Sun, 19 Jul 2020 13:12:40 +0000 (09:12 -0400)
committer Marge Bot <eric+marge@anholt.net>
Tue, 22 Dec 2020 13:46:38 +0000 (13:46 +0000)
GL allows the pipeline to "infer" a tcs shader if a tes shader is bound, using
API-specified default values for gl_TessLevelOuter and gl_TessLevelInner,
but VK requires that both shaders be explicitly present.

To handle this, create a generic tcs which translates all vs outputs to
invocation-based arrays and copies the appropriate value to the expected tes
input array location. Also emit the default inner/outer values as push constants
so we don't have to recompile the shaders whenever the API calls occur.

Reviewed-by: Dave Airlie <airlied@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8152>

src/gallium/drivers/zink/nir_to_spirv/nir_to_spirv.c
src/gallium/drivers/zink/zink_compiler.c
src/gallium/drivers/zink/zink_compiler.h
src/gallium/drivers/zink/zink_context.h
src/gallium/drivers/zink/zink_draw.c
src/gallium/drivers/zink/zink_program.c
src/gallium/drivers/zink/zink_shader_keys.h

index 73a6e8d..f52bd3c 100644 (file)
@@ -72,7 +72,8 @@ struct ntv_context {
    SpvId front_face_var, instance_id_var, vertex_id_var,
          primitive_id_var, invocation_id_var, // geometry
          sample_mask_type, sample_id_var, sample_pos_var,
-         tess_patch_vertices_in, tess_coord_var; // tess
+         tess_patch_vertices_in, tess_coord_var, // tess
+         push_const_var;
 };
 
 static SpvId
@@ -320,19 +321,28 @@ handle_handle_slot(struct ntv_context *ctx, struct nir_variable *var)
    return handle_slot(ctx, var->data.location);
 }
 
-static void
-emit_input(struct ntv_context *ctx, struct nir_variable *var)
+static SpvId
+input_var_init(struct ntv_context *ctx, struct nir_variable *var)
 {
    SpvId var_type = get_glsl_type(ctx, var->type);
+   SpvStorageClass sc = get_storage_class(var);
+   if (sc == SpvStorageClassPushConstant)
+      spirv_builder_emit_decoration(&ctx->builder, var_type, SpvDecorationBlock);
    SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
-                                                   SpvStorageClassInput,
-                                                   var_type);
-   SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type,
-                                         SpvStorageClassInput);
+                                                   sc, var_type);
+   SpvId var_id = spirv_builder_emit_var(&ctx->builder, pointer_type, sc);
 
    if (var->name)
       spirv_builder_emit_name(&ctx->builder, var_id, var->name);
+   if (var->data.mode == nir_var_mem_push_const)
+      ctx->push_const_var = var_id;
+   return var_id;
+}
 
+static void
+emit_input(struct ntv_context *ctx, struct nir_variable *var)
+{
+   SpvId var_id = input_var_init(ctx, var);
    unsigned slot = var->data.location;
    if (ctx->stage == MESA_SHADER_VERTEX)
       spirv_builder_emit_location(&ctx->builder, var_id,
@@ -1768,6 +1778,81 @@ emit_store_deref(struct ntv_context *ctx, nir_intrinsic_instr *intr)
    spirv_builder_emit_store(&ctx->builder, ptr, result);
 }
 
+/* emit a load_push_constant as a series of uint32 loads from ctx->push_const_var; FIXME: this is currently VERY specific to injected TCS usage */
+static void
+emit_load_push_const(struct ntv_context *ctx, nir_intrinsic_instr *intr)
+{
+   unsigned bit_size = nir_dest_bit_size(intr->dest);
+   SpvId uint_type = get_uvec_type(ctx, 32, 1);
+   SpvId load_type = get_uvec_type(ctx, 32, 1);
+
+   /* number of components being loaded */
+   unsigned num_components = nir_dest_num_components(intr->dest);
+   /* we need to grab 2x32 to fill the 64bit value */
+   if (bit_size == 64)
+      num_components *= 2;
+   SpvId constituents[num_components];
+   SpvId result;
+
+   /* destination type for the load */
+   SpvId type = get_dest_uvec_type(ctx, &intr->dest);
+   /* id of a constant holding the size of one array member (a uint32) in bytes */
+   SpvId uint_size = emit_uint_const(ctx, 32, sizeof(uint32_t));
+   SpvId one = emit_uint_const(ctx, 32, 1);
+
+   /* we grab a single array member at a time, so it's a pointer to a uint */
+   SpvId pointer_type = spirv_builder_type_pointer(&ctx->builder,
+                                                   SpvStorageClassPushConstant,
+                                                   load_type);
+
+   SpvId member = emit_uint_const(ctx, 32, 0);
+   /* this is the offset (in bytes) that we're accessing:
+    * it may be a const value or it may be dynamic in the shader; only src[0] is read here and it is converted to a uint index below
+    */
+   SpvId offset = get_src(ctx, &intr->src[0]);
+   offset = emit_binop(ctx, SpvOpUDiv, uint_type, offset, uint_size);
+   /* OpAccessChain takes an array of indices that drill into a hierarchy based on the type:
+    * index 0 is accessing 'base'
+    * index 1 is accessing 'base[index 1]'
+    * here the indices are always { constant 0, offset }
+    */
+   for (unsigned i = 0; i < num_components; i++) {
+      SpvId indices[2] = { member, offset };
+      SpvId ptr = spirv_builder_emit_access_chain(&ctx->builder, pointer_type,
+                                                  ctx->push_const_var, indices,
+                                                  ARRAY_SIZE(indices));
+      /* load a single value into the constituents array */
+      constituents[i] = spirv_builder_emit_load(&ctx->builder, load_type, ptr);
+      /* increment to the next uint member index for the next load */
+      offset = emit_binop(ctx, SpvOpIAdd, uint_type, offset, one);
+   }
+
+   /* if we're loading a 64bit value, we have to reassemble all the u32 values we've loaded into u64 values
+    * by creating uvec2 composites and bitcasting them to u64 values
+    */
+   if (bit_size == 64) {
+      num_components /= 2;
+      type = get_uvec_type(ctx, 64, num_components);
+      SpvId u64_type = get_uvec_type(ctx, 64, 1);
+      for (unsigned i = 0; i < num_components; i++) {
+         constituents[i] = spirv_builder_emit_composite_construct(&ctx->builder, get_uvec_type(ctx, 32, 2), constituents + i * 2, 2);
+         constituents[i] = emit_bitcast(ctx, u64_type, constituents[i]);
+      }
+   }
+   /* if loading more than 1 value, reassemble the results into the desired type,
+    * otherwise just use the loaded result
+    */
+   if (num_components > 1) {
+      result = spirv_builder_emit_composite_construct(&ctx->builder,
+                                                      type,
+                                                      constituents,
+                                                      num_components);
+   } else
+      result = constituents[0];
+
+   store_dest(ctx, &intr->dest, result, nir_type_uint);
+}
+
 static SpvId
 create_builtin_var(struct ntv_context *ctx, SpvId var_type,
                    SpvStorageClass storage_class,
@@ -1882,6 +1967,10 @@ emit_intrinsic(struct ntv_context *ctx, nir_intrinsic_instr *intr)
       emit_store_deref(ctx, intr);
       break;
 
+   case nir_intrinsic_load_push_constant:
+      emit_load_push_const(ctx, intr);
+      break;
+
    case nir_intrinsic_load_front_face:
       emit_load_front_face(ctx, intr);
       break;
@@ -2660,6 +2749,9 @@ nir_to_spirv(struct nir_shader *s, const struct zink_so_info *so_info,
    ctx.so_outputs = _mesa_hash_table_create(ctx.mem_ctx, _mesa_hash_u32,
                                             _mesa_key_u32_equal);
 
+   nir_foreach_variable_with_modes(var, s, nir_var_mem_push_const)
+      input_var_init(&ctx, var);
+
    nir_foreach_shader_in_variable(var, s)
       emit_input(&ctx, var);
 
index 83722e0..5e2516e 100644 (file)
@@ -507,6 +507,9 @@ zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
       struct zink_gfx_program *prog = (void*)entry->key;
       _mesa_hash_table_remove_key(ctx->program_cache, prog->shaders);
       prog->shaders[pipe_shader_type_from_mesa(shader->nir->info.stage)] = NULL;
+      if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
+            /* automatically destroy generated tcs shaders when tes is destroyed */
+            zink_shader_free(ctx, shader->generated);
       zink_gfx_program_reference(screen, &prog, NULL);
    }
    _mesa_set_destroy(shader->programs, NULL);
@@ -514,3 +517,135 @@ zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
    ralloc_free(shader->nir);
    FREE(shader);
 }
+
+
+/* creating a passthrough tcs shader that's roughly:
+
+#version 150
+#extension GL_ARB_tessellation_shader : require
+
+in vec4 some_var[gl_MaxPatchVertices];
+out vec4 some_var_out[];
+
+layout(push_constant) uniform tcsPushConstants {
+    layout(offset = 0) float TessLevelInner[2];
+    layout(offset = 8) float TessLevelOuter[4];
+} u_tcsPushConstants;
+layout(vertices = $vertices_per_patch) out;
+void main()
+{
+  gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
+  gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
+  some_var_out[gl_InvocationID] = some_var[gl_InvocationID];
+}
+
+(one in/out pair is created per vs output variable; the push constant members are created as uint arrays in the nir below)
+*/
+struct zink_shader *
+zink_shader_tcs_create(struct zink_context *ctx, struct zink_shader *vs)
+{
+   unsigned vertices_per_patch = ctx->gfx_pipeline_state.vertices_per_patch;
+   struct zink_shader *ret = CALLOC_STRUCT(zink_shader); /* NOTE(review): result is dereferenced below without a NULL check */
+   ret->shader_id = 0; //special value for internal shaders
+   ret->programs = _mesa_pointer_set_create(NULL);
+
+   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &nir_options, NULL);
+   nir_function *fn = nir_function_create(nir, "main");
+   fn->is_entrypoint = true;
+   nir_function_impl *impl = nir_function_impl_create(fn);
+
+   nir_builder b;
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_block(nir_start_block(impl));
+
+   nir_intrinsic_instr *invocation_id = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_invocation_id);
+   nir_ssa_dest_init(&invocation_id->instr, &invocation_id->dest, 1, 32, "gl_InvocationID");
+   nir_builder_instr_insert(&b, &invocation_id->instr);
+
+   nir_foreach_shader_out_variable(var, vs->nir) {
+      const struct glsl_type *type = var->type;
+      const struct glsl_type *in_type = var->type;
+      const struct glsl_type *out_type = var->type;
+      char buf[1024];
+      snprintf(buf, sizeof(buf), "%s_out", var->name);
+      in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
+      out_type = glsl_array_type(type, vertices_per_patch, 0);
+
+      nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
+      nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
+      out->data.location = in->data.location = var->data.location;
+      out->data.location_frac = in->data.location_frac = var->data.location_frac;
+
+      /* gl_in[] receives values from equivalent built-in output
+         variables written by the vertex shader (section 2.14.7).  Each array
+         element of gl_in[] is a structure holding values for a specific vertex of
+         the input patch.  The length of gl_in[] is equal to the
+         implementation-dependent maximum patch size (gl_MaxPatchVertices).
+         - ARB_tessellation_shader
+       */
+      for (unsigned i = 0; i < vertices_per_patch; i++) {
+         /* we need to load the invocation-specific value of the vertex output and then store it to element i of the output array; the nir_if restricts this copy to invocation i */
+         nir_if *start_block = nir_push_if(&b, nir_ieq(&b, &invocation_id->dest.ssa, nir_imm_int(&b, i)));
+         nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), &invocation_id->dest.ssa);
+         nir_ssa_def *load = nir_load_deref(&b, in_array_var);
+         nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i);
+         nir_store_deref(&b, out_array_var, load, 0xff);
+         nir_pop_if(&b, start_block);
+      }
+   }
+   nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
+   gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
+   gl_TessLevelInner->data.patch = 1;
+   nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
+   gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
+   gl_TessLevelOuter->data.patch = 1;
+
+   /* hacks so we can size these right for now: a struct of uint[2] at byte offset 0 and uint[4] at byte offset 8 */
+   struct glsl_struct_field *fields = ralloc_size(nir, 2 * sizeof(struct glsl_struct_field));
+   fields[0].type = glsl_array_type(glsl_uint_type(), 2, 0);
+   fields[0].name = ralloc_asprintf(nir, "gl_TessLevelInner");
+   fields[0].offset = 0;
+   fields[1].type = glsl_array_type(glsl_uint_type(), 4, 0);
+   fields[1].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
+   fields[1].offset = 8;
+   nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
+                                                 glsl_struct_type(fields, 2, "struct", false), "pushconst");
+   pushconst->data.location = VARYING_SLOT_VAR0;
+
+   nir_intrinsic_instr *load_inner = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+   load_inner->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+   nir_intrinsic_set_base(load_inner, 0);
+   nir_intrinsic_set_range(load_inner, 8);
+   load_inner->num_components = 2;
+   nir_ssa_dest_init(&load_inner->instr, &load_inner->dest, 2, 32, "TessLevelInner");
+   nir_builder_instr_insert(&b, &load_inner->instr);
+
+   nir_intrinsic_instr *load_outer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+   load_outer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
+   nir_intrinsic_set_base(load_outer, 8);
+   nir_intrinsic_set_range(load_outer, 16);
+   load_outer->num_components = 4;
+   nir_ssa_dest_init(&load_outer->instr, &load_outer->dest, 4, 32, "TessLevelOuter");
+   nir_builder_instr_insert(&b, &load_outer->instr);
+
+   for (unsigned i = 0; i < 2; i++) {
+      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
+      nir_store_deref(&b, store_idx, nir_channel(&b, &load_inner->dest.ssa, i), 0xff);
+   }
+   for (unsigned i = 0; i < 4; i++) {
+      nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
+      nir_store_deref(&b, store_idx, nir_channel(&b, &load_outer->dest.ssa, i), 0xff);
+   }
+
+   nir->info.tess.tcs_vertices_out = vertices_per_patch;
+   nir_validate_shader(nir, "created");
+
+   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
+   optimize_nir(nir);
+   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
+   NIR_PASS_V(nir, lower_discard_if);
+   NIR_PASS_V(nir, nir_convert_from_ssa, true);
+
+   ret->nir = nir;
+   ret->is_generated = true;
+   return ret;
+}
index 47e6bcf..3795958 100644 (file)
@@ -74,6 +74,10 @@ struct zink_shader {
 
    bool has_tess_shader; // vertex shaders need to know if a tesseval shader exists
    bool has_geometry_shader; // vertex shaders need to know if a geometry shader exists
+   union {
+      struct zink_shader *generated; // a generated shader that this shader "owns"
+      bool is_generated; // if this is a driver-created shader (e.g., tcs)
+   };
 };
 
 VkShaderModule
@@ -87,4 +91,7 @@ zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
 void
 zink_shader_free(struct zink_context *ctx, struct zink_shader *shader);
 
+struct zink_shader *
+zink_shader_tcs_create(struct zink_context *ctx, struct zink_shader *vs);
+
 #endif
index 04a1a2f..bb03800 100644 (file)
@@ -128,8 +128,13 @@ struct zink_context {
 
    struct pipe_stencil_ref stencil_ref;
 
-   float default_inner_level[2];
-   float default_outer_level[4];
+   union {
+      struct {
+         float default_inner_level[2];
+         float default_outer_level[4];
+      };
+      float tess_levels[6];
+   };
 
    struct list_head suspended_queries;
    struct list_head primitives_generated_queries;
index 86c26dc..15281b4 100644 (file)
@@ -241,7 +241,9 @@ zink_draw_vbo(struct pipe_context *pctx,
       util_primconvert_draw_vbo(ctx->primconvert, dinfo, &draws[0]);
       return;
    }
-
+   if (ctx->gfx_pipeline_state.vertices_per_patch != dinfo->vertices_per_patch)
+      ctx->gfx_pipeline_state.hash = 0;
+   ctx->gfx_pipeline_state.vertices_per_patch = dinfo->vertices_per_patch;
    struct zink_gfx_program *gfx_program = get_gfx_program(ctx);
    if (!gfx_program)
       return;
@@ -249,7 +251,6 @@ zink_draw_vbo(struct pipe_context *pctx,
    if (ctx->gfx_pipeline_state.primitive_restart != !!dinfo->primitive_restart)
       ctx->gfx_pipeline_state.hash = 0;
    ctx->gfx_pipeline_state.primitive_restart = !!dinfo->primitive_restart;
-   ctx->gfx_pipeline_state.vertices_per_patch = dinfo->vertices_per_patch;
 
    VkPipeline pipeline = zink_get_gfx_pipeline(screen, gfx_program,
                                                &ctx->gfx_pipeline_state,
@@ -477,6 +478,11 @@ zink_draw_vbo(struct pipe_context *pctx,
                            gfx_program->layout, 0, 1, &desc_set, 0, NULL);
    zink_bind_vertex_buffers(batch, ctx);
 
+   if (gfx_program->shaders[PIPE_SHADER_TESS_CTRL] && gfx_program->shaders[PIPE_SHADER_TESS_CTRL]->is_generated)
+      vkCmdPushConstants(batch->cmdbuf, gfx_program->layout, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
+                         0, sizeof(float) * 6,
+                         &ctx->tess_levels[0]);
+
    zink_query_update_gs_states(ctx);
 
    if (ctx->num_so_targets) {
index 55a428e..89ef837 100644 (file)
@@ -151,6 +151,14 @@ create_pipeline_layout(VkDevice dev, VkDescriptorSetLayout dsl)
    plci.pSetLayouts = &dsl;
    plci.setLayoutCount = 1;
 
+
+   VkPushConstantRange pcr = {};
+   pcr.stageFlags = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
+   pcr.offset = 0;
+   pcr.size = sizeof(float) * 6;
+   plci.pushConstantRangeCount = 1;
+   plci.pPushConstantRanges = &pcr;
+
    VkPipelineLayout layout;
    if (vkCreatePipelineLayout(dev, &plci, NULL, &layout) != VK_SUCCESS) {
       debug_printf("vkCreatePipelineLayout failed!\n");
@@ -180,6 +188,18 @@ shader_key_fs_gen(struct zink_context *ctx, struct zink_shader *zs,
 }
 
 static void
+shader_key_tcs_gen(struct zink_context *ctx, struct zink_shader *zs,
+                   struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key)
+{ /* fills the tcs member of the shader key and sets key->size to match */
+   struct zink_tcs_key *tcs_key = &key->key.tcs;
+   key->size = sizeof(struct zink_tcs_key);
+
+   tcs_key->shader_id = zs->shader_id;
+   tcs_key->vertices_per_patch = ctx->gfx_pipeline_state.vertices_per_patch; /* current patch size from the gfx pipeline state */
+   tcs_key->vs_outputs_written = shaders[PIPE_SHADER_VERTEX]->nir->info.outputs_written; /* NOTE(review): assumes a vertex shader is always present in shaders[] */
+}
+
+static void
 shader_key_dummy_gen(struct zink_context *ctx, struct zink_shader *zs,
                      struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key)
 {
@@ -195,7 +215,7 @@ typedef void (*zink_shader_key_gen)(struct zink_context *ctx, struct zink_shader
 static zink_shader_key_gen shader_key_vtbl[] =
 {
    [MESA_SHADER_VERTEX] = shader_key_dummy_gen,
-   [MESA_SHADER_TESS_CTRL] = shader_key_dummy_gen,
+   [MESA_SHADER_TESS_CTRL] = shader_key_tcs_gen,
    [MESA_SHADER_TESS_EVAL] = shader_key_dummy_gen,
    [MESA_SHADER_GEOMETRY] = shader_key_dummy_gen,
    [MESA_SHADER_FRAGMENT] = shader_key_fs_gen,
@@ -298,6 +318,12 @@ update_shader_modules(struct zink_context *ctx, struct zink_shader *stages[ZINK_
       unsigned type = u_bit_scan(&dirty_shader_stages);
       dirty[tgsi_processor_to_shader_stage(type)] = stages[type];
    }
+   if (ctx->dirty_shader_stages & (1 << PIPE_SHADER_TESS_EVAL)) {
+      if (dirty[MESA_SHADER_TESS_EVAL] && !dirty[MESA_SHADER_TESS_CTRL]) {
+         dirty[MESA_SHADER_TESS_CTRL] = stages[PIPE_SHADER_TESS_CTRL] = zink_shader_tcs_create(ctx, stages[PIPE_SHADER_VERTEX]);
+         dirty[MESA_SHADER_TESS_EVAL]->generated = stages[PIPE_SHADER_TESS_CTRL];
+      }
+   }
 
    for (int i = 0; i < ZINK_SHADER_COUNT; ++i) {
       enum pipe_shader_type type = pipe_shader_type_from_mesa(i);
index 92836b6..962651f 100644 (file)
@@ -32,6 +32,12 @@ struct zink_fs_key {
    bool samples;
 };
 
+struct zink_tcs_key { /* key data for tess control shaders */
+   unsigned shader_id; /* copied from zink_shader::shader_id */
+   unsigned vertices_per_patch; /* copied from gfx_pipeline_state.vertices_per_patch */
+   uint64_t vs_outputs_written; /* copied from the vertex shader's nir info.outputs_written */
+};
+
 /* a shader key is used for swapping out shader modules based on pipeline states,
  * e.g., if sampleCount changes, we must verify that the fs doesn't need a recompile
  *       to account for GL ignoring gl_SampleMask in some cases when VK will not
@@ -40,6 +46,7 @@ struct zink_fs_key {
 struct zink_shader_key {
    union {
       struct zink_fs_key fs;
+      struct zink_tcs_key tcs;
    } key;
    uint32_t size;
 };