From 1542f3eb470ffefe4b0b30c0547eb72f4fca712c Mon Sep 17 00:00:00 2001 From: Mike Blumenkrantz Date: Tue, 24 Aug 2021 16:01:56 -0400 Subject: [PATCH] zink: decompose vertex attribs into single components when not supported this avoids vbuf in a lot more cases on radv where 3component attribs aren't supported Reviewed-by: Dave Airlie Part-of: --- src/gallium/drivers/zink/zink_compiler.c | 89 +++++++++++++++++++++++++++-- src/gallium/drivers/zink/zink_pipeline.h | 2 + src/gallium/drivers/zink/zink_program.c | 25 +++++--- src/gallium/drivers/zink/zink_screen.c | 12 +++- src/gallium/drivers/zink/zink_shader_keys.h | 16 +++++- src/gallium/drivers/zink/zink_state.c | 70 +++++++++++++++++++++-- src/gallium/drivers/zink/zink_state.h | 3 + 7 files changed, 196 insertions(+), 21 deletions(-) diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c index 3b02d29..17d5f9d 100644 --- a/src/gallium/drivers/zink/zink_compiler.c +++ b/src/gallium/drivers/zink/zink_compiler.c @@ -564,6 +564,79 @@ update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_ zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs; } +struct decompose_state { + nir_variable **split; + bool needs_w; +}; + +static bool +lower_attrib(nir_builder *b, nir_instr *instr, void *data) +{ + struct decompose_state *state = data; + nir_variable **split = state->split; + if (instr->type != nir_instr_type_intrinsic) + return false; + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); + if (intr->intrinsic != nir_intrinsic_load_deref) + return false; + nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); + nir_variable *var = nir_deref_instr_get_variable(deref); + if (var != split[0]) + return false; + unsigned num_components = glsl_get_vector_elements(split[0]->type); + b->cursor = nir_after_instr(instr); + nir_ssa_def *loads[4]; + for (unsigned i = 0; i < (state->needs_w ? 
num_components - 1 : num_components); i++) + loads[i] = nir_load_deref(b, nir_build_deref_var(b, split[i+1])); + if (state->needs_w) { + /* oob load w component to get correct value for int/float */ + loads[3] = nir_channel(b, loads[0], 3); + loads[0] = nir_channel(b, loads[0], 0); + } + nir_ssa_def *new_load = nir_vec(b, loads, num_components); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, new_load); + nir_instr_remove_v(instr); + return true; +} + +static bool +decompose_attribs(nir_shader *nir, uint32_t decomposed_attrs, uint32_t decomposed_attrs_without_w) +{ + uint32_t bits = 0; + nir_foreach_variable_with_modes(var, nir, nir_var_shader_in) + bits |= BITFIELD_BIT(var->data.driver_location); + bits = ~bits; + u_foreach_bit(location, decomposed_attrs | decomposed_attrs_without_w) { + nir_variable *split[5]; + struct decompose_state state; + state.split = split; + nir_variable *var = nir_find_variable_with_driver_location(nir, nir_var_shader_in, location); + assert(var); + split[0] = var; + bits |= BITFIELD_BIT(var->data.driver_location); + const struct glsl_type *new_type = glsl_type_is_scalar(var->type) ? var->type : glsl_get_array_element(var->type); + unsigned num_components = glsl_get_vector_elements(var->type); + state.needs_w = (decomposed_attrs_without_w & BITFIELD_BIT(location)) != 0 && num_components == 4; + for (unsigned i = 0; i < (state.needs_w ? num_components - 1 : num_components); i++) { + split[i+1] = nir_variable_clone(var, nir); + split[i+1]->name = ralloc_asprintf(nir, "%s_split%u", var->name, i); + if (decomposed_attrs_without_w & BITFIELD_BIT(location)) + split[i+1]->type = !i && num_components == 4 ? 
var->type : new_type; + else + split[i+1]->type = new_type; + split[i+1]->data.driver_location = ffs(bits) - 1; + bits &= ~BITFIELD_BIT(split[i+1]->data.driver_location); + nir_shader_add_variable(nir, split[i+1]); + } + var->data.mode = nir_var_shader_temp; + nir_shader_instructions_pass(nir, lower_attrib, nir_metadata_dominance, &state); + } + nir_fixup_deref_modes(nir); + NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL); + optimize_nir(nir); + return true; +} + static void assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map) { @@ -731,17 +804,25 @@ zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shad /* TODO: use a separate mem ctx here for ralloc */ switch (zs->nir->info.stage) { - case MESA_SHADER_VERTEX: + case MESA_SHADER_VERTEX: { + uint32_t decomposed_attrs = 0, decomposed_attrs_without_w = 0; + const struct zink_vs_key *vs_key = zink_vs_key(key); + decomposed_attrs = vs_key->decomposed_attrs; + decomposed_attrs_without_w = vs_key->decomposed_attrs_without_w; + if (decomposed_attrs || decomposed_attrs_without_w) + NIR_PASS_V(nir, decompose_attribs, decomposed_attrs, decomposed_attrs_without_w); + FALLTHROUGH; + } case MESA_SHADER_TESS_EVAL: case MESA_SHADER_GEOMETRY: - if (zink_vs_key(key)->last_vertex_stage) { + if (zink_vs_key_base(key)->last_vertex_stage) { if (zs->streamout.have_xfb) streamout = &zs->streamout; - if (!zink_vs_key(key)->clip_halfz) { + if (!zink_vs_key_base(key)->clip_halfz) { NIR_PASS_V(nir, nir_lower_clip_halfz); } - if (zink_vs_key(key)->push_drawid) { + if (zink_vs_key_base(key)->push_drawid) { NIR_PASS_V(nir, lower_drawid); } } diff --git a/src/gallium/drivers/zink/zink_pipeline.h b/src/gallium/drivers/zink/zink_pipeline.h index d4239b0..eccfa44 100644 --- a/src/gallium/drivers/zink/zink_pipeline.h +++ b/src/gallium/drivers/zink/zink_pipeline.h @@ -78,6 +78,8 @@ struct zink_gfx_pipeline_state { uint8_t coord_replace_bits; bool 
coord_replace_yinvert; bool drawid_broken; + uint32_t decomposed_attrs; + uint32_t decomposed_attrs_without_w; struct zink_blend_state *blend_state; struct zink_render_pass *render_pass; VkPipeline pipeline; diff --git a/src/gallium/drivers/zink/zink_program.c b/src/gallium/drivers/zink/zink_program.c index d012176..fd569f4 100644 --- a/src/gallium/drivers/zink/zink_program.c +++ b/src/gallium/drivers/zink/zink_program.c @@ -105,11 +105,11 @@ keybox_equals(const void *void_a, const void *void_b) } static void -shader_key_vs_gen(struct zink_context *ctx, struct zink_shader *zs, - struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key) +shader_key_vs_base_gen(struct zink_context *ctx, struct zink_shader *zs, + struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key) { - struct zink_vs_key *vs_key = &key->key.vs; - key->size = sizeof(struct zink_vs_key); + struct zink_vs_key_base *vs_key = &key->key.vs_base; + key->size = sizeof(struct zink_vs_key_base); vs_key->clip_halfz = ctx->rast_state && ctx->rast_state->base.clip_halfz; switch (zs->nir->info.stage) { @@ -129,6 +129,17 @@ shader_key_vs_gen(struct zink_context *ctx, struct zink_shader *zs, } static void +shader_key_vs_gen(struct zink_context *ctx, struct zink_shader *zs, + struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key) +{ + struct zink_vs_key *vs_key = &key->key.vs; + shader_key_vs_base_gen(ctx, zs, shaders, key); + vs_key->decomposed_attrs = ctx->element_state->decomposed_attrs; + vs_key->decomposed_attrs_without_w = ctx->element_state->decomposed_attrs_without_w; + key->size += 2 * 4; +} + +static void shader_key_fs_gen(struct zink_context *ctx, struct zink_shader *zs, struct zink_shader *shaders[ZINK_SHADER_COUNT], struct zink_shader_key *key) { @@ -167,8 +178,8 @@ static zink_shader_key_gen shader_key_vtbl[] = [MESA_SHADER_VERTEX] = shader_key_vs_gen, [MESA_SHADER_TESS_CTRL] = shader_key_tcs_gen, /* reusing vs key for now since we're only 
using clip_halfz */ - [MESA_SHADER_TESS_EVAL] = shader_key_vs_gen, - [MESA_SHADER_GEOMETRY] = shader_key_vs_gen, + [MESA_SHADER_TESS_EVAL] = shader_key_vs_base_gen, + [MESA_SHADER_GEOMETRY] = shader_key_vs_base_gen, [MESA_SHADER_FRAGMENT] = shader_key_fs_gen, }; @@ -179,7 +190,7 @@ get_default_shader_module_ptr(struct zink_gfx_program *prog, struct zink_shader if (zs->nir->info.stage == MESA_SHADER_VERTEX || zs->nir->info.stage == MESA_SHADER_TESS_EVAL) { /* no streamout or halfz */ - if (!zink_vs_key(key)->last_vertex_stage) + if (!zink_vs_key_base(key)->last_vertex_stage) return &prog->default_variants[zs->nir->info.stage][1]; } return &prog->default_variants[zs->nir->info.stage][0]; diff --git a/src/gallium/drivers/zink/zink_screen.c b/src/gallium/drivers/zink/zink_screen.c index a08010f..184ff08 100644 --- a/src/gallium/drivers/zink/zink_screen.c +++ b/src/gallium/drivers/zink/zink_screen.c @@ -994,9 +994,15 @@ zink_is_format_supported(struct pipe_screen *pscreen, VkFormatProperties props = screen->format_props[format]; if (target == PIPE_BUFFER) { - if (bind & PIPE_BIND_VERTEX_BUFFER && - !(props.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT)) - return false; + if (bind & PIPE_BIND_VERTEX_BUFFER) { + if (!(props.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT)) { + enum pipe_format new_format = zink_decompose_vertex_format(format); + if (!new_format) + return false; + if (!(screen->format_props[new_format].bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT)) + return false; + } + } if (bind & PIPE_BIND_SAMPLER_VIEW && !(props.bufferFeatures & VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) diff --git a/src/gallium/drivers/zink/zink_shader_keys.h b/src/gallium/drivers/zink/zink_shader_keys.h index f0dc144..61ad37f 100644 --- a/src/gallium/drivers/zink/zink_shader_keys.h +++ b/src/gallium/drivers/zink/zink_shader_keys.h @@ -26,12 +26,19 @@ #ifndef ZINK_SHADER_KEYS_H # define ZINK_SHADER_KEYS_H -struct zink_vs_key { +struct zink_vs_key_base { bool 
clip_halfz; bool push_drawid; bool last_vertex_stage; }; +struct zink_vs_key { + struct zink_vs_key_base base; + uint8_t pad; + uint32_t decomposed_attrs; + uint32_t decomposed_attrs_without_w; +}; + struct zink_fs_key { uint8_t coord_replace_bits; bool coord_replace_yinvert; @@ -52,6 +59,7 @@ struct zink_shader_key { union { /* reuse vs key for now with tes/gs since we only use clip_halfz */ struct zink_vs_key vs; + struct zink_vs_key_base vs_base; struct zink_fs_key fs; } key; struct zink_shader_key_base base; @@ -67,6 +75,12 @@ zink_fs_key(const struct zink_shader_key *key) return &key->key.fs; } +static inline const struct zink_vs_key_base * +zink_vs_key_base(const struct zink_shader_key *key) +{ + return &key->key.vs_base; +} + static inline const struct zink_vs_key * zink_vs_key(const struct zink_shader_key *key) { diff --git a/src/gallium/drivers/zink/zink_state.c b/src/gallium/drivers/zink/zink_state.c index e36197c..9810f5c1 100644 --- a/src/gallium/drivers/zink/zink_state.c +++ b/src/gallium/drivers/zink/zink_state.c @@ -24,6 +24,7 @@ #include "zink_state.h" #include "zink_context.h" +#include "zink_format.h" #include "zink_screen.h" #include "compiler/shader_enums.h" @@ -49,6 +50,10 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, buffer_map[i] = -1; int num_bindings = 0; + unsigned num_decomposed = 0; + uint32_t size8 = 0; + uint32_t size16 = 0; + uint32_t size32 = 0; for (i = 0; i < num_elements; ++i) { const struct pipe_vertex_element *elem = elements + i; @@ -59,7 +64,6 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, } binding = buffer_map[binding]; - ves->bindings[binding].binding = binding; ves->bindings[binding].inputRate = elem->instance_divisor ? 
VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; @@ -68,24 +72,73 @@ zink_create_vertex_elements_state(struct pipe_context *pctx, debug_printf("zink: clamping instance divisor %u to %u\n", elem->instance_divisor, screen->info.vdiv_props.maxVertexAttribDivisor); ves->divisor[binding] = MIN2(elem->instance_divisor, screen->info.vdiv_props.maxVertexAttribDivisor); + VkFormat format; + if (screen->format_props[elem->src_format].bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) + format = zink_get_format(screen, elem->src_format); + else { + enum pipe_format new_format = zink_decompose_vertex_format(elem->src_format); + assert(new_format); + num_decomposed++; + assert(screen->format_props[new_format].bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT); + if (util_format_get_blocksize(new_format) == 4) + size32 |= BITFIELD_BIT(i); + else if (util_format_get_blocksize(new_format) == 2) + size16 |= BITFIELD_BIT(i); + else + size8 |= BITFIELD_BIT(i); + format = zink_get_format(screen, new_format); + unsigned size; + if (i < 8) + size = 1; + else if (i < 16) + size = 2; + else + size = 4; + if (util_format_get_nr_components(elem->src_format) == 4) { + ves->decomposed_attrs |= BITFIELD_BIT(i); + ves->decomposed_attrs_size = size; + } else { + ves->decomposed_attrs_without_w |= BITFIELD_BIT(i); + } + } + if (screen->info.have_EXT_vertex_input_dynamic_state) { ves->hw_state.dynattribs[i].sType = VK_STRUCTURE_TYPE_VERTEX_INPUT_ATTRIBUTE_DESCRIPTION_2_EXT; ves->hw_state.dynattribs[i].binding = binding; ves->hw_state.dynattribs[i].location = i; - ves->hw_state.dynattribs[i].format = zink_get_format(screen, - elem->src_format); + ves->hw_state.dynattribs[i].format = format; assert(ves->hw_state.dynattribs[i].format != VK_FORMAT_UNDEFINED); ves->hw_state.dynattribs[i].offset = elem->src_offset; } else { ves->hw_state.attribs[i].binding = binding; ves->hw_state.attribs[i].location = i; - ves->hw_state.attribs[i].format = zink_get_format(screen, - elem->src_format); 
+ ves->hw_state.attribs[i].format = format; assert(ves->hw_state.attribs[i].format != VK_FORMAT_UNDEFINED); ves->hw_state.attribs[i].offset = elem->src_offset; } } - + assert(num_decomposed + num_elements <= PIPE_MAX_ATTRIBS); + u_foreach_bit(i, ves->decomposed_attrs | ves->decomposed_attrs_without_w) { + const struct pipe_vertex_element *elem = elements + i; + const struct util_format_description *desc = util_format_description(elem->src_format); + unsigned size = 1; + if (size32 & BITFIELD_BIT(i)) + size = 4; + else if (size16 & BITFIELD_BIT(i)) + size = 2; + for (unsigned j = 1; j < desc->nr_channels; j++) { + if (screen->info.have_EXT_vertex_input_dynamic_state) { + memcpy(&ves->hw_state.dynattribs[num_elements], &ves->hw_state.dynattribs[i], sizeof(VkVertexInputAttributeDescription2EXT)); + ves->hw_state.dynattribs[num_elements].location = num_elements; + ves->hw_state.dynattribs[num_elements].offset += j * size; + } else { + memcpy(&ves->hw_state.attribs[num_elements], &ves->hw_state.attribs[i], sizeof(VkVertexInputAttributeDescription)); + ves->hw_state.attribs[num_elements].location = num_elements; + ves->hw_state.attribs[num_elements].offset += j * size; + } + num_elements++; + } + } ves->hw_state.num_bindings = num_bindings; ves->hw_state.num_attribs = num_elements; if (screen->info.have_EXT_vertex_input_dynamic_state) { @@ -124,6 +177,11 @@ zink_bind_vertex_elements_state(struct pipe_context *pctx, ctx->vertex_state_changed = !zink_screen(pctx->screen)->info.have_EXT_vertex_input_dynamic_state; ctx->vertex_buffers_dirty = ctx->element_state->hw_state.num_bindings > 0; } + if (ctx->element_state->decomposed_attrs != state->decomposed_attrs || + ctx->element_state->decomposed_attrs_without_w != state->decomposed_attrs_without_w) + ctx->dirty_shader_stages |= BITFIELD_BIT(PIPE_SHADER_VERTEX); + state->decomposed_attrs = ctx->element_state->decomposed_attrs; + state->decomposed_attrs_without_w = ctx->element_state->decomposed_attrs_without_w; 
state->element_state = &ctx->element_state->hw_state; } else { state->element_state = NULL; diff --git a/src/gallium/drivers/zink/zink_state.h b/src/gallium/drivers/zink/zink_state.h index e9c33c1..9efcadf 100644 --- a/src/gallium/drivers/zink/zink_state.h +++ b/src/gallium/drivers/zink/zink_state.h @@ -52,6 +52,9 @@ struct zink_vertex_elements_state { } bindings[PIPE_MAX_ATTRIBS]; uint32_t divisor[PIPE_MAX_ATTRIBS]; uint8_t binding_map[PIPE_MAX_ATTRIBS]; + uint32_t decomposed_attrs; + unsigned decomposed_attrs_size; + uint32_t decomposed_attrs_without_w; struct zink_vertex_elements_hw_state hw_state; }; -- 2.7.4