From 6e5fe71599b658a400602fac043e5d174fa37bc4 Mon Sep 17 00:00:00 2001
From: Mike Blumenkrantz
Date: Mon, 6 Mar 2023 14:40:58 -0500
Subject: [PATCH] lavapipe: split out shader struct members into their own
 struct

kinda gross but simplifies some code

Reviewed-by: Dave Airlie
Part-of:
---
An illustrative sketch of the struct split (not part of the patch itself) is
appended after the diff.

 src/gallium/frontends/lavapipe/lvp_execute.c   |  91 ++++++-------
 .../frontends/lavapipe/lvp_inline_uniforms.c   |  18 +--
 src/gallium/frontends/lavapipe/lvp_pipeline.c  | 142 +++++++++++----------
 src/gallium/frontends/lavapipe/lvp_private.h   |  29 +++--
 4 files changed, 143 insertions(+), 137 deletions(-)

diff --git a/src/gallium/frontends/lavapipe/lvp_execute.c b/src/gallium/frontends/lavapipe/lvp_execute.c
index 20cde77..eb6f30a 100644
--- a/src/gallium/frontends/lavapipe/lvp_execute.c
+++ b/src/gallium/frontends/lavapipe/lvp_execute.c
@@ -279,21 +279,21 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type
    uint32_t inline_uniforms[MAX_INLINABLE_UNIFORMS];
    unsigned stage = tgsi_processor_to_shader_stage(sh);
    state->inlines_dirty[sh] = false;
-   if (!state->pipeline[is_compute]->inlines[stage].can_inline)
+   if (!state->pipeline[is_compute]->shaders[stage].inlines.can_inline)
       return;
    struct lvp_pipeline *pipeline = state->pipeline[is_compute];
    /* these buffers have already been flushed in llvmpipe, so they're safe to read */
-   nir_shader *base_nir = pipeline->pipeline_nir[stage]->nir;
+   nir_shader *base_nir = pipeline->shaders[stage].pipeline_nir->nir;
    if (stage == PIPE_SHADER_TESS_EVAL && state->tess_ccw)
-      base_nir = pipeline->tess_ccw->nir;
-   nir_shader *nir = nir_shader_clone(pipeline->pipeline_nir[stage]->nir, base_nir);
+      base_nir = pipeline->shaders[stage].tess_ccw->nir;
+   nir_shader *nir = nir_shader_clone(pipeline->shaders[stage].pipeline_nir->nir, base_nir);
    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
    unsigned ssa_alloc = impl->ssa_alloc;
-   unsigned count = pipeline->inlines[stage].count[0];
+   unsigned count = pipeline->shaders[stage].inlines.count[0];
    if (count && pcbuf_dirty) {
       unsigned push_size = get_pcbuf_size(state, sh);
       for (unsigned i = 0; i < count; i++) {
-         unsigned offset = pipeline->inlines[stage].uniform_offsets[0][i];
+         unsigned offset = pipeline->shaders[stage].inlines.uniform_offsets[0][i];
          if (offset < push_size) {
            memcpy(&inline_uniforms[i], &state->push_constants[offset], sizeof(uint32_t));
          } else {
@@ -312,8 +312,8 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type
    }
    if (constbuf_dirty) {
       struct pipe_box box = {0};
-      u_foreach_bit(slot, pipeline->inlines[stage].can_inline) {
-         unsigned count = pipeline->inlines[stage].count[slot];
+      u_foreach_bit(slot, pipeline->shaders[stage].inlines.can_inline) {
+         unsigned count = pipeline->shaders[stage].inlines.count[slot];
          struct pipe_constant_buffer *cbuf = &state->const_buffer[sh][slot - 1];
          struct pipe_resource *pres = cbuf->buffer;
          box.x = cbuf->buffer_offset;
@@ -321,7 +321,7 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type
          struct pipe_transfer *xfer;
          uint8_t *map = state->pctx->buffer_map(state->pctx, pres, 0, PIPE_MAP_READ, &box, &xfer);
          for (unsigned i = 0; i < count; i++) {
-            unsigned offset = pipeline->inlines[stage].uniform_offsets[slot][i];
+            unsigned offset = pipeline->shaders[stage].inlines.uniform_offsets[slot][i];
            memcpy(&inline_uniforms[i], map + offset, sizeof(uint32_t));
          }
          state->pctx->buffer_unmap(state->pctx, xfer);
@@ -332,12 +332,12 @@ update_inline_shader_state(struct rendering_state *state, enum pipe_shader_type
    impl = nir_shader_get_entrypoint(nir);
    void *shader_state;
    if (ssa_alloc - impl->ssa_alloc < ssa_alloc / 2 &&
-       !pipeline->inlines[stage].must_inline) {
+       !pipeline->shaders[stage].inlines.must_inline) {
       /* not enough change; don't inline further */
-      pipeline->inlines[stage].can_inline = 0;
+      pipeline->shaders[stage].inlines.can_inline = 0;
       ralloc_free(nir);
-      pipeline->shader_cso[sh] = lvp_pipeline_compile(pipeline, nir_shader_clone(NULL, pipeline->pipeline_nir[stage]->nir));
-      shader_state = pipeline->shader_cso[sh];
+      pipeline->shaders[sh].shader_cso = lvp_pipeline_compile(pipeline, nir_shader_clone(NULL, pipeline->shaders[stage].pipeline_nir->nir));
+      shader_state = pipeline->shaders[sh].shader_cso;
    } else {
       shader_state = lvp_pipeline_compile(pipeline, nir);
    }
@@ -567,18 +567,18 @@ static void handle_compute_pipeline(struct vk_cmd_queue_entry *cmd,
    state->pcbuf_dirty[PIPE_SHADER_COMPUTE] = false;
 
    state->iv_dirty[MESA_SHADER_COMPUTE] |= state->num_shader_images[MESA_SHADER_COMPUTE] &&
-                  (state->access[MESA_SHADER_COMPUTE].images_read != pipeline->access[MESA_SHADER_COMPUTE].images_read ||
-                   state->access[MESA_SHADER_COMPUTE].images_written != pipeline->access[MESA_SHADER_COMPUTE].images_written);
+                  (state->access[MESA_SHADER_COMPUTE].images_read != pipeline->shaders[MESA_SHADER_COMPUTE].access.images_read ||
+                   state->access[MESA_SHADER_COMPUTE].images_written != pipeline->shaders[MESA_SHADER_COMPUTE].access.images_written);
    state->sb_dirty[MESA_SHADER_COMPUTE] |= state->num_shader_buffers[MESA_SHADER_COMPUTE] &&
-                  state->access[MESA_SHADER_COMPUTE].buffers_written != pipeline->access[MESA_SHADER_COMPUTE].buffers_written;
-   memcpy(&state->access[MESA_SHADER_COMPUTE], &pipeline->access[MESA_SHADER_COMPUTE], sizeof(struct lvp_access_info));
+                  state->access[MESA_SHADER_COMPUTE].buffers_written != pipeline->shaders[MESA_SHADER_COMPUTE].access.buffers_written;
+   memcpy(&state->access[MESA_SHADER_COMPUTE], &pipeline->shaders[MESA_SHADER_COMPUTE].access, sizeof(struct lvp_access_info));
 
-   state->dispatch_info.block[0] = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->nir->info.workgroup_size[0];
-   state->dispatch_info.block[1] = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->nir->info.workgroup_size[1];
-   state->dispatch_info.block[2] = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->nir->info.workgroup_size[2];
-   state->inlines_dirty[PIPE_SHADER_COMPUTE] = pipeline->inlines[MESA_SHADER_COMPUTE].can_inline;
-   if (!pipeline->inlines[MESA_SHADER_COMPUTE].can_inline)
-      state->pctx->bind_compute_state(state->pctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);
+   state->dispatch_info.block[0] = pipeline->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir->info.workgroup_size[0];
+   state->dispatch_info.block[1] = pipeline->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir->info.workgroup_size[1];
+   state->dispatch_info.block[2] = pipeline->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir->info.workgroup_size[2];
+   state->inlines_dirty[PIPE_SHADER_COMPUTE] = pipeline->shaders[MESA_SHADER_COMPUTE].inlines.can_inline;
+   if (!pipeline->shaders[MESA_SHADER_COMPUTE].inlines.can_inline)
+      state->pctx->bind_compute_state(state->pctx, pipeline->shaders[PIPE_SHADER_COMPUTE].shader_cso);
 }
 
 static void
@@ -645,11 +645,12 @@ static void handle_graphics_pipeline(struct vk_cmd_queue_entry *cmd,
 
    for (enum pipe_shader_type sh = PIPE_SHADER_VERTEX; sh < PIPE_SHADER_COMPUTE; sh++) {
       state->iv_dirty[sh] |= state->num_shader_images[sh] &&
-                      (state->access[sh].images_read != pipeline->access[sh].images_read ||
-                       state->access[sh].images_written != pipeline->access[sh].images_written);
-      state->sb_dirty[sh] |= state->num_shader_buffers[sh] && state->access[sh].buffers_written != pipeline->access[sh].buffers_written;
+                      (state->access[sh].images_read != pipeline->shaders[sh].access.images_read ||
+                       state->access[sh].images_written != pipeline->shaders[sh].access.images_written);
+      state->sb_dirty[sh] |= state->num_shader_buffers[sh] && state->access[sh].buffers_written != pipeline->shaders[sh].access.buffers_written;
    }
-   memcpy(state->access, pipeline->access, sizeof(struct lvp_access_info) * 5); //4 vertex stages + fragment
+   for (unsigned i = 0; i < ARRAY_SIZE(state->access); i++)
+      memcpy(&state->access[i], &pipeline->shaders[i].access, sizeof(struct lvp_access_info));
 
    for (enum pipe_shader_type sh = PIPE_SHADER_VERTEX; sh < PIPE_SHADER_COMPUTE; sh++)
       state->has_pcbuf[sh] = false;
 
@@ -682,39 +683,39 @@ static void handle_graphics_pipeline(struct vk_cmd_queue_entry *cmd,
       VkShaderStageFlagBits vk_stage = (1 << b);
       switch (vk_stage) {
       case VK_SHADER_STAGE_FRAGMENT_BIT:
-         state->inlines_dirty[PIPE_SHADER_FRAGMENT] = pipeline->inlines[MESA_SHADER_FRAGMENT].can_inline;
-         if (!pipeline->inlines[MESA_SHADER_FRAGMENT].can_inline)
-            state->pctx->bind_fs_state(state->pctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);
+         state->inlines_dirty[PIPE_SHADER_FRAGMENT] = pipeline->shaders[MESA_SHADER_FRAGMENT].inlines.can_inline;
+         if (!pipeline->shaders[MESA_SHADER_FRAGMENT].inlines.can_inline)
+            state->pctx->bind_fs_state(state->pctx, pipeline->shaders[PIPE_SHADER_FRAGMENT].shader_cso);
          has_stage[PIPE_SHADER_FRAGMENT] = true;
          break;
       case VK_SHADER_STAGE_VERTEX_BIT:
-         state->inlines_dirty[PIPE_SHADER_VERTEX] = pipeline->inlines[MESA_SHADER_VERTEX].can_inline;
-         if (!pipeline->inlines[MESA_SHADER_VERTEX].can_inline)
-            state->pctx->bind_vs_state(state->pctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]);
+         state->inlines_dirty[PIPE_SHADER_VERTEX] = pipeline->shaders[MESA_SHADER_VERTEX].inlines.can_inline;
+         if (!pipeline->shaders[MESA_SHADER_VERTEX].inlines.can_inline)
+            state->pctx->bind_vs_state(state->pctx, pipeline->shaders[PIPE_SHADER_VERTEX].shader_cso);
         has_stage[PIPE_SHADER_VERTEX] = true;
         break;
      case VK_SHADER_STAGE_GEOMETRY_BIT:
-         state->inlines_dirty[PIPE_SHADER_GEOMETRY] = pipeline->inlines[MESA_SHADER_GEOMETRY].can_inline;
-         if (!pipeline->inlines[MESA_SHADER_GEOMETRY].can_inline)
-            state->pctx->bind_gs_state(state->pctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]);
+         state->inlines_dirty[PIPE_SHADER_GEOMETRY] = pipeline->shaders[MESA_SHADER_GEOMETRY].inlines.can_inline;
+         if (!pipeline->shaders[MESA_SHADER_GEOMETRY].inlines.can_inline)
+            state->pctx->bind_gs_state(state->pctx, pipeline->shaders[PIPE_SHADER_GEOMETRY].shader_cso);
          state->gs_output_lines = pipeline->gs_output_lines ? GS_OUTPUT_LINES : GS_OUTPUT_NOT_LINES;
          has_stage[PIPE_SHADER_GEOMETRY] = true;
          break;
       case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
-         state->inlines_dirty[PIPE_SHADER_TESS_CTRL] = pipeline->inlines[MESA_SHADER_TESS_CTRL].can_inline;
-         if (!pipeline->inlines[MESA_SHADER_TESS_CTRL].can_inline)
-            state->pctx->bind_tcs_state(state->pctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]);
+         state->inlines_dirty[PIPE_SHADER_TESS_CTRL] = pipeline->shaders[MESA_SHADER_TESS_CTRL].inlines.can_inline;
+         if (!pipeline->shaders[MESA_SHADER_TESS_CTRL].inlines.can_inline)
+            state->pctx->bind_tcs_state(state->pctx, pipeline->shaders[PIPE_SHADER_TESS_CTRL].shader_cso);
          has_stage[PIPE_SHADER_TESS_CTRL] = true;
          break;
       case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
-         state->inlines_dirty[PIPE_SHADER_TESS_EVAL] = pipeline->inlines[MESA_SHADER_TESS_EVAL].can_inline;
-         if (!pipeline->inlines[MESA_SHADER_TESS_EVAL].can_inline) {
+         state->inlines_dirty[PIPE_SHADER_TESS_EVAL] = pipeline->shaders[MESA_SHADER_TESS_EVAL].inlines.can_inline;
+         if (!pipeline->shaders[MESA_SHADER_TESS_EVAL].inlines.can_inline) {
            if (BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
-               state->tess_states[0] = pipeline->shader_cso[PIPE_SHADER_TESS_EVAL];
-               state->tess_states[1] = pipeline->tess_ccw_cso;
+               state->tess_states[0] = pipeline->shaders[PIPE_SHADER_TESS_EVAL].shader_cso;
+               state->tess_states[1] = pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw_cso;
                state->pctx->bind_tes_state(state->pctx, state->tess_states[state->tess_ccw]);
            } else {
-               state->pctx->bind_tes_state(state->pctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]);
+               state->pctx->bind_tes_state(state->pctx, pipeline->shaders[PIPE_SHADER_TESS_EVAL].shader_cso);
            }
         }
         if (!BITSET_TEST(ps->dynamic, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN))
@@ -730,7 +731,7 @@ static void handle_graphics_pipeline(struct vk_cmd_queue_entry *cmd,
 
    /* there should always be a dummy fs. */
    if (!has_stage[PIPE_SHADER_FRAGMENT])
-      state->pctx->bind_fs_state(state->pctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);
+      state->pctx->bind_fs_state(state->pctx, pipeline->shaders[PIPE_SHADER_FRAGMENT].shader_cso);
    if (state->pctx->bind_gs_state && !has_stage[PIPE_SHADER_GEOMETRY])
       state->pctx->bind_gs_state(state->pctx, NULL);
    if (state->pctx->bind_tcs_state && !has_stage[PIPE_SHADER_TESS_CTRL])
diff --git a/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c b/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c
index e7b6173..756c875 100644
--- a/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c
+++ b/src/gallium/frontends/lavapipe/lvp_inline_uniforms.c
@@ -138,7 +138,7 @@ lvp_find_inlinable_uniforms(struct lvp_pipeline *pipeline, nir_shader *shader)
          nir_metadata_require(function->impl, nir_metadata_loop_analysis, nir_var_all);
 
          foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
-            process_node(node, NULL, (uint32_t*)pipeline->inlines[shader->info.stage].uniform_offsets, pipeline->inlines[shader->info.stage].count, stores);
+            process_node(node, NULL, (uint32_t*)pipeline->shaders[shader->info.stage].inlines.uniform_offsets, pipeline->shaders[shader->info.stage].inlines.count, stores);
       }
    }
    const unsigned threshold = 5;
@@ -152,21 +152,21 @@ lvp_find_inlinable_uniforms(struct lvp_pipeline *pipeline, nir_shader *shader)
         }
         if (counter >= threshold) {
            uint8_t new_num[PIPE_MAX_CONSTANT_BUFFERS];
-           memcpy(new_num, pipeline->inlines[shader->info.stage].count, sizeof(new_num));
+           memcpy(new_num, pipeline->shaders[shader->info.stage].inlines.count, sizeof(new_num));
 
            uint32_t *uni_offsets =
-              (uint32_t *) pipeline->inlines[shader->info.stage].uniform_offsets;
+              (uint32_t *) pipeline->shaders[shader->info.stage].inlines.uniform_offsets;
 
            if (nir_collect_src_uniforms(src, 0, uni_offsets, new_num,
                                         PIPE_MAX_CONSTANT_BUFFERS, UINT_MAX)) {
               ret = true;
-              memcpy(pipeline->inlines[shader->info.stage].count, new_num, sizeof(new_num));
+              memcpy(pipeline->shaders[shader->info.stage].inlines.count, new_num, sizeof(new_num));
           }
        }
    }
 
    for (unsigned i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) {
-      if (pipeline->inlines[shader->info.stage].count[i]) {
-         pipeline->inlines[shader->info.stage].can_inline |= BITFIELD_BIT(i);
+      if (pipeline->shaders[shader->info.stage].inlines.count[i]) {
+         pipeline->shaders[shader->info.stage].inlines.can_inline |= BITFIELD_BIT(i);
          break;
       }
    }
@@ -176,7 +176,7 @@ void
 lvp_inline_uniforms(nir_shader *shader, const struct lvp_pipeline *pipeline, const uint32_t *uniform_values, uint32_t ubo)
 {
-   if (!pipeline->inlines[shader->info.stage].can_inline)
+   if (!pipeline->shaders[shader->info.stage].inlines.can_inline)
       return;
 
    nir_foreach_function(function, shader) {
@@ -199,8 +199,8 @@ lvp_inline_uniforms(nir_shader *shader, const struct lvp_pipeline *pipeline, con
                intr->dest.ssa.bit_size == 32) {
                int num_components = intr->dest.ssa.num_components;
                uint32_t offset = nir_src_as_uint(intr->src[1]);
-               const unsigned num_uniforms = pipeline->inlines[shader->info.stage].count[ubo];
-               const unsigned *uniform_dw_offsets = pipeline->inlines[shader->info.stage].uniform_offsets[ubo];
+               const unsigned num_uniforms = pipeline->shaders[shader->info.stage].inlines.count[ubo];
+               const unsigned *uniform_dw_offsets = pipeline->shaders[shader->info.stage].inlines.uniform_offsets[ubo];
 
                if (num_components == 1) {
                   /* Just replace the uniform load to constant load. */
diff --git a/src/gallium/frontends/lavapipe/lvp_pipeline.c b/src/gallium/frontends/lavapipe/lvp_pipeline.c
index aa45d27..61c1ddc 100644
--- a/src/gallium/frontends/lavapipe/lvp_pipeline.c
+++ b/src/gallium/frontends/lavapipe/lvp_pipeline.c
@@ -42,22 +42,23 @@ void
 lvp_pipeline_destroy(struct lvp_device *device, struct lvp_pipeline *pipeline)
 {
-   if (pipeline->shader_cso[PIPE_SHADER_VERTEX])
-      device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]);
-   if (pipeline->shader_cso[PIPE_SHADER_FRAGMENT] && !pipeline->noop_fs)
-      device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);
-   if (pipeline->shader_cso[PIPE_SHADER_GEOMETRY])
-      device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]);
-   if (pipeline->shader_cso[PIPE_SHADER_TESS_CTRL])
-      device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]);
-   if (pipeline->shader_cso[PIPE_SHADER_TESS_EVAL])
-      device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]);
-   if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])
-      device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);
-
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
-      lvp_pipeline_nir_ref(&pipeline->pipeline_nir[i], NULL);
-   lvp_pipeline_nir_ref(&pipeline->tess_ccw, NULL);
+   if (pipeline->shaders[PIPE_SHADER_VERTEX].shader_cso)
+      device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shaders[PIPE_SHADER_VERTEX].shader_cso);
+   if (pipeline->shaders[PIPE_SHADER_FRAGMENT].shader_cso && !pipeline->noop_fs)
+      device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shaders[PIPE_SHADER_FRAGMENT].shader_cso);
+   if (pipeline->shaders[PIPE_SHADER_GEOMETRY].shader_cso)
+      device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shaders[PIPE_SHADER_GEOMETRY].shader_cso);
+   if (pipeline->shaders[PIPE_SHADER_TESS_CTRL].shader_cso)
+      device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shaders[PIPE_SHADER_TESS_CTRL].shader_cso);
+   if (pipeline->shaders[PIPE_SHADER_TESS_EVAL].shader_cso)
+      device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shaders[PIPE_SHADER_TESS_EVAL].shader_cso);
+   if (pipeline->shaders[PIPE_SHADER_COMPUTE].shader_cso)
+      device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shaders[PIPE_SHADER_COMPUTE].shader_cso);
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      lvp_pipeline_nir_ref(&pipeline->shaders[i].pipeline_nir, NULL);
+      lvp_pipeline_nir_ref(&pipeline->shaders[i].tess_ccw, NULL);
+   }
 
    if (pipeline->layout)
       vk_pipeline_layout_unref(&device->vk, &pipeline->layout->vk);
 
@@ -118,9 +119,9 @@ set_image_access(struct lvp_pipeline *pipeline, nir_shader *nir,
 
    uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;
    if (reads)
-      pipeline->access[nir->info.stage].images_read |= mask;
+      pipeline->shaders[nir->info.stage].access.images_read |= mask;
    if (writes)
-      pipeline->access[nir->info.stage].images_written |= mask;
+      pipeline->shaders[nir->info.stage].access.images_written |= mask;
 }
 
 static void
@@ -151,7 +152,7 @@ set_buffer_access(struct lvp_pipeline *pipeline, nir_shader *nir,
    /* Structs have been lowered already, so get_aoa_size is sufficient. */
    const unsigned size = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1;
    uint64_t mask = BITFIELD64_MASK(MAX2(size, 1)) << value;
-   pipeline->access[nir->info.stage].buffers_written |= mask;
+   pipeline->shaders[nir->info.stage].access.buffers_written |= mask;
 }
 
 static void
@@ -514,10 +515,10 @@ lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,
 
    nir_function_impl *impl = nir_shader_get_entrypoint(nir);
    if (impl->ssa_alloc > 100) //skip for small shaders
-      pipeline->inlines[stage].must_inline = lvp_find_inlinable_uniforms(pipeline, nir);
-   pipeline->pipeline_nir[stage] = ralloc(NULL, struct lvp_pipeline_nir);
-   pipeline->pipeline_nir[stage]->nir = nir;
-   pipeline->pipeline_nir[stage]->ref_cnt = 1;
+      pipeline->shaders[stage].inlines.must_inline = lvp_find_inlinable_uniforms(pipeline, nir);
+   pipeline->shaders[stage].pipeline_nir = ralloc(NULL, struct lvp_pipeline_nir);
+   pipeline->shaders[stage].pipeline_nir->nir = nir;
+   pipeline->shaders[stage].pipeline_nir->ref_cnt = 1;
 
    return VK_SUCCESS;
 }
@@ -565,37 +566,37 @@ static void
 lvp_pipeline_xfb_init(struct lvp_pipeline *pipeline)
 {
    gl_shader_stage stage = MESA_SHADER_VERTEX;
-   if (pipeline->pipeline_nir[MESA_SHADER_GEOMETRY])
+   if (pipeline->shaders[MESA_SHADER_GEOMETRY].pipeline_nir)
       stage = MESA_SHADER_GEOMETRY;
-   else if (pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL])
+   else if (pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir)
      stage = MESA_SHADER_TESS_EVAL;
    pipeline->last_vertex = stage;
 
-   nir_xfb_info *xfb_info = pipeline->pipeline_nir[stage]->nir->xfb_info;
+   nir_xfb_info *xfb_info = pipeline->shaders[stage].pipeline_nir->nir->xfb_info;
    if (xfb_info) {
      uint8_t output_mapping[VARYING_SLOT_TESS_MAX];
      memset(output_mapping, 0, sizeof(output_mapping));
-      nir_foreach_shader_out_variable(var, pipeline->pipeline_nir[stage]->nir) {
+      nir_foreach_shader_out_variable(var, pipeline->shaders[stage].pipeline_nir->nir) {
         unsigned slots = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4) : glsl_count_attribute_slots(var->type, false);
         for (unsigned i = 0; i < slots; i++)
            output_mapping[var->data.location + i] = var->data.driver_location + i;
      }
 
-      pipeline->stream_output.num_outputs = xfb_info->output_count;
+      pipeline->shaders[pipeline->last_vertex].stream_output.num_outputs = xfb_info->output_count;
      for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
         if (xfb_info->buffers_written & (1 << i)) {
-            pipeline->stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
+            pipeline->shaders[pipeline->last_vertex].stream_output.stride[i] = xfb_info->buffers[i].stride / 4;
         }
      }
      for (unsigned i = 0; i < xfb_info->output_count; i++) {
-         pipeline->stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
-         pipeline->stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
-         pipeline->stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
-         pipeline->stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
-         pipeline->stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;
-         pipeline->stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
+         pipeline->shaders[pipeline->last_vertex].stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;
+         pipeline->shaders[pipeline->last_vertex].stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;
+         pipeline->shaders[pipeline->last_vertex].stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];
+         pipeline->shaders[pipeline->last_vertex].stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);
+         pipeline->shaders[pipeline->last_vertex].stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;
+         pipeline->shaders[pipeline->last_vertex].stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];
      }
   }
 
@@ -616,7 +617,7 @@ lvp_pipeline_compile_stage(struct lvp_pipeline *pipeline, nir_shader *nir)
    shstate.type = PIPE_SHADER_IR_NIR;
    shstate.ir.nir = nir;
    if (nir->info.stage == pipeline->last_vertex)
-      memcpy(&shstate.stream_output, &pipeline->stream_output, sizeof(shstate.stream_output));
+      memcpy(&shstate.stream_output, &pipeline->shaders[pipeline->last_vertex].stream_output, sizeof(shstate.stream_output));
 
    switch (nir->info.stage) {
    case MESA_SHADER_FRAGMENT:
@@ -799,8 +800,9 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
          pipeline->disable_multisample = p->disable_multisample;
          pipeline->line_rectangular = p->line_rectangular;
          pipeline->last_vertex = p->last_vertex;
-         memcpy(&pipeline->stream_output, &p->stream_output, sizeof(p->stream_output));
-         memcpy(&pipeline->access, &p->access, sizeof(p->access));
+         memcpy(pipeline->shaders, p->shaders, sizeof(struct lvp_shader) * 4);
+         for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++)
+            pipeline->shaders[i].pipeline_nir = NULL; //this gets handled later
       }
       if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT)
          pipeline->force_min_sample = p->force_min_sample;
@@ -843,42 +845,42 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
      switch (stage) {
      case MESA_SHADER_GEOMETRY:
-         pipeline->gs_output_lines = pipeline->pipeline_nir[MESA_SHADER_GEOMETRY] &&
-            pipeline->pipeline_nir[MESA_SHADER_GEOMETRY]->nir->info.gs.output_primitive == SHADER_PRIM_LINES;
+         pipeline->gs_output_lines = pipeline->shaders[MESA_SHADER_GEOMETRY].pipeline_nir &&
+            pipeline->shaders[MESA_SHADER_GEOMETRY].pipeline_nir->nir->info.gs.output_primitive == SHADER_PRIM_LINES;
         break;
      case MESA_SHADER_FRAGMENT:
-         if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->nir->info.fs.uses_sample_shading)
+         if (pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir->nir->info.fs.uses_sample_shading)
            pipeline->force_min_sample = true;
         break;
      default: break;
      }
   }
-   if (pCreateInfo->stageCount && pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]) {
-      nir_lower_patch_vertices(pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->nir, pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->nir->info.tess.tcs_vertices_out, NULL);
-      merge_tess_info(&pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->nir->info, &pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->nir->info);
+   if (pCreateInfo->stageCount && pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir) {
+      nir_lower_patch_vertices(pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir, pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info.tess.tcs_vertices_out, NULL);
+      merge_tess_info(&pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info, &pipeline->shaders[MESA_SHADER_TESS_CTRL].pipeline_nir->nir->info);
       if (BITSET_TEST(pipeline->graphics_state.dynamic, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
-         pipeline->tess_ccw = ralloc(NULL, struct lvp_pipeline_nir);
-         pipeline->tess_ccw->nir = nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->nir);
-         pipeline->tess_ccw->nir->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->nir->info.tess.ccw;
+         pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw = ralloc(NULL, struct lvp_pipeline_nir);
+         pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir = nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir);
+         pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
       } else if (pipeline->graphics_state.ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) {
-         pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->nir->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->nir->info.tess.ccw;
+         pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw = !pipeline->shaders[MESA_SHADER_TESS_EVAL].pipeline_nir->nir->info.tess.ccw;
       }
    }
    if (libstate) {
       for (unsigned i = 0; i < libstate->libraryCount; i++) {
          LVP_FROM_HANDLE(lvp_pipeline, p, libstate->pLibraries[i]);
          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_FRAGMENT_SHADER_BIT_EXT) {
-            if (p->pipeline_nir[MESA_SHADER_FRAGMENT])
-               lvp_pipeline_nir_ref(&pipeline->pipeline_nir[MESA_SHADER_FRAGMENT], p->pipeline_nir[MESA_SHADER_FRAGMENT]);
+            if (p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir)
+               lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir, p->shaders[MESA_SHADER_FRAGMENT].pipeline_nir);
          }
          if (p->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
            for (unsigned j = MESA_SHADER_VERTEX; j < MESA_SHADER_FRAGMENT; j++) {
-               if (p->pipeline_nir[j])
-                  lvp_pipeline_nir_ref(&pipeline->pipeline_nir[j], p->pipeline_nir[j]);
+               if (p->shaders[j].pipeline_nir)
+                  lvp_pipeline_nir_ref(&pipeline->shaders[j].pipeline_nir, p->shaders[j].pipeline_nir);
            }
-            if (p->tess_ccw)
-               lvp_pipeline_nir_ref(&pipeline->tess_ccw, p->tess_ccw);
+            if (p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
+               lvp_pipeline_nir_ref(&pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw, p->shaders[MESA_SHADER_TESS_EVAL].tess_ccw);
         }
      }
   } else if (pipeline->stages & VK_GRAPHICS_PIPELINE_LIBRARY_PRE_RASTERIZATION_SHADERS_BIT_EXT) {
@@ -896,15 +898,15 @@ lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,
    if (!libstate && !pipeline->library)
       lvp_pipeline_shaders_compile(pipeline);
 
-   if (!pipeline->library && !pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]) {
+   if (!pipeline->library && !pipeline->shaders[MESA_SHADER_FRAGMENT].pipeline_nir) {
       pipeline->noop_fs = true;
-      pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->noop_fs;
+      pipeline->shaders[PIPE_SHADER_FRAGMENT].shader_cso = device->noop_fs;
    }
    return VK_SUCCESS;
 
 fail:
-   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
-      lvp_pipeline_nir_ref(&pipeline->pipeline_nir[i], NULL);
+   for (unsigned i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
+      lvp_pipeline_nir_ref(&pipeline->shaders[i].pipeline_nir, NULL);
    }
    vk_free(&device->vk.alloc, pipeline->state_data);
 
@@ -916,19 +918,19 @@ lvp_pipeline_shaders_compile(struct lvp_pipeline *pipeline)
 {
    if (pipeline->compiled)
       return;
-   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->pipeline_nir); i++) {
-      if (!pipeline->pipeline_nir[i])
+   for (uint32_t i = 0; i < ARRAY_SIZE(pipeline->shaders); i++) {
+      if (!pipeline->shaders[i].pipeline_nir)
         continue;
 
      gl_shader_stage stage = i;
-      assert(stage == pipeline->pipeline_nir[i]->nir->info.stage);
-
-      if (!pipeline->inlines[stage].can_inline) {
-         pipeline->shader_cso[stage] = lvp_pipeline_compile(pipeline,
-                                                            nir_shader_clone(NULL, pipeline->pipeline_nir[stage]->nir));
-         if (pipeline->tess_ccw)
-            pipeline->tess_ccw_cso = lvp_pipeline_compile(pipeline,
-                                                          nir_shader_clone(NULL, pipeline->tess_ccw->nir));
+      assert(stage == pipeline->shaders[i].pipeline_nir->nir->info.stage);
+
+      if (!pipeline->shaders[stage].inlines.can_inline) {
+         pipeline->shaders[stage].shader_cso = lvp_pipeline_compile(pipeline,
+                                                                    nir_shader_clone(NULL, pipeline->shaders[stage].pipeline_nir->nir));
+         if (pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw)
+            pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw_cso = lvp_pipeline_compile(pipeline,
+                                                                                         nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_TESS_EVAL].tess_ccw->nir));
      }
   }
   pipeline->compiled = true;
@@ -1024,8 +1026,8 @@ lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,
    if (result != VK_SUCCESS)
       return result;
 
-   if (!pipeline->inlines[MESA_SHADER_COMPUTE].can_inline)
-      pipeline->shader_cso[PIPE_SHADER_COMPUTE] = lvp_pipeline_compile(pipeline, nir_shader_clone(NULL, pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->nir));
+   if (!pipeline->shaders[MESA_SHADER_COMPUTE].inlines.can_inline)
+      pipeline->shaders[PIPE_SHADER_COMPUTE].shader_cso = lvp_pipeline_compile(pipeline, nir_shader_clone(NULL, pipeline->shaders[MESA_SHADER_COMPUTE].pipeline_nir->nir));
    pipeline->compiled = true;
    return VK_SUCCESS;
 }
diff --git a/src/gallium/frontends/lavapipe/lvp_private.h b/src/gallium/frontends/lavapipe/lvp_private.h
index f29ec25..643b2d2 100644
--- a/src/gallium/frontends/lavapipe/lvp_private.h
+++ b/src/gallium/frontends/lavapipe/lvp_private.h
@@ -425,28 +425,31 @@ lvp_pipeline_nir_ref(struct lvp_pipeline_nir **dst, struct lvp_pipeline_nir *src
    *dst = src;
 }
 
+struct lvp_shader {
+   struct lvp_access_info access;
+   struct lvp_pipeline_nir *pipeline_nir;
+   struct lvp_pipeline_nir *tess_ccw;
+   void *shader_cso;
+   void *tess_ccw_cso;
+   struct {
+      uint32_t uniform_offsets[PIPE_MAX_CONSTANT_BUFFERS][MAX_INLINABLE_UNIFORMS];
+      uint8_t count[PIPE_MAX_CONSTANT_BUFFERS];
+      bool must_inline;
+      uint32_t can_inline; //bitmask
+   } inlines;
+   struct pipe_stream_output_info stream_output;
+};
+
 struct lvp_pipeline {
    struct vk_object_base base;
    struct lvp_device *                          device;
    struct lvp_pipeline_layout *                 layout;
-   struct lvp_access_info access[MESA_SHADER_STAGES];
-
    void *state_data;
    bool is_compute_pipeline;
    bool force_min_sample;
-   struct lvp_pipeline_nir *pipeline_nir[MESA_SHADER_STAGES];
-   struct lvp_pipeline_nir *tess_ccw;
-   void *shader_cso[PIPE_SHADER_TYPES];
-   void *tess_ccw_cso;
-   struct {
-      uint32_t uniform_offsets[PIPE_MAX_CONSTANT_BUFFERS][MAX_INLINABLE_UNIFORMS];
-      uint8_t count[PIPE_MAX_CONSTANT_BUFFERS];
-      bool must_inline;
-      uint32_t can_inline; //bitmask
-   } inlines[MESA_SHADER_STAGES];
+   struct lvp_shader shaders[MESA_SHADER_STAGES];
    gl_shader_stage last_vertex;
-   struct pipe_stream_output_info stream_output;
    struct vk_graphics_pipeline_state graphics_state;
    VkGraphicsPipelineLibraryFlagsEXT stages;
    bool line_smooth;
-- 
2.7.4
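
An illustrative, self-contained sketch of the struct split referenced above.
The names below (STAGES, access_info, inlines_info, shader_state,
pipeline_state) are simplified stand-ins invented for this example, not the
real lavapipe definitions; they only show the shape of the change, where the
per-stage parallel arrays of struct lvp_pipeline become one shaders[] array
indexed by stage.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define STAGES 6                    /* stand-in for MESA_SHADER_STAGES */

struct access_info {                /* stand-in for struct lvp_access_info */
   uint64_t images_read;
   uint64_t buffers_written;
};

struct inlines_info {               /* stand-in for the "inlines" bookkeeping */
   bool must_inline;
   uint32_t can_inline;             /* bitmask of inlinable constant buffers */
};

/* After the patch, everything a single stage owns lives in one struct... */
struct shader_state {               /* stand-in for struct lvp_shader */
   struct access_info access;
   struct inlines_info inlines;
   void *shader_cso;
};

/* ...and the pipeline carries one array of it instead of parallel arrays
 * (access[], pipeline_nir[], shader_cso[], inlines[]) indexed by stage. */
struct pipeline_state {             /* stand-in for struct lvp_pipeline */
   struct shader_state shaders[STAGES];
};

int main(void)
{
   struct pipeline_state pipeline = {0};
   unsigned stage = 4;              /* some shader stage index */

   /* Pre-patch call sites looked like pipeline->inlines[stage].can_inline or
    * pipeline->access[stage].images_read; post-patch the stage is indexed
    * once and members are reached through it. */
   struct shader_state *sh = &pipeline.shaders[stage];
   sh->inlines.can_inline |= 1u << 0;
   sh->access.images_read |= 1u << 2;

   printf("stage %u can_inline mask: 0x%x\n", stage, sh->inlines.can_inline);
   return 0;
}

Grouping the per-stage members this way also lets whole-array copies such as
the old memcpy(state->access, pipeline->access, ...) become a per-stage loop
over pipeline->shaders[i].access, which is what handle_graphics_pipeline does
after this patch.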