From 6d38a35afb0e352fdeaaa81bdbce1c5ff7028921 Mon Sep 17 00:00:00 2001 From: Jesse Natalie Date: Fri, 31 Dec 2021 09:58:50 -0800 Subject: [PATCH] d3d12: Compile, bind, and cache compute PSOs Reviewed-by: Sil Vilerino Part-of: --- src/gallium/drivers/d3d12/d3d12_batch.cpp | 2 +- src/gallium/drivers/d3d12/d3d12_compiler.cpp | 100 +++++++++----- src/gallium/drivers/d3d12/d3d12_compiler.h | 4 + src/gallium/drivers/d3d12/d3d12_context.cpp | 39 ++++++ src/gallium/drivers/d3d12/d3d12_context.h | 8 +- src/gallium/drivers/d3d12/d3d12_draw.cpp | 12 +- src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp | 147 +++++++++++++++++++-- src/gallium/drivers/d3d12/d3d12_pipeline_state.h | 18 +++ src/gallium/drivers/d3d12/d3d12_screen.cpp | 3 +- 9 files changed, 275 insertions(+), 58 deletions(-) diff --git a/src/gallium/drivers/d3d12/d3d12_batch.cpp b/src/gallium/drivers/d3d12/d3d12_batch.cpp index 406e5b0..f4f67a1 100644 --- a/src/gallium/drivers/d3d12/d3d12_batch.cpp +++ b/src/gallium/drivers/d3d12/d3d12_batch.cpp @@ -182,7 +182,7 @@ d3d12_start_batch(struct d3d12_context *ctx, struct d3d12_batch *batch) ctx->cmdlist->SetDescriptorHeaps(2, heaps); ctx->cmdlist_dirty = ~0; - for (int i = 0; i < D3D12_GFX_SHADER_STAGES; ++i) + for (int i = 0; i < PIPE_SHADER_TYPES; ++i) ctx->shader_dirty[i] = ~0; if (!ctx->queries_disabled) diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.cpp b/src/gallium/drivers/d3d12/d3d12_compiler.cpp index cc1228f..6220199 100644 --- a/src/gallium/drivers/d3d12/d3d12_compiler.cpp +++ b/src/gallium/drivers/d3d12/d3d12_compiler.cpp @@ -1047,6 +1047,54 @@ update_so_info(struct pipe_stream_output_info *so_info, return so_outputs; } +static struct d3d12_shader_selector * +d3d12_create_shader_impl(struct d3d12_context *ctx, + struct d3d12_shader_selector *sel, + struct nir_shader *nir, + struct d3d12_shader_selector *prev, + struct d3d12_shader_selector *next) +{ + unsigned tex_scan_result = scan_texture_use(nir); + sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0; + sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0; + + /* Integer cube maps are not supported in DirectX because sampling is not supported + * on integer textures and TextureLoad is not supported for cube maps, so we have to + * lower integer cube maps to be handled like 2D textures arrays*/ + NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array); + + /* Keep this initial shader as the blue print for possible variants */ + sel->initial = nir; + + /* + * We must compile some shader here, because if the previous or a next shaders exists later + * when the shaders are bound, then the key evaluation in the shader selector will access + * the current variant of these prev and next shader, and we can only assign + * a current variant when it has been successfully compiled. + * + * For shaders that require lowering because certain instructions are not available + * and their emulation is state depended (like sampling an integer texture that must be + * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD), + * we must go through the shader selector here to create a compilable variant. + * For shaders that are not depended on the state this is just compiling the original + * shader. + * + * TODO: get rid of having to compiling the shader here if it can be forseen that it will + * be thrown away (i.e. it depends on states that are likely to change before the shader is + * used for the first time) + */ + struct d3d12_selection_context sel_ctx = {0}; + sel_ctx.ctx = ctx; + select_shader_variant(&sel_ctx, sel, prev, next); + + if (!sel->current) { + ralloc_free(sel); + return NULL; + } + + return sel; +} + struct d3d12_shader_selector * d3d12_create_shader(struct d3d12_context *ctx, pipe_shader_type stage, @@ -1065,11 +1113,6 @@ d3d12_create_shader(struct d3d12_context *ctx, } nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); - - unsigned tex_scan_result = scan_texture_use(nir); - sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0; - sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0; - memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info)); update_so_info(&sel->so_info, nir->info.outputs_written); @@ -1103,41 +1146,28 @@ d3d12_create_shader(struct d3d12_context *ctx, dxil_sort_ps_outputs(nir); } - /* Integer cube maps are not supported in DirectX because sampling is not supported - * on integer textures and TextureLoad is not supported for cube maps, so we have to - * lower integer cube maps to be handled like 2D textures arrays*/ - NIR_PASS_V(nir, d3d12_lower_int_cubmap_to_array); + return d3d12_create_shader_impl(ctx, sel, nir, prev, next); +} - /* Keep this initial shader as the blue print for possible variants */ - sel->initial = nir; +struct d3d12_shader_selector * +d3d12_create_compute_shader(struct d3d12_context *ctx, + const struct pipe_compute_state *shader) +{ + struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector); + sel->stage = PIPE_SHADER_COMPUTE; - /* - * We must compile some shader here, because if the previous or a next shaders exists later - * when the shaders are bound, then the key evaluation in the shader selector will access - * the current variant of these prev and next shader, and we can only assign - * a current variant when it has been successfully compiled. - * - * For shaders that require lowering because certain instructions are not available - * and their emulation is state depended (like sampling an integer texture that must be - * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD), - * we must go through the shader selector here to create a compilable variant. - * For shaders that are not depended on the state this is just compiling the original - * shader. - * - * TODO: get rid of having to compiling the shader here if it can be forseen that it will - * be thrown away (i.e. it depends on states that are likely to change before the shader is - * used for the first time) - */ - struct d3d12_selection_context sel_ctx = {0}; - sel_ctx.ctx = ctx; - select_shader_variant(&sel_ctx, sel, prev, next); + struct nir_shader *nir = NULL; - if (!sel->current) { - ralloc_free(sel); - return NULL; + if (shader->ir_type == PIPE_SHADER_IR_NIR) { + nir = (nir_shader *)shader->prog; + } else { + assert(shader->ir_type == PIPE_SHADER_IR_TGSI); + nir = tgsi_to_nir(shader->prog, ctx->base.screen, false); } - return sel; + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); + + return d3d12_create_shader_impl(ctx, sel, nir, nullptr, nullptr); } void diff --git a/src/gallium/drivers/d3d12/d3d12_compiler.h b/src/gallium/drivers/d3d12/d3d12_compiler.h index 6398aef..da70633 100644 --- a/src/gallium/drivers/d3d12/d3d12_compiler.h +++ b/src/gallium/drivers/d3d12/d3d12_compiler.h @@ -197,6 +197,10 @@ d3d12_create_shader(struct d3d12_context *ctx, enum pipe_shader_type stage, const struct pipe_shader_state *shader); +struct d3d12_shader_selector * +d3d12_create_compute_shader(struct d3d12_context *ctx, + const struct pipe_compute_state *shader); + void d3d12_shader_free(struct d3d12_shader_selector *shader); diff --git a/src/gallium/drivers/d3d12/d3d12_context.cpp b/src/gallium/drivers/d3d12/d3d12_context.cpp index 0874b54..f3d0052 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.cpp +++ b/src/gallium/drivers/d3d12/d3d12_context.cpp @@ -77,6 +77,7 @@ d3d12_context_destroy(struct pipe_context *pctx) slab_destroy_child(&ctx->transfer_pool_unsync); d3d12_gs_variant_cache_destroy(ctx); d3d12_gfx_pipeline_state_cache_destroy(ctx); + d3d12_compute_pipeline_state_cache_destroy(ctx); d3d12_root_signature_cache_destroy(ctx); u_suballocator_destroy(&ctx->query_allocator); @@ -1128,6 +1129,39 @@ d3d12_delete_gs_state(struct pipe_context *pctx, void *gs) (struct d3d12_shader_selector *) gs); } +static void * +d3d12_create_compute_state(struct pipe_context *pctx, + const struct pipe_compute_state *shader) +{ + return d3d12_create_compute_shader(d3d12_context(pctx), shader); +} + +static void +d3d12_bind_compute_state(struct pipe_context *pctx, void *css) +{ + d3d12_context(pctx)->compute_state = (struct d3d12_shader_selector *)css; +} + +static void +d3d12_delete_compute_state(struct pipe_context *pctx, void *cs) +{ + struct d3d12_context *ctx = d3d12_context(pctx); + struct d3d12_shader_selector *shader = (struct d3d12_shader_selector *)cs; + d3d12_compute_pipeline_state_cache_invalidate_shader(ctx, shader); + + /* Make sure the pipeline state no longer reference the deleted shader */ + struct d3d12_shader *iter = shader->first; + while (iter) { + if (ctx->compute_pipeline_state.stage == iter) { + ctx->compute_pipeline_state.stage = NULL; + break; + } + iter = iter->next_variant; + } + + d3d12_shader_free(shader); +} + static bool d3d12_init_polygon_stipple(struct pipe_context *pctx) { @@ -2163,6 +2197,10 @@ d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) ctx->base.bind_gs_state = d3d12_bind_gs_state; ctx->base.delete_gs_state = d3d12_delete_gs_state; + ctx->base.create_compute_state = d3d12_create_compute_state; + ctx->base.bind_compute_state = d3d12_bind_compute_state; + ctx->base.delete_compute_state = d3d12_delete_compute_state; + ctx->base.set_polygon_stipple = d3d12_set_polygon_stipple; ctx->base.set_vertex_buffers = d3d12_set_vertex_buffers; ctx->base.set_viewport_states = d3d12_set_viewport_states; @@ -2224,6 +2262,7 @@ d3d12_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) } d3d12_gfx_pipeline_state_cache_init(ctx); + d3d12_compute_pipeline_state_cache_init(ctx); d3d12_root_signature_cache_init(ctx); d3d12_gs_variant_cache_init(ctx); diff --git a/src/gallium/drivers/d3d12/d3d12_context.h b/src/gallium/drivers/d3d12/d3d12_context.h index 36a02e0..1fdbf13 100644 --- a/src/gallium/drivers/d3d12/d3d12_context.h +++ b/src/gallium/drivers/d3d12/d3d12_context.h @@ -162,6 +162,7 @@ struct d3d12_context { struct u_suballocator query_allocator; struct u_suballocator so_allocator; struct hash_table *pso_cache; + struct hash_table *compute_pso_cache; struct hash_table *root_signature_cache; struct hash_table *gs_variant_cache; @@ -213,12 +214,15 @@ struct d3d12_context { unsigned fake_so_buffer_factor; struct d3d12_shader_selector *gfx_stages[D3D12_GFX_SHADER_STAGES]; + struct d3d12_shader_selector *compute_state; struct d3d12_gfx_pipeline_state gfx_pipeline_state; - unsigned shader_dirty[D3D12_GFX_SHADER_STAGES]; + struct d3d12_compute_pipeline_state compute_pipeline_state; + unsigned shader_dirty[PIPE_SHADER_TYPES]; unsigned state_dirty; unsigned cmdlist_dirty; - ID3D12PipelineState *current_pso; + ID3D12PipelineState *current_gfx_pso; + ID3D12PipelineState *current_compute_pso; bool reverse_depth_range; ID3D12Fence *cmdqueue_fence; diff --git a/src/gallium/drivers/d3d12/d3d12_draw.cpp b/src/gallium/drivers/d3d12/d3d12_draw.cpp index 67a5fb6..9c45c61 100644 --- a/src/gallium/drivers/d3d12/d3d12_draw.cpp +++ b/src/gallium/drivers/d3d12/d3d12_draw.cpp @@ -761,9 +761,9 @@ d3d12_draw_vbo(struct pipe_context *pctx, } } - if (!ctx->current_pso || ctx->state_dirty & D3D12_DIRTY_PSO) { - ctx->current_pso = d3d12_get_gfx_pipeline_state(ctx); - assert(ctx->current_pso); + if (!ctx->current_gfx_pso || ctx->state_dirty & D3D12_DIRTY_PSO) { + ctx->current_gfx_pso = d3d12_get_gfx_pipeline_state(ctx); + assert(ctx->current_gfx_pso); } ctx->cmdlist_dirty |= ctx->state_dirty; @@ -778,9 +778,9 @@ d3d12_draw_vbo(struct pipe_context *pctx, } if (ctx->cmdlist_dirty & D3D12_DIRTY_PSO) { - assert(ctx->current_pso); - d3d12_batch_reference_object(batch, ctx->current_pso); - ctx->cmdlist->SetPipelineState(ctx->current_pso); + assert(ctx->current_gfx_pso); + d3d12_batch_reference_object(batch, ctx->current_gfx_pso); + ctx->cmdlist->SetPipelineState(ctx->current_gfx_pso); } D3D12_GPU_DESCRIPTOR_HANDLE root_desc_tables[MAX_DESCRIPTOR_TABLES]; diff --git a/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp b/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp index 6933f10..f236f8b 100644 --- a/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp +++ b/src/gallium/drivers/d3d12/d3d12_pipeline_state.cpp @@ -33,11 +33,16 @@ #include -struct d3d12_pso_entry { +struct d3d12_gfx_pso_entry { struct d3d12_gfx_pipeline_state key; ID3D12PipelineState *pso; }; +struct d3d12_compute_pso_entry { + struct d3d12_compute_pipeline_state key; + ID3D12PipelineState *pso; +}; + static const char * get_semantic_name(int slot, unsigned *index) { @@ -295,7 +300,7 @@ d3d12_get_gfx_pipeline_state(struct d3d12_context *ctx) struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->pso_cache, hash, &ctx->gfx_pipeline_state); if (!entry) { - struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)MALLOC(sizeof(struct d3d12_pso_entry)); + struct d3d12_gfx_pso_entry *data = (struct d3d12_gfx_pso_entry *)MALLOC(sizeof(struct d3d12_gfx_pso_entry)); if (!data) return NULL; @@ -310,7 +315,7 @@ d3d12_get_gfx_pipeline_state(struct d3d12_context *ctx) assert(entry); } - return ((struct d3d12_pso_entry *)(entry->data))->pso; + return ((struct d3d12_gfx_pso_entry *)(entry->data))->pso; } void @@ -320,28 +325,28 @@ d3d12_gfx_pipeline_state_cache_init(struct d3d12_context *ctx) } static void -delete_entry(struct hash_entry *entry) +delete_gfx_entry(struct hash_entry *entry) { - struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)entry->data; + struct d3d12_gfx_pso_entry *data = (struct d3d12_gfx_pso_entry *)entry->data; data->pso->Release(); FREE(data); } static void -remove_entry(struct d3d12_context *ctx, struct hash_entry *entry) +remove_gfx_entry(struct d3d12_context *ctx, struct hash_entry *entry) { - struct d3d12_pso_entry *data = (struct d3d12_pso_entry *)entry->data; + struct d3d12_gfx_pso_entry *data = (struct d3d12_gfx_pso_entry *)entry->data; - if (ctx->current_pso == data->pso) - ctx->current_pso = NULL; + if (ctx->current_gfx_pso == data->pso) + ctx->current_gfx_pso = NULL; _mesa_hash_table_remove(ctx->pso_cache, entry); - delete_entry(entry); + delete_gfx_entry(entry); } void d3d12_gfx_pipeline_state_cache_destroy(struct d3d12_context *ctx) { - _mesa_hash_table_destroy(ctx->pso_cache, delete_entry); + _mesa_hash_table_destroy(ctx->pso_cache, delete_gfx_entry); } void @@ -350,7 +355,7 @@ d3d12_gfx_pipeline_state_cache_invalidate(struct d3d12_context *ctx, const void hash_table_foreach(ctx->pso_cache, entry) { const struct d3d12_gfx_pipeline_state *key = (struct d3d12_gfx_pipeline_state *)entry->key; if (key->blend == state || key->zsa == state || key->rast == state) - remove_entry(ctx, entry); + remove_gfx_entry(ctx, entry); } } @@ -365,7 +370,123 @@ d3d12_gfx_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx, hash_table_foreach(ctx->pso_cache, entry) { const struct d3d12_gfx_pipeline_state *key = (struct d3d12_gfx_pipeline_state *)entry->key; if (key->stages[stage] == shader) - remove_entry(ctx, entry); + remove_gfx_entry(ctx, entry); + } + shader = shader->next_variant; + } +} + +static ID3D12PipelineState * +create_compute_pipeline_state(struct d3d12_context *ctx) +{ + struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); + struct d3d12_compute_pipeline_state *state = &ctx->compute_pipeline_state; + + D3D12_COMPUTE_PIPELINE_STATE_DESC pso_desc = { 0 }; + pso_desc.pRootSignature = state->root_signature; + + if (state->stage) { + auto shader = state->stage; + pso_desc.CS.BytecodeLength = shader->bytecode_length; + pso_desc.CS.pShaderBytecode = shader->bytecode; + } + + pso_desc.NodeMask = 0; + + pso_desc.CachedPSO.pCachedBlob = NULL; + pso_desc.CachedPSO.CachedBlobSizeInBytes = 0; + + pso_desc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + ID3D12PipelineState *ret; + if (FAILED(screen->dev->CreateComputePipelineState(&pso_desc, + IID_PPV_ARGS(&ret)))) { + debug_printf("D3D12: CreateComputePipelineState failed!\n"); + return NULL; + } + + return ret; +} + +static uint32_t +hash_compute_pipeline_state(const void *key) +{ + return _mesa_hash_data(key, sizeof(struct d3d12_compute_pipeline_state)); +} + +static bool +equals_compute_pipeline_state(const void *a, const void *b) +{ + return memcmp(a, b, sizeof(struct d3d12_compute_pipeline_state)) == 0; +} + +ID3D12PipelineState * +d3d12_get_compute_pipeline_state(struct d3d12_context *ctx) +{ + uint32_t hash = hash_compute_pipeline_state(&ctx->compute_pipeline_state); + struct hash_entry *entry = _mesa_hash_table_search_pre_hashed(ctx->compute_pso_cache, hash, + &ctx->compute_pipeline_state); + if (!entry) { + struct d3d12_compute_pso_entry *data = (struct d3d12_compute_pso_entry *)MALLOC(sizeof(struct d3d12_compute_pso_entry)); + if (!data) + return NULL; + + data->key = ctx->compute_pipeline_state; + data->pso = create_compute_pipeline_state(ctx); + if (!data->pso) { + FREE(data); + return NULL; + } + + entry = _mesa_hash_table_insert_pre_hashed(ctx->compute_pso_cache, hash, &data->key, data); + assert(entry); + } + + return ((struct d3d12_compute_pso_entry *)(entry->data))->pso; +} + +void +d3d12_compute_pipeline_state_cache_init(struct d3d12_context *ctx) +{ + ctx->compute_pso_cache = _mesa_hash_table_create(NULL, NULL, equals_compute_pipeline_state); +} + +static void +delete_compute_entry(struct hash_entry *entry) +{ + struct d3d12_compute_pso_entry *data = (struct d3d12_compute_pso_entry *)entry->data; + data->pso->Release(); + FREE(data); +} + +static void +remove_compute_entry(struct d3d12_context *ctx, struct hash_entry *entry) +{ + struct d3d12_compute_pso_entry *data = (struct d3d12_compute_pso_entry *)entry->data; + + if (ctx->current_compute_pso == data->pso) + ctx->current_compute_pso = NULL; + _mesa_hash_table_remove(ctx->compute_pso_cache, entry); + delete_compute_entry(entry); +} + +void +d3d12_compute_pipeline_state_cache_destroy(struct d3d12_context *ctx) +{ + _mesa_hash_table_destroy(ctx->compute_pso_cache, delete_compute_entry); +} + +void +d3d12_compute_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx, + struct d3d12_shader_selector *selector) +{ + struct d3d12_shader *shader = selector->first; + + while (shader) { + hash_table_foreach(ctx->compute_pso_cache, entry) { + const struct d3d12_compute_pipeline_state *key = (struct d3d12_compute_pipeline_state *)entry->key; + if (key->stage == shader) + remove_compute_entry(ctx, entry); } shader = shader->next_variant; } diff --git a/src/gallium/drivers/d3d12/d3d12_pipeline_state.h b/src/gallium/drivers/d3d12/d3d12_pipeline_state.h index bcc6413..6b14ac2 100644 --- a/src/gallium/drivers/d3d12/d3d12_pipeline_state.h +++ b/src/gallium/drivers/d3d12/d3d12_pipeline_state.h @@ -81,6 +81,11 @@ struct d3d12_gfx_pipeline_state { enum pipe_prim_type prim_type; }; +struct d3d12_compute_pipeline_state { + ID3D12RootSignature *root_signature; + struct d3d12_shader *stage; +}; + DXGI_FORMAT d3d12_rtv_format(struct d3d12_context *ctx, unsigned index); @@ -101,4 +106,17 @@ d3d12_gfx_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx, enum pipe_shader_type stage, struct d3d12_shader_selector *selector); +void +d3d12_compute_pipeline_state_cache_init(struct d3d12_context *ctx); + +void +d3d12_compute_pipeline_state_cache_destroy(struct d3d12_context *ctx); + +ID3D12PipelineState * +d3d12_get_compute_pipeline_state(struct d3d12_context *ctx); + +void +d3d12_compute_pipeline_state_cache_invalidate_shader(struct d3d12_context *ctx, + struct d3d12_shader_selector *selector); + #endif diff --git a/src/gallium/drivers/d3d12/d3d12_screen.cpp b/src/gallium/drivers/d3d12/d3d12_screen.cpp index dc97162..27253cf 100644 --- a/src/gallium/drivers/d3d12/d3d12_screen.cpp +++ b/src/gallium/drivers/d3d12/d3d12_screen.cpp @@ -366,7 +366,8 @@ d3d12_get_shader_param(struct pipe_screen *pscreen, case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH: if (shader == PIPE_SHADER_VERTEX || shader == PIPE_SHADER_FRAGMENT || - shader == PIPE_SHADER_GEOMETRY) + shader == PIPE_SHADER_GEOMETRY || + shader == PIPE_SHADER_COMPUTE) return INT_MAX; return 0; -- 2.7.4