From c2f2c8e407207c31c29aab5570d23cd6e98d287a Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 16 Jun 2016 15:26:54 -0700 Subject: [PATCH] anv: Use different BOs for different scratch sizes and stages This solves a race condition where we can end up having different stages stomp on each other because they're all trying to scratch in the same BO but they have different views of its layout. Signed-off-by: Jason Ekstrand Cc: "12.0" --- src/intel/vulkan/anv_device.c | 4 ++-- src/intel/vulkan/anv_pipeline.c | 20 -------------------- src/intel/vulkan/anv_private.h | 4 +--- src/intel/vulkan/gen7_pipeline.c | 18 ++++++++++++------ src/intel/vulkan/gen8_pipeline.c | 18 ++++++++++++------ src/intel/vulkan/genX_cmd_buffer.c | 15 +-------------- src/intel/vulkan/genX_pipeline.c | 8 ++++---- 7 files changed, 32 insertions(+), 55 deletions(-) diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c index 97300c3..ea8e875 100644 --- a/src/intel/vulkan/anv_device.c +++ b/src/intel/vulkan/anv_device.c @@ -878,7 +878,7 @@ VkResult anv_CreateDevice( anv_bo_init_new(&device->workaround_bo, device, 1024); - anv_block_pool_init(&device->scratch_block_pool, device, 0x10000); + anv_scratch_pool_init(device, &device->scratch_pool); anv_queue_init(device, &device->queue); @@ -947,7 +947,7 @@ void anv_DestroyDevice( anv_block_pool_finish(&device->instruction_block_pool); anv_state_pool_finish(&device->surface_state_pool); anv_block_pool_finish(&device->surface_state_block_pool); - anv_block_pool_finish(&device->scratch_block_pool); + anv_scratch_pool_finish(device, &device->scratch_pool); close(device->fd); diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 295b48c..29747cf 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -397,22 +397,8 @@ anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, const struct brw_stage_prog_data *prog_data, struct anv_pipeline_bind_map *map) { - struct brw_device_info *devinfo = &pipeline->device->info; - uint32_t max_threads[] = { - [MESA_SHADER_VERTEX] = devinfo->max_vs_threads, - [MESA_SHADER_TESS_CTRL] = devinfo->max_hs_threads, - [MESA_SHADER_TESS_EVAL] = devinfo->max_ds_threads, - [MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads, - [MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads, - [MESA_SHADER_COMPUTE] = devinfo->max_cs_threads, - }; - pipeline->prog_data[stage] = prog_data; pipeline->active_stages |= mesa_to_vk_shader_stage(stage); - pipeline->scratch_start[stage] = pipeline->total_scratch; - pipeline->total_scratch = - align_u32(pipeline->total_scratch, 1024) + - prog_data->total_scratch * max_threads[stage]; pipeline->bindings[stage] = *map; } @@ -1176,7 +1162,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, * of various prog_data pointers. Make them NULL by default. */ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); pipeline->vs_simd8 = NO_KERNEL; @@ -1185,7 +1170,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->ps_ksp0 = NO_KERNEL; pipeline->active_stages = 0; - pipeline->total_scratch = 0; const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, }; struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, }; @@ -1278,10 +1262,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline, if (extra && extra->use_rectlist) pipeline->topology = _3DPRIM_RECTLIST; - while (anv_block_pool_size(&device->scratch_block_pool) < - pipeline->total_scratch) - anv_block_pool_alloc(&device->scratch_block_pool); - return VK_SUCCESS; } diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h index ebbf2bc..50b860c 100644 --- a/src/intel/vulkan/anv_private.h +++ b/src/intel/vulkan/anv_private.h @@ -711,7 +711,7 @@ struct anv_device { struct anv_queue queue; - struct anv_block_pool scratch_block_pool; + struct anv_scratch_pool scratch_pool; uint32_t default_mocs; @@ -1471,8 +1471,6 @@ struct anv_pipeline { bool needs_data_cache; const struct brw_stage_prog_data * prog_data[MESA_SHADER_STAGES]; - uint32_t scratch_start[MESA_SHADER_STAGES]; - uint32_t total_scratch; struct { uint32_t start[MESA_SHADER_GEOMETRY + 1]; uint32_t size[MESA_SHADER_GEOMETRY + 1]; diff --git a/src/intel/vulkan/gen7_pipeline.c b/src/intel/vulkan/gen7_pipeline.c index 56e59a4..89cb51f 100644 --- a/src/intel/vulkan/gen7_pipeline.c +++ b/src/intel/vulkan/gen7_pipeline.c @@ -252,8 +252,10 @@ genX(graphics_pipeline_create)( vs.KernelStartPointer = pipeline->vs_vec4; vs.ScratchSpaceBasePointer = (struct anv_address) { - .bo = NULL, - .offset = pipeline->scratch_start[MESA_SHADER_VERTEX], + .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, + MESA_SHADER_VERTEX, + vs_prog_data->base.base.total_scratch), + .offset = 0, }; vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base); @@ -276,8 +278,10 @@ genX(graphics_pipeline_create)( gs.KernelStartPointer = pipeline->gs_kernel; gs.ScratchSpaceBasePointer = (struct anv_address) { - .bo = NULL, - .offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, + MESA_SHADER_GEOMETRY, + gs_prog_data->base.base.total_scratch), + .offset = 0, }; gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base); @@ -338,8 +342,10 @@ genX(graphics_pipeline_create)( ps.KernelStartPointer0 = pipeline->ps_ksp0; ps.ScratchSpaceBasePointer = (struct anv_address) { - .bo = NULL, - .offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, + MESA_SHADER_FRAGMENT, + wm_prog_data->base.total_scratch), + .offset = 0, }; ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base); ps.MaximumNumberofThreads = device->info.max_wm_threads - 1; diff --git a/src/intel/vulkan/gen8_pipeline.c b/src/intel/vulkan/gen8_pipeline.c index 53fca2b..6d70df6 100644 --- a/src/intel/vulkan/gen8_pipeline.c +++ b/src/intel/vulkan/gen8_pipeline.c @@ -361,8 +361,10 @@ genX(graphics_pipeline_create)( gs.ExpectedVertexCount = gs_prog_data->vertices_in; gs.ScratchSpaceBasePointer = (struct anv_address) { - .bo = NULL, - .offset = pipeline->scratch_start[MESA_SHADER_GEOMETRY], + .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, + MESA_SHADER_GEOMETRY, + gs_prog_data->base.base.total_scratch), + .offset = 0, }; gs.PerThreadScratchSpace = scratch_space(&gs_prog_data->base.base); gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; @@ -431,8 +433,10 @@ genX(graphics_pipeline_create)( vs.SoftwareExceptionEnable = false; vs.ScratchSpaceBasePointer = (struct anv_address) { - .bo = NULL, - .offset = pipeline->scratch_start[MESA_SHADER_VERTEX], + .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, + MESA_SHADER_VERTEX, + vs_prog_data->base.base.total_scratch), + .offset = 0, }; vs.PerThreadScratchSpace = scratch_space(&vs_prog_data->base.base); @@ -483,8 +487,10 @@ genX(graphics_pipeline_create)( ps.MaximumNumberofThreadsPerPSD = 64 - num_thread_bias; ps.ScratchSpaceBasePointer = (struct anv_address) { - .bo = NULL, - .offset = pipeline->scratch_start[MESA_SHADER_FRAGMENT], + .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, + MESA_SHADER_FRAGMENT, + wm_prog_data->base.total_scratch), + .offset = 0, }; ps.PerThreadScratchSpace = scratch_space(&wm_prog_data->base); diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c index 95f9b48..741d5bf 100644 --- a/src/intel/vulkan/genX_cmd_buffer.c +++ b/src/intel/vulkan/genX_cmd_buffer.c @@ -33,12 +33,6 @@ void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) { struct anv_device *device = cmd_buffer->device; - struct anv_bo *scratch_bo = NULL; - - cmd_buffer->state.scratch_size = - anv_block_pool_size(&device->scratch_block_pool); - if (cmd_buffer->state.scratch_size > 0) - scratch_bo = &device->scratch_block_pool.bo; /* XXX: Do we need this on more than just BDW? */ #if (GEN_GEN >= 8) @@ -55,7 +49,7 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer) #endif anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) { - sba.GeneralStateBaseAddress = (struct anv_address) { scratch_bo, 0 }; + sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 }; sba.GeneralStateMemoryObjectControlState = GENX(MOCS); sba.GeneralStateBaseAddressModifyEnable = true; @@ -503,13 +497,6 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer) cmd_buffer->state.vb_dirty &= ~vb_emit; if (cmd_buffer->state.dirty & ANV_CMD_DIRTY_PIPELINE) { - /* If somebody compiled a pipeline after starting a command buffer the - * scratch bo may have grown since we started this cmd buffer (and - * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now, - * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */ - if (cmd_buffer->state.scratch_size < pipeline->total_scratch) - anv_cmd_buffer_emit_state_base_address(cmd_buffer); - anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); /* The exact descriptor layout is pulled from the pipeline, so we need diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c index 7360448..5cbcfd2 100644 --- a/src/intel/vulkan/genX_pipeline.c +++ b/src/intel/vulkan/genX_pipeline.c @@ -64,7 +64,6 @@ genX(compute_pipeline_create)( * of various prog_data pointers. Make them NULL by default. */ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); - memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); memset(pipeline->bindings, 0, sizeof(pipeline->bindings)); pipeline->vs_simd8 = NO_KERNEL; @@ -72,7 +71,6 @@ genX(compute_pipeline_create)( pipeline->gs_kernel = NO_KERNEL; pipeline->active_stages = 0; - pipeline->total_scratch = 0; pipeline->needs_data_cache = false; @@ -103,8 +101,10 @@ genX(compute_pipeline_create)( anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) { vfe.ScratchSpaceBasePointer = (struct anv_address) { - .bo = NULL, - .offset = pipeline->scratch_start[MESA_SHADER_COMPUTE], + .bo = anv_scratch_pool_alloc(device, &device->scratch_pool, + MESA_SHADER_COMPUTE, + cs_prog_data->base.total_scratch), + .offset = 0, }; vfe.PerThreadScratchSpace = ffs(cs_prog_data->base.total_scratch / 2048); #if GEN_GEN > 7 -- 2.7.4