From: Mike Blumenkrantz Date: Wed, 9 Jun 2021 22:14:29 +0000 (-0400) Subject: util/queue: add a global data pointer for the queue object X-Git-Tag: upstream/21.2.3~1776 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a3a6611e96fba9a519046acf4918099f600dca92;p=platform%2Fupstream%2Fmesa.git util/queue: add a global data pointer for the queue object This better enables object-specific (e.g., context) queues, where the owner of the queue will always be needed and various pointers will be passed in for tasks. Reviewed-by: Marek Olšák Part-of: --- diff --git a/src/freedreno/drm/msm_device.c b/src/freedreno/drm/msm_device.c index 6af709f..93abf48 100644 --- a/src/freedreno/drm/msm_device.c +++ b/src/freedreno/drm/msm_device.c @@ -70,7 +70,7 @@ msm_device_new(int fd, drmVersionPtr version) * thread's comm truncating the interesting part of the * process name. */ - util_queue_init(&msm_dev->submit_queue, "sq", 8, 1, 0); + util_queue_init(&msm_dev->submit_queue, "sq", 8, 1, 0, NULL); } dev->bo_size = sizeof(struct msm_bo); diff --git a/src/freedreno/drm/msm_ringbuffer_sp.c b/src/freedreno/drm/msm_ringbuffer_sp.c index 0a95369..d71aa67 100644 --- a/src/freedreno/drm/msm_ringbuffer_sp.c +++ b/src/freedreno/drm/msm_ringbuffer_sp.c @@ -403,7 +403,7 @@ flush_submit_list(struct list_head *submit_list) } static void -msm_submit_sp_flush_execute(void *job, int thread_index) +msm_submit_sp_flush_execute(void *job, void *gdata, int thread_index) { struct fd_submit *submit = job; struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit); @@ -414,7 +414,7 @@ msm_submit_sp_flush_execute(void *job, int thread_index) } static void -msm_submit_sp_flush_cleanup(void *job, int thread_index) +msm_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index) { struct fd_submit *submit = job; fd_submit_del(submit); diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c index a8c712a..5d28bbc 100644 --- 
a/src/gallium/auxiliary/util/u_threaded_context.c +++ b/src/gallium/auxiliary/util/u_threaded_context.c @@ -167,7 +167,7 @@ tc_drop_so_target_reference(struct pipe_stream_output_target *dst) offsetof(struct pipe_draw_info, min_index) static void -tc_batch_execute(void *job, UNUSED int thread_index) +tc_batch_execute(void *job, UNUSED void *gdata, int thread_index) { struct tc_batch *batch = job; struct pipe_context *pipe = batch->tc->pipe; @@ -338,7 +338,7 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char if (next->num_total_slots) { p_atomic_add(&tc->num_direct_slots, next->num_total_slots); tc->bytes_mapped_estimate = 0; - tc_batch_execute(next, 0); + tc_batch_execute(next, NULL, 0); tc_begin_next_buffer_list(tc); synced = true; } @@ -3976,7 +3976,7 @@ threaded_context_create(struct pipe_context *pipe, * from the queue before being executed, so keep one tc_batch slot for that * execution. Also, keep one unused slot for an unflushed batch. */ - if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0)) + if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL)) goto fail; for (unsigned i = 0; i < TC_MAX_BATCHES; i++) { diff --git a/src/gallium/auxiliary/util/u_trace.c b/src/gallium/auxiliary/util/u_trace.c index ab9eb32..a3d0ef7 100644 --- a/src/gallium/auxiliary/util/u_trace.c +++ b/src/gallium/auxiliary/util/u_trace.c @@ -184,7 +184,7 @@ queue_init(struct u_trace_context *utctx) bool ret = util_queue_init(&utctx->queue, "traceq", 256, 1, UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY | - UTIL_QUEUE_INIT_RESIZE_IF_FULL); + UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL); assert(ret); if (!ret) @@ -251,7 +251,7 @@ u_trace_perfetto_stop(void) #endif static void -process_chunk(void *job, int thread_index) +process_chunk(void *job, void *gdata, int thread_index) { struct u_trace_chunk *chunk = job; struct u_trace_context *utctx = chunk->utctx; @@ -312,7 +312,7 @@ process_chunk(void *job, int thread_index) } static void 
-cleanup_chunk(void *job, int thread_index) +cleanup_chunk(void *job, void *gdata, int thread_index) { ralloc_free(job); } diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c index 719bd1d..32d2d72 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c @@ -240,7 +240,7 @@ create_initial_variants(struct ir3_shader_state *hwcso, } static void -create_initial_variants_async(void *job, int thread_index) +create_initial_variants_async(void *job, void *gdata, int thread_index) { struct ir3_shader_state *hwcso = job; struct pipe_debug_callback debug = {}; @@ -249,7 +249,7 @@ create_initial_variants_async(void *job, int thread_index) } static void -create_initial_compute_variants_async(void *job, int thread_index) +create_initial_compute_variants_async(void *job, void *gdata, int thread_index) { struct ir3_shader_state *hwcso = job; struct ir3_shader *shader = hwcso->shader; @@ -527,7 +527,7 @@ ir3_screen_init(struct pipe_screen *pscreen) util_queue_init(&screen->compile_queue, "ir3q", 64, num_threads, UTIL_QUEUE_INIT_RESIZE_IF_FULL | - UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY); + UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL); pscreen->finalize_nir = ir3_screen_finalize_nir; pscreen->set_max_shader_compiler_threads = diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 48ec79a..136674e 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -107,7 +107,7 @@ static void code_object_to_config(const amd_kernel_code_t *code_object, } /* Asynchronous compute shader compilation. 
*/ -static void si_create_compute_state_async(void *job, int thread_index) +static void si_create_compute_state_async(void *job, void *gdata, int thread_index) { struct si_compute *program = (struct si_compute *)job; struct si_shader_selector *sel = &program->sel; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 858f421..3967ffe 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -1125,7 +1125,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, if (!util_queue_init( &sscreen->shader_compiler_queue, "sh", 64, num_comp_hi_threads, - UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) { + UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) { si_destroy_shader_cache(sscreen); FREE(sscreen); glsl_type_singleton_decref(); @@ -1135,7 +1135,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws, if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority, "shlo", 64, num_comp_lo_threads, UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY | - UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) { + UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY, NULL)) { si_destroy_shader_cache(sscreen); FREE(sscreen); glsl_type_singleton_decref(); diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 3481477..44dad0c 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -2142,7 +2142,7 @@ static void si_build_shader_variant(struct si_shader *shader, int thread_index, si_shader_init_pm4_state(sscreen, shader); } -static void si_build_shader_variant_low_priority(void *job, int thread_index) +static void si_build_shader_variant_low_priority(void *job, void *gdata, int thread_index) { struct si_shader *shader = (struct si_shader *)job; @@ -2460,7 
+2460,7 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo * si_shader_selector initialization. Since it can be done asynchronously, * there is no way to report compile failures to applications. */ -static void si_init_shader_selector_async(void *job, int thread_index) +static void si_init_shader_selector_async(void *job, void *gdata, int thread_index) { struct si_shader_selector *sel = (struct si_shader_selector *)job; struct si_screen *sscreen = sel->screen; diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c index 79ebb9e..d60db4a 100644 --- a/src/gallium/drivers/zink/zink_batch.c +++ b/src/gallium/drivers/zink/zink_batch.c @@ -291,14 +291,14 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch) batch->last_batch_id = last_state->fence.batch_id; } else { if (zink_screen(ctx->base.screen)->threaded) - util_queue_init(&batch->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL); + util_queue_init(&batch->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL); } if (!ctx->queries_disabled) zink_resume_queries(ctx, batch); } static void -post_submit(void *data, int thread_index) +post_submit(void *data, void *gdata, int thread_index) { struct zink_batch_state *bs = data; @@ -310,7 +310,7 @@ post_submit(void *data, int thread_index) } static void -submit_queue(void *data, int thread_index) +submit_queue(void *data, void *gdata, int thread_index) { struct zink_batch_state *bs = data; VkSubmitInfo si = {0}; @@ -530,8 +530,8 @@ zink_end_batch(struct zink_context *ctx, struct zink_batch *batch) submit_queue, post_submit, 0); } else { batch->state->queue = screen->queue; - submit_queue(batch->state, 0); - post_submit(batch->state, 0); + submit_queue(batch->state, NULL, 0); + post_submit(batch->state, NULL, 0); } } diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c index 4898cb3..543a191 100644 --- 
a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c @@ -1454,7 +1454,7 @@ static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_winsys *ws, return true; } -static void amdgpu_cs_submit_ib(void *job, int thread_index) +static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index) { struct amdgpu_cs *acs = (struct amdgpu_cs*)job; struct amdgpu_winsys *ws = acs->ws; diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c index 5ee8ed4..06dcf33 100644 --- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c +++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c @@ -508,7 +508,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config, (void) simple_mtx_init(&aws->bo_export_table_lock, mtx_plain); if (!util_queue_init(&aws->cs_queue, "cs", 8, 1, - UTIL_QUEUE_INIT_RESIZE_IF_FULL)) { + UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL)) { amdgpu_winsys_destroy(&ws->base); simple_mtx_unlock(&dev_tab_mutex); return NULL; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index a9eaf96..c5e92ec 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -475,7 +475,7 @@ static unsigned radeon_drm_cs_get_buffer_list(struct radeon_cmdbuf *rcs, return cs->csc->num_relocs; } -void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index) +void radeon_drm_cs_emit_ioctl_oneshot(void *job, void *gdata, int thread_index) { struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst; unsigned i; @@ -710,7 +710,7 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs, if (!(flags & PIPE_FLUSH_ASYNC)) radeon_drm_cs_sync_flush(rcs); } else { - radeon_drm_cs_emit_ioctl_oneshot(cs, 0); + radeon_drm_cs_emit_ioctl_oneshot(cs, NULL, 0); } } else { radeon_cs_context_cleanup(cs->cst); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h 
b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 1090dfa..bda22bc 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -137,6 +137,6 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo) void radeon_drm_cs_sync_flush(struct radeon_cmdbuf *rcs); void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws); -void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index); +void radeon_drm_cs_emit_ioctl_oneshot(void *job, void *gdata, int thread_index); #endif diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index d06394e..ef9ec59 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -938,7 +938,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config, ws->info.pte_fragment_size = 64 * 1024; /* GPUVM page size */ if (ws->num_cpus > 1 && debug_get_option_thread()) - util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0); + util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0, NULL); /* Create the screen at the end. The winsys must be initialized * completely. 
diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c index ed619f5..e0822b8 100644 --- a/src/mesa/main/glthread.c +++ b/src/mesa/main/glthread.c @@ -42,7 +42,7 @@ static void -glthread_unmarshal_batch(void *job, int thread_index) +glthread_unmarshal_batch(void *job, void *gdata, int thread_index) { struct glthread_batch *batch = (struct glthread_batch*)job; struct gl_context *ctx = batch->ctx; @@ -80,7 +80,7 @@ glthread_unmarshal_batch(void *job, int thread_index) } static void -glthread_thread_initialization(void *job, int thread_index) +glthread_thread_initialization(void *job, void *gdata, int thread_index) { struct gl_context *ctx = (struct gl_context*)job; @@ -96,7 +96,7 @@ _mesa_glthread_init(struct gl_context *ctx) assert(!glthread->enabled); if (!util_queue_init(&glthread->queue, "gl", MARSHAL_MAX_BATCHES - 2, - 1, 0)) { + 1, 0, NULL)) { return; } @@ -241,7 +241,7 @@ _mesa_glthread_flush_batch(struct gl_context *ctx) * need to restore it when it returns. */ if (false) { - glthread_unmarshal_batch(next, 0); + glthread_unmarshal_batch(next, NULL, 0); _glapi_set_dispatch(ctx->CurrentClientDispatch); return; } @@ -296,7 +296,7 @@ _mesa_glthread_finish(struct gl_context *ctx) * restore it after it's done. 
*/ struct _glapi_table *dispatch = _glapi_get_dispatch(); - glthread_unmarshal_batch(next, 0); + glthread_unmarshal_batch(next, NULL, 0); _glapi_set_dispatch(dispatch); /* It's not a sync because we don't enqueue partial batches, but diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c index 4aa719a..800c204 100644 --- a/src/util/disk_cache.c +++ b/src/util/disk_cache.c @@ -174,7 +174,7 @@ disk_cache_create(const char *gpu_name, const char *driver_id, if (!util_queue_init(&cache->cache_queue, "disk$", 32, 4, UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY | - UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) + UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) goto fail; cache->path_init_failed = false; @@ -310,7 +310,7 @@ fail: } static void -destroy_put_job(void *job, int thread_index) +destroy_put_job(void *job, void *gdata, int thread_index) { if (job) { struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job; @@ -320,15 +320,15 @@ destroy_put_job(void *job, int thread_index) } static void -destroy_put_job_nocopy(void *job, int thread_index) +destroy_put_job_nocopy(void *job, void *gdata, int thread_index) { struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job; free(dc_job->data); - destroy_put_job(job, thread_index); + destroy_put_job(job, gdata, thread_index); } static void -cache_put(void *job, int thread_index) +cache_put(void *job, void *gdata, int thread_index) { assert(job); diff --git a/src/util/u_queue.c b/src/util/u_queue.c index 9298026..2a553ef 100644 --- a/src/util/u_queue.c +++ b/src/util/u_queue.c @@ -310,11 +310,11 @@ util_queue_thread_func(void *input) mtx_unlock(&queue->lock); if (job.job) { - job.execute(job.job, thread_index); + job.execute(job.job, job.global_data, thread_index); if (job.fence) util_queue_fence_signal(job.fence); if (job.cleanup) - job.cleanup(job.job, thread_index); + job.cleanup(job.job, job.global_data, thread_index); } } @@ -406,7 +406,8 @@ util_queue_init(struct 
util_queue *queue, const char *name, unsigned max_jobs, unsigned num_threads, - unsigned flags) + unsigned flags, + void *global_data) { unsigned i; @@ -442,6 +443,7 @@ util_queue_init(struct util_queue *queue, queue->max_threads = num_threads; queue->num_threads = num_threads; queue->max_jobs = max_jobs; + queue->global_data = global_data; queue->jobs = (struct util_queue_job*) calloc(max_jobs, sizeof(struct util_queue_job)); @@ -597,6 +599,7 @@ util_queue_add_job(struct util_queue *queue, ptr = &queue->jobs[queue->write_idx]; assert(ptr->job == NULL); ptr->job = job; + ptr->global_data = queue->global_data; ptr->fence = fence; ptr->execute = execute; ptr->cleanup = cleanup; @@ -633,7 +636,7 @@ util_queue_drop_job(struct util_queue *queue, struct util_queue_fence *fence) i = (i + 1) % queue->max_jobs) { if (queue->jobs[i].fence == fence) { if (queue->jobs[i].cleanup) - queue->jobs[i].cleanup(queue->jobs[i].job, -1); + queue->jobs[i].cleanup(queue->jobs[i].job, queue->global_data, -1); /* Just clear it. The threads will treat as a no-op job. 
*/ memset(&queue->jobs[i], 0, sizeof(queue->jobs[i])); @@ -650,7 +653,7 @@ util_queue_drop_job(struct util_queue *queue, struct util_queue_fence *fence) } static void -util_queue_finish_execute(void *data, int num_thread) +util_queue_finish_execute(void *data, void *gdata, int num_thread) { util_barrier *barrier = data; util_barrier_wait(barrier); diff --git a/src/util/u_queue.h b/src/util/u_queue.h index e254af4..704d6c5 100644 --- a/src/util/u_queue.h +++ b/src/util/u_queue.h @@ -189,10 +189,11 @@ util_queue_fence_wait_timeout(struct util_queue_fence *fence, return _util_queue_fence_wait_timeout(fence, abs_timeout); } -typedef void (*util_queue_execute_func)(void *job, int thread_index); +typedef void (*util_queue_execute_func)(void *job, void *gdata, int thread_index); struct util_queue_job { void *job; + void *global_data; size_t job_size; struct util_queue_fence *fence; util_queue_execute_func execute; @@ -215,6 +216,7 @@ struct util_queue { int write_idx, read_idx; /* ring buffer pointers */ size_t total_jobs_size; /* memory use of all jobs in the queue */ struct util_queue_job *jobs; + void *global_data; /* for cleanup at exit(), protected by exit_mutex */ struct list_head head; @@ -224,7 +226,8 @@ bool util_queue_init(struct util_queue *queue, const char *name, unsigned max_jobs, unsigned num_threads, - unsigned flags); + unsigned flags, + void *global_data); void util_queue_destroy(struct util_queue *queue); /* optional cleanup callback is called after fence is signaled: */