From: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Date: Wed, 9 Jun 2021 22:14:29 +0000 (-0400)
Subject: util/queue: add a global data pointer for the queue object
X-Git-Tag: upstream/21.2.3~1776
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=a3a6611e96fba9a519046acf4918099f600dca92;p=platform%2Fupstream%2Fmesa.git

util/queue: add a global data pointer for the queue object

This better supports object-specific (e.g., per-context) queues: the owner
of the queue is stored once at util_queue_init() time and is then passed to
every execute/cleanup callback alongside the per-job pointer for each task.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11312>
---

diff --git a/src/freedreno/drm/msm_device.c b/src/freedreno/drm/msm_device.c
index 6af709f..93abf48 100644
--- a/src/freedreno/drm/msm_device.c
+++ b/src/freedreno/drm/msm_device.c
@@ -70,7 +70,7 @@ msm_device_new(int fd, drmVersionPtr version)
        * thread's comm truncating the interesting part of the
        * process name.
        */
-      util_queue_init(&msm_dev->submit_queue, "sq", 8, 1, 0);
+      util_queue_init(&msm_dev->submit_queue, "sq", 8, 1, 0, NULL);
    }
 
    dev->bo_size = sizeof(struct msm_bo);
diff --git a/src/freedreno/drm/msm_ringbuffer_sp.c b/src/freedreno/drm/msm_ringbuffer_sp.c
index 0a95369..d71aa67 100644
--- a/src/freedreno/drm/msm_ringbuffer_sp.c
+++ b/src/freedreno/drm/msm_ringbuffer_sp.c
@@ -403,7 +403,7 @@ flush_submit_list(struct list_head *submit_list)
 }
 
 static void
-msm_submit_sp_flush_execute(void *job, int thread_index)
+msm_submit_sp_flush_execute(void *job, void *gdata, int thread_index)
 {
    struct fd_submit *submit = job;
    struct msm_submit_sp *msm_submit = to_msm_submit_sp(submit);
@@ -414,7 +414,7 @@ msm_submit_sp_flush_execute(void *job, int thread_index)
 }
 
 static void
-msm_submit_sp_flush_cleanup(void *job, int thread_index)
+msm_submit_sp_flush_cleanup(void *job, void *gdata, int thread_index)
 {
    struct fd_submit *submit = job;
    fd_submit_del(submit);
diff --git a/src/gallium/auxiliary/util/u_threaded_context.c b/src/gallium/auxiliary/util/u_threaded_context.c
index a8c712a..5d28bbc 100644
--- a/src/gallium/auxiliary/util/u_threaded_context.c
+++ b/src/gallium/auxiliary/util/u_threaded_context.c
@@ -167,7 +167,7 @@ tc_drop_so_target_reference(struct pipe_stream_output_target *dst)
    offsetof(struct pipe_draw_info, min_index)
 
 static void
-tc_batch_execute(void *job, UNUSED int thread_index)
+tc_batch_execute(void *job, UNUSED void *gdata, int thread_index)
 {
    struct tc_batch *batch = job;
    struct pipe_context *pipe = batch->tc->pipe;
@@ -338,7 +338,7 @@ _tc_sync(struct threaded_context *tc, UNUSED const char *info, UNUSED const char
    if (next->num_total_slots) {
       p_atomic_add(&tc->num_direct_slots, next->num_total_slots);
       tc->bytes_mapped_estimate = 0;
-      tc_batch_execute(next, 0);
+      tc_batch_execute(next, NULL, 0);
       tc_begin_next_buffer_list(tc);
       synced = true;
    }
@@ -3976,7 +3976,7 @@ threaded_context_create(struct pipe_context *pipe,
     * from the queue before being executed, so keep one tc_batch slot for that
     * execution. Also, keep one unused slot for an unflushed batch.
     */
-   if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0))
+   if (!util_queue_init(&tc->queue, "gdrv", TC_MAX_BATCHES - 2, 1, 0, NULL))
       goto fail;
 
    for (unsigned i = 0; i < TC_MAX_BATCHES; i++) {
diff --git a/src/gallium/auxiliary/util/u_trace.c b/src/gallium/auxiliary/util/u_trace.c
index ab9eb32..a3d0ef7 100644
--- a/src/gallium/auxiliary/util/u_trace.c
+++ b/src/gallium/auxiliary/util/u_trace.c
@@ -184,7 +184,7 @@ queue_init(struct u_trace_context *utctx)
 
    bool ret = util_queue_init(&utctx->queue, "traceq", 256, 1,
                               UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY |
-                              UTIL_QUEUE_INIT_RESIZE_IF_FULL);
+                              UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL);
    assert(ret);
 
    if (!ret)
@@ -251,7 +251,7 @@ u_trace_perfetto_stop(void)
 #endif
 
 static void
-process_chunk(void *job, int thread_index)
+process_chunk(void *job, void *gdata, int thread_index)
 {
    struct u_trace_chunk *chunk = job;
    struct u_trace_context *utctx = chunk->utctx;
@@ -312,7 +312,7 @@ process_chunk(void *job, int thread_index)
 }
 
 static void
-cleanup_chunk(void *job, int thread_index)
+cleanup_chunk(void *job, void *gdata, int thread_index)
 {
    ralloc_free(job);
 }
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
index 719bd1d..32d2d72 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_gallium.c
@@ -240,7 +240,7 @@ create_initial_variants(struct ir3_shader_state *hwcso,
 }
 
 static void
-create_initial_variants_async(void *job, int thread_index)
+create_initial_variants_async(void *job, void *gdata, int thread_index)
 {
    struct ir3_shader_state *hwcso = job;
    struct pipe_debug_callback debug = {};
@@ -249,7 +249,7 @@ create_initial_variants_async(void *job, int thread_index)
 }
 
 static void
-create_initial_compute_variants_async(void *job, int thread_index)
+create_initial_compute_variants_async(void *job, void *gdata, int thread_index)
 {
    struct ir3_shader_state *hwcso = job;
    struct ir3_shader *shader = hwcso->shader;
@@ -527,7 +527,7 @@ ir3_screen_init(struct pipe_screen *pscreen)
 
    util_queue_init(&screen->compile_queue, "ir3q", 64, num_threads,
                    UTIL_QUEUE_INIT_RESIZE_IF_FULL |
-                      UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY);
+                      UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL);
 
    pscreen->finalize_nir = ir3_screen_finalize_nir;
    pscreen->set_max_shader_compiler_threads =
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 48ec79a..136674e 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -107,7 +107,7 @@ static void code_object_to_config(const amd_kernel_code_t *code_object,
 }
 
 /* Asynchronous compute shader compilation. */
-static void si_create_compute_state_async(void *job, int thread_index)
+static void si_create_compute_state_async(void *job, void *gdata, int thread_index)
 {
    struct si_compute *program = (struct si_compute *)job;
    struct si_shader_selector *sel = &program->sel;
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 858f421..3967ffe 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1125,7 +1125,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
 
    if (!util_queue_init(
           &sscreen->shader_compiler_queue, "sh", 64, num_comp_hi_threads,
-          UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY)) {
+          UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL)) {
       si_destroy_shader_cache(sscreen);
       FREE(sscreen);
       glsl_type_singleton_decref();
@@ -1135,7 +1135,7 @@ static struct pipe_screen *radeonsi_screen_create_impl(struct radeon_winsys *ws,
    if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority, "shlo", 64,
                         num_comp_lo_threads,
                         UTIL_QUEUE_INIT_RESIZE_IF_FULL | UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY |
-                           UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
+                           UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY, NULL)) {
       si_destroy_shader_cache(sscreen);
       FREE(sscreen);
       glsl_type_singleton_decref();
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 3481477..44dad0c 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2142,7 +2142,7 @@ static void si_build_shader_variant(struct si_shader *shader, int thread_index,
    si_shader_init_pm4_state(sscreen, shader);
 }
 
-static void si_build_shader_variant_low_priority(void *job, int thread_index)
+static void si_build_shader_variant_low_priority(void *job, void *gdata, int thread_index)
 {
    struct si_shader *shader = (struct si_shader *)job;
 
@@ -2460,7 +2460,7 @@ static void si_parse_next_shader_property(const struct si_shader_info *info, boo
  * si_shader_selector initialization. Since it can be done asynchronously,
  * there is no way to report compile failures to applications.
  */
-static void si_init_shader_selector_async(void *job, int thread_index)
+static void si_init_shader_selector_async(void *job, void *gdata, int thread_index)
 {
    struct si_shader_selector *sel = (struct si_shader_selector *)job;
    struct si_screen *sscreen = sel->screen;
diff --git a/src/gallium/drivers/zink/zink_batch.c b/src/gallium/drivers/zink/zink_batch.c
index 79ebb9e..d60db4a 100644
--- a/src/gallium/drivers/zink/zink_batch.c
+++ b/src/gallium/drivers/zink/zink_batch.c
@@ -291,14 +291,14 @@ zink_start_batch(struct zink_context *ctx, struct zink_batch *batch)
       batch->last_batch_id = last_state->fence.batch_id;
    } else {
       if (zink_screen(ctx->base.screen)->threaded)
-         util_queue_init(&batch->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL);
+         util_queue_init(&batch->flush_queue, "zfq", 8, 1, UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL);
    }
    if (!ctx->queries_disabled)
       zink_resume_queries(ctx, batch);
 }
 
 static void
-post_submit(void *data, int thread_index)
+post_submit(void *data, void *gdata, int thread_index)
 {
    struct zink_batch_state *bs = data;
 
@@ -310,7 +310,7 @@ post_submit(void *data, int thread_index)
 }
 
 static void
-submit_queue(void *data, int thread_index)
+submit_queue(void *data, void *gdata, int thread_index)
 {
    struct zink_batch_state *bs = data;
    VkSubmitInfo si = {0};
@@ -530,8 +530,8 @@ zink_end_batch(struct zink_context *ctx, struct zink_batch *batch)
                          submit_queue, post_submit, 0);
    } else {
       batch->state->queue = screen->queue;
-      submit_queue(batch->state, 0);
-      post_submit(batch->state, 0);
+      submit_queue(batch->state, NULL, 0);
+      post_submit(batch->state, NULL, 0);
    }
 }
 
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 4898cb3..543a191 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -1454,7 +1454,7 @@ static bool amdgpu_add_sparse_backing_buffers(struct amdgpu_winsys *ws,
    return true;
 }
 
-static void amdgpu_cs_submit_ib(void *job, int thread_index)
+static void amdgpu_cs_submit_ib(void *job, void *gdata, int thread_index)
 {
    struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
    struct amdgpu_winsys *ws = acs->ws;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 5ee8ed4..06dcf33 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -508,7 +508,7 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
       (void) simple_mtx_init(&aws->bo_export_table_lock, mtx_plain);
 
       if (!util_queue_init(&aws->cs_queue, "cs", 8, 1,
-                           UTIL_QUEUE_INIT_RESIZE_IF_FULL)) {
+                           UTIL_QUEUE_INIT_RESIZE_IF_FULL, NULL)) {
          amdgpu_winsys_destroy(&ws->base);
          simple_mtx_unlock(&dev_tab_mutex);
          return NULL;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index a9eaf96..c5e92ec 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -475,7 +475,7 @@ static unsigned radeon_drm_cs_get_buffer_list(struct radeon_cmdbuf *rcs,
    return cs->csc->num_relocs;
 }
 
-void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index)
+void radeon_drm_cs_emit_ioctl_oneshot(void *job, void *gdata, int thread_index)
 {
    struct radeon_cs_context *csc = ((struct radeon_drm_cs*)job)->cst;
    unsigned i;
@@ -710,7 +710,7 @@ static int radeon_drm_cs_flush(struct radeon_cmdbuf *rcs,
          if (!(flags & PIPE_FLUSH_ASYNC))
             radeon_drm_cs_sync_flush(rcs);
       } else {
-         radeon_drm_cs_emit_ioctl_oneshot(cs, 0);
+         radeon_drm_cs_emit_ioctl_oneshot(cs, NULL, 0);
       }
    } else {
       radeon_cs_context_cleanup(cs->cst);
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 1090dfa..bda22bc 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -137,6 +137,6 @@ radeon_bo_is_referenced_by_any_cs(struct radeon_bo *bo)
 
 void radeon_drm_cs_sync_flush(struct radeon_cmdbuf *rcs);
 void radeon_drm_cs_init_functions(struct radeon_drm_winsys *ws);
-void radeon_drm_cs_emit_ioctl_oneshot(void *job, int thread_index);
+void radeon_drm_cs_emit_ioctl_oneshot(void *job, void *gdata, int thread_index);
 
 #endif
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index d06394e..ef9ec59 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -938,7 +938,7 @@ radeon_drm_winsys_create(int fd, const struct pipe_screen_config *config,
    ws->info.pte_fragment_size = 64 * 1024; /* GPUVM page size */
 
    if (ws->num_cpus > 1 && debug_get_option_thread())
-      util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0);
+      util_queue_init(&ws->cs_queue, "rcs", 8, 1, 0, NULL);
 
    /* Create the screen at the end. The winsys must be initialized
     * completely.
diff --git a/src/mesa/main/glthread.c b/src/mesa/main/glthread.c
index ed619f5..e0822b8 100644
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -42,7 +42,7 @@
 
 
 static void
-glthread_unmarshal_batch(void *job, int thread_index)
+glthread_unmarshal_batch(void *job, void *gdata, int thread_index)
 {
    struct glthread_batch *batch = (struct glthread_batch*)job;
    struct gl_context *ctx = batch->ctx;
@@ -80,7 +80,7 @@ glthread_unmarshal_batch(void *job, int thread_index)
 }
 
 static void
-glthread_thread_initialization(void *job, int thread_index)
+glthread_thread_initialization(void *job, void *gdata, int thread_index)
 {
    struct gl_context *ctx = (struct gl_context*)job;
 
@@ -96,7 +96,7 @@ _mesa_glthread_init(struct gl_context *ctx)
    assert(!glthread->enabled);
 
    if (!util_queue_init(&glthread->queue, "gl", MARSHAL_MAX_BATCHES - 2,
-                        1, 0)) {
+                        1, 0, NULL)) {
       return;
    }
 
@@ -241,7 +241,7 @@ _mesa_glthread_flush_batch(struct gl_context *ctx)
     * need to restore it when it returns.
     */
    if (false) {
-      glthread_unmarshal_batch(next, 0);
+      glthread_unmarshal_batch(next, NULL, 0);
       _glapi_set_dispatch(ctx->CurrentClientDispatch);
       return;
    }
@@ -296,7 +296,7 @@ _mesa_glthread_finish(struct gl_context *ctx)
        * restore it after it's done.
        */
       struct _glapi_table *dispatch = _glapi_get_dispatch();
-      glthread_unmarshal_batch(next, 0);
+      glthread_unmarshal_batch(next, NULL, 0);
       _glapi_set_dispatch(dispatch);
 
       /* It's not a sync because we don't enqueue partial batches, but
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index 4aa719a..800c204 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -174,7 +174,7 @@ disk_cache_create(const char *gpu_name, const char *driver_id,
    if (!util_queue_init(&cache->cache_queue, "disk$", 32, 4,
                         UTIL_QUEUE_INIT_RESIZE_IF_FULL |
                         UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY |
-                        UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY))
+                        UTIL_QUEUE_INIT_SET_FULL_THREAD_AFFINITY, NULL))
       goto fail;
 
    cache->path_init_failed = false;
@@ -310,7 +310,7 @@ fail:
 }
 
 static void
-destroy_put_job(void *job, int thread_index)
+destroy_put_job(void *job, void *gdata, int thread_index)
 {
    if (job) {
       struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job;
@@ -320,15 +320,15 @@ destroy_put_job(void *job, int thread_index)
 }
 
 static void
-destroy_put_job_nocopy(void *job, int thread_index)
+destroy_put_job_nocopy(void *job, void *gdata, int thread_index)
 {
    struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job;
    free(dc_job->data);
-   destroy_put_job(job, thread_index);
+   destroy_put_job(job, gdata, thread_index);
 }
 
 static void
-cache_put(void *job, int thread_index)
+cache_put(void *job, void *gdata, int thread_index)
 {
    assert(job);
 
diff --git a/src/util/u_queue.c b/src/util/u_queue.c
index 9298026..2a553ef 100644
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -310,11 +310,11 @@ util_queue_thread_func(void *input)
       mtx_unlock(&queue->lock);
 
       if (job.job) {
-         job.execute(job.job, thread_index);
+         job.execute(job.job, job.global_data, thread_index);
          if (job.fence)
             util_queue_fence_signal(job.fence);
          if (job.cleanup)
-            job.cleanup(job.job, thread_index);
+            job.cleanup(job.job, job.global_data, thread_index);
       }
    }
 
@@ -406,7 +406,8 @@ util_queue_init(struct util_queue *queue,
                 const char *name,
                 unsigned max_jobs,
                 unsigned num_threads,
-                unsigned flags)
+                unsigned flags,
+                void *global_data)
 {
    unsigned i;
 
@@ -442,6 +443,7 @@ util_queue_init(struct util_queue *queue,
    queue->max_threads = num_threads;
    queue->num_threads = num_threads;
    queue->max_jobs = max_jobs;
+   queue->global_data = global_data;
 
    queue->jobs = (struct util_queue_job*)
                  calloc(max_jobs, sizeof(struct util_queue_job));
@@ -597,6 +599,7 @@ util_queue_add_job(struct util_queue *queue,
    ptr = &queue->jobs[queue->write_idx];
    assert(ptr->job == NULL);
    ptr->job = job;
+   ptr->global_data = queue->global_data;
    ptr->fence = fence;
    ptr->execute = execute;
    ptr->cleanup = cleanup;
@@ -633,7 +636,7 @@ util_queue_drop_job(struct util_queue *queue, struct util_queue_fence *fence)
         i = (i + 1) % queue->max_jobs) {
       if (queue->jobs[i].fence == fence) {
          if (queue->jobs[i].cleanup)
-            queue->jobs[i].cleanup(queue->jobs[i].job, -1);
+            queue->jobs[i].cleanup(queue->jobs[i].job, queue->global_data, -1);
 
          /* Just clear it. The threads will treat as a no-op job. */
          memset(&queue->jobs[i], 0, sizeof(queue->jobs[i]));
@@ -650,7 +653,7 @@ util_queue_drop_job(struct util_queue *queue, struct util_queue_fence *fence)
 }
 
 static void
-util_queue_finish_execute(void *data, int num_thread)
+util_queue_finish_execute(void *data, void *gdata, int num_thread)
 {
    util_barrier *barrier = data;
    util_barrier_wait(barrier);
diff --git a/src/util/u_queue.h b/src/util/u_queue.h
index e254af4..704d6c5 100644
--- a/src/util/u_queue.h
+++ b/src/util/u_queue.h
@@ -189,10 +189,11 @@ util_queue_fence_wait_timeout(struct util_queue_fence *fence,
    return _util_queue_fence_wait_timeout(fence, abs_timeout);
 }
 
-typedef void (*util_queue_execute_func)(void *job, int thread_index);
+typedef void (*util_queue_execute_func)(void *job, void *gdata, int thread_index);
 
 struct util_queue_job {
    void *job;
+   void *global_data;
    size_t job_size;
    struct util_queue_fence *fence;
    util_queue_execute_func execute;
@@ -215,6 +216,7 @@ struct util_queue {
    int write_idx, read_idx; /* ring buffer pointers */
    size_t total_jobs_size;  /* memory use of all jobs in the queue */
    struct util_queue_job *jobs;
+   void *global_data;
 
    /* for cleanup at exit(), protected by exit_mutex */
    struct list_head head;
@@ -224,7 +226,8 @@ bool util_queue_init(struct util_queue *queue,
                      const char *name,
                      unsigned max_jobs,
                      unsigned num_threads,
-                     unsigned flags);
+                     unsigned flags,
+                     void *global_data);
 void util_queue_destroy(struct util_queue *queue);
 
 /* optional cleanup callback is called after fence is signaled: */