From 99cba698400ed9aebf8bc252f382b1600118f5ab Mon Sep 17 00:00:00 2001 From: Yang Rong Date: Tue, 17 Sep 2013 16:10:01 +0800 Subject: [PATCH] Implement clEnqueueMarker and clEnqueueBarrier. Add some event info to cl_command_queue. One piece is the list of non-complete user events, which is used to block marker events and barriers. After these user events become CL_COMPLETE, the events blocked by them also become CL_COMPLETE, so the marker event will also be set to CL_COMPLETE. If there are no user events, we need to wait for the last event to complete and then set the marker event to complete. Add barrier_index for clEnqueueBarrier; it indexes into the user events and indicates how many user events the enqueue APIs that follow clEnqueueBarrier should wait on. Signed-off-by: Yang Rong Reviewed-by: Zhigang Gong --- src/cl_api.c | 25 ++++++++++++----- src/cl_command_queue.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/cl_command_queue.h | 15 ++++++++++ src/cl_enqueue.h | 4 +-- src/cl_event.c | 72 ++++++++++++++++++++++++++++++++++++++++++++---- src/cl_event.h | 6 ++-- 6 files changed, 180 insertions(+), 16 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 9270b65..8f62990 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -66,7 +66,7 @@ inline cl_int handle_events(cl_command_queue queue, cl_int num, const cl_event *wait_list, cl_event* event, enqueue_data* data, cl_command_type type) { - cl_int status = cl_event_wait_events(num, wait_list); + cl_int status = cl_event_wait_events(num, wait_list, queue); cl_event e; if(event != NULL || status == CL_ENQUEUE_EXECUTE_DEFER) { e = cl_event_new(queue->ctx, queue, type, event!=NULL); @@ -1076,7 +1076,7 @@ clWaitForEvents(cl_uint num_events, TRY(cl_event_check_waitlist, num_events, event_list, NULL, ctx); - while(cl_event_wait_events(num_events, event_list) == CL_ENQUEUE_EXECUTE_DEFER) { + while(cl_event_wait_events(num_events, event_list, NULL) == CL_ENQUEUE_EXECUTE_DEFER) { usleep(8000); //sleep 8ms to wait other thread } @@ -2401,8 +2401,16 @@ cl_int 
clEnqueueMarker(cl_command_queue command_queue, cl_event * event) { - NOT_IMPLEMENTED; - return 0; + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + if(event == NULL) { + err = CL_INVALID_VALUE; + goto error; + } + + cl_event_marker(command_queue, event); +error: + return err; } cl_int @@ -2421,9 +2429,12 @@ error: cl_int clEnqueueBarrier(cl_command_queue command_queue) { - NOT_IMPLEMENTED; - return 0; - //return clFinish(command_queue); + cl_int err = CL_SUCCESS; + CHECK_QUEUE(command_queue); + cl_command_queue_set_barrier(command_queue); + +error: + return err; } #define EXTFUNC(x) \ diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 2454db6..11be423 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -87,6 +87,7 @@ cl_command_queue_delete(cl_command_queue queue) cl_mem_delete(queue->perf); cl_context_delete(queue->ctx); cl_gpgpu_delete(queue->gpgpu); + cl_free(queue->wait_events); queue->magic = CL_MAGIC_DEAD_HEADER; /* For safety */ cl_free(queue); } @@ -412,3 +413,76 @@ cl_command_queue_finish(cl_command_queue queue) return CL_SUCCESS; } +#define DEFAULT_WAIT_EVENTS_SIZE 16 +LOCAL void +cl_command_queue_insert_event(cl_command_queue queue, cl_event event) +{ + cl_int i=0; + cl_event *new_list; + + assert(queue != NULL); + if(queue->wait_events == NULL) { + queue->wait_events_size = DEFAULT_WAIT_EVENTS_SIZE; + TRY_ALLOC_NO_ERR (queue->wait_events, CALLOC_ARRAY(cl_event, queue->wait_events_size)); + } + + for(i=0; iwait_events_num; i++) { + if(queue->wait_events[i] == event) + return; //is in the wait_events, need to insert + } + + if(queue->wait_events_num < queue->wait_events_size) { + queue->wait_events[queue->wait_events_num++] = event; + return; + } + + //wait_events_num == wait_events_size, array is full + queue->wait_events_size *= 2; + TRY_ALLOC_NO_ERR (new_list, CALLOC_ARRAY(cl_event, queue->wait_events_size)); + memcpy(new_list, queue->wait_events, sizeof(cl_event)*queue->wait_events_num); + 
cl_free(queue->wait_events); + queue->wait_events = new_list; + queue->wait_events[queue->wait_events_num++] = event; + return; + +exit: + return; +error: + if(queue->wait_events) + cl_free(queue->wait_events); + queue->wait_events = NULL; + queue->wait_events_size = 0; + queue->wait_events_num = 0; + goto exit; + +} + +LOCAL void +cl_command_queue_remove_event(cl_command_queue queue, cl_event event) +{ + cl_int i=0; + + assert(queue->wait_events); + for(i=0; iwait_events_num; i++) { + if(queue->wait_events[i] == event) + break; + } + + if(i == queue->wait_events_num) + return; + + if(queue->barrier_index >= i) + queue->barrier_index -= 1; + + for(; iwait_events_num-1; i++) { + queue->wait_events[i] = queue->wait_events[i+1]; + } + queue->wait_events_num -= 1; +} + +LOCAL void +cl_command_queue_set_barrier(cl_command_queue queue) +{ + queue->barrier_index = queue->wait_events_num; +} + diff --git a/src/cl_command_queue.h b/src/cl_command_queue.h index 9fe1dd1..9396fd7 100644 --- a/src/cl_command_queue.h +++ b/src/cl_command_queue.h @@ -33,6 +33,11 @@ struct _cl_command_queue { uint64_t magic; /* To identify it as a command queue */ volatile int ref_n; /* We reference count this object */ cl_context ctx; /* Its parent context */ + cl_event* wait_events; /* Point to array of non-complete user events that block this command queue */ + cl_int wait_events_num; /* Number of Non-complete user events */ + cl_int wait_events_size; /* The size of array that wait_events point to */ + cl_int barrier_index; /* Indicate event count in wait_events as barrier events */ + cl_event last_event; /* The last event in the queue, for enqueue mark used */ cl_command_queue_properties props; /* Queue properties */ cl_command_queue prev, next; /* We chain the command queues together */ cl_gpgpu gpgpu; /* Setup all GEN commands */ @@ -76,5 +81,15 @@ extern cl_int cl_command_queue_bind_surface(cl_command_queue, cl_kernel); /* Bind all the image surfaces in the GPGPU state */ extern cl_int 
cl_command_queue_bind_image(cl_command_queue, cl_kernel); + +/* Insert a user event to command's wait_events */ +extern void cl_command_queue_insert_event(cl_command_queue, cl_event); + +/* Remove a user event from command's wait_events */ +extern void cl_command_queue_remove_event(cl_command_queue, cl_event); + +/* Set the barrier index */ +extern void cl_command_queue_set_barrier(cl_command_queue); + #endif /* __CL_COMMAND_QUEUE_H__ */ diff --git a/src/cl_enqueue.h b/src/cl_enqueue.h index f90f921..b412d58 100644 --- a/src/cl_enqueue.h +++ b/src/cl_enqueue.h @@ -19,9 +19,8 @@ #ifndef __CL_ENQUEUE_H__ #define __CL_ENQUEUE_H__ -#include "cl_mem.h" -#include "cl_command_queue.h" #include "cl_internals.h" +#include "cl_driver.h" #include "CL/cl.h" typedef enum { @@ -41,6 +40,7 @@ typedef enum { EnqueueUnmapMemObject, EnqueueNDRangeKernel, EnqueueNativeKernel, + EnqueueMarker, EnqueueInvalid } enqueue_type; diff --git a/src/cl_event.c b/src/cl_event.c index 83e1f50..918e245 100644 --- a/src/cl_event.c +++ b/src/cl_event.c @@ -23,6 +23,7 @@ #include "cl_alloc.h" #include "cl_khr_icd.h" #include "cl_kernel.h" +#include "cl_command_queue.h" #include #include @@ -81,6 +82,8 @@ cl_event cl_event_new(cl_context ctx, cl_command_queue queue, cl_command_type ty event->enqueue_cb = NULL; event->waits_head = NULL; event->emplict = emplict; + if(queue && event->gpgpu_event) + queue->last_event = event; exit: return event; @@ -100,6 +103,9 @@ void cl_event_delete(cl_event event) if (atomic_dec(&event->ref_n) > 1) return; + if(event->queue && event->queue->last_event == event) + event->queue->last_event = NULL; + /* Call all user's callback if haven't execute */ user_callback *cb = event->user_cb; while(event->user_cb) { @@ -200,10 +206,11 @@ error: goto exit; } -cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, - const cl_event *event_wait_list) +cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, const cl_event *event_wait_list, + cl_command_queue queue) { 
cl_int i, j; + /* Check whether wait user events */ for(i=0; istatus <= CL_COMPLETE) @@ -219,6 +226,10 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, } } + if(queue && queue->barrier_index > 0) { + return CL_ENQUEUE_EXECUTE_DEFER; + } + /* Non user events or all user event finished, wait all enqueue events finish */ for(i=0; istatus <= CL_COMPLETE) @@ -227,7 +238,8 @@ cl_int cl_event_wait_events(cl_uint num_events_in_wait_list, //enqueue callback haven't finish, in another thread, wait if(event_wait_list[i]->enqueue_cb != NULL) return CL_ENQUEUE_EXECUTE_DEFER; - cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1); + if(event_wait_list[i]->gpgpu_event) + cl_gpgpu_event_update_status(event_wait_list[i]->gpgpu_event, 1); cl_event_set_status(event_wait_list[i], CL_COMPLETE); //Execute user's callback } return CL_ENQUEUE_EXECUTE_IMM; @@ -240,6 +252,7 @@ void cl_event_new_enqueue_callback(cl_event event, { enqueue_callback *cb, *node; user_event *user_events, *u_ev; + cl_command_queue queue = event->queue; cl_int i; /* Allocate and inialize the structure itself */ @@ -252,6 +265,27 @@ void cl_event_new_enqueue_callback(cl_event event, cb->next = NULL; cb->wait_user_events = NULL; + if(queue && queue->barrier_index > 0) { + for(i=0; ibarrier_index; i++) { + /* Insert the enqueue_callback to user event list */ + node = queue->wait_events[i]->waits_head; + if(node == NULL) + queue->wait_events[i]->waits_head = cb; + else + while((node != cb) && node->next) + node = node->next; + if(node == cb) //wait on dup user event + continue; + node->next = cb; + + /* Insert the user event to enqueue_callback's wait_user_events */ + TRY_ALLOC_NO_ERR (u_ev, CALLOC(user_event)); + u_ev->event = queue->wait_events[i]; + u_ev->next = cb->wait_user_events; + cb->wait_user_events = u_ev; + } + } + /* Find out all user events that events in event_wait_list wait */ for(i=0; istatus <= CL_COMPLETE) @@ -274,6 +308,7 @@ void cl_event_new_enqueue_callback(cl_event 
event, u_ev->event = event_wait_list[i]; u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; + cl_command_queue_insert_event(event->queue, event_wait_list[i]); } else if(event_wait_list[i]->enqueue_cb != NULL) { user_events = event_wait_list[i]->enqueue_cb->wait_user_events; while(user_events != NULL) { @@ -293,10 +328,10 @@ void cl_event_new_enqueue_callback(cl_event event, u_ev->next = cb->wait_user_events; cb->wait_user_events = u_ev; user_events = user_events->next; + cl_command_queue_insert_event(event->queue, event_wait_list[i]); } } } - if(data->queue != NULL && event->gpgpu_event != NULL) { cl_gpgpu_event_pending(data->queue->gpgpu, event->gpgpu_event); data->ptr = (void *)event->gpgpu_event; @@ -403,8 +438,12 @@ void cl_event_set_status(cl_event event, cl_int status) continue; } + //remove user event frome enqueue_cb's ctx + cl_command_queue_remove_event(enqueue_cb->event->queue, event); + /* All user events complete, now wait enqueue events */ - ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list); + ret = cl_event_wait_events(enqueue_cb->num_events, enqueue_cb->wait_list, + enqueue_cb->event->queue); assert(ret != CL_ENQUEUE_EXECUTE_DEFER); cb = enqueue_cb; @@ -428,3 +467,26 @@ void cl_event_update_status(cl_event event) (cl_gpgpu_event_update_status(event->gpgpu_event, 0) == command_complete)) cl_event_set_status(event, CL_COMPLETE); } + +cl_int cl_event_marker(cl_command_queue queue, cl_event* event) +{ + enqueue_data data; + + *event = cl_event_new(queue->ctx, queue, CL_COMMAND_MARKER, CL_TRUE); + if(event == NULL) + return CL_OUT_OF_HOST_MEMORY; + + //if wait_events_num>0, the marker event need wait queue->wait_events + if(queue->wait_events_num > 0) { + data.type = EnqueueMarker; + cl_event_new_enqueue_callback(*event, &data, queue->wait_events_num, queue->wait_events); + return CL_SUCCESS; + } + + if(queue->last_event && queue->last_event->gpgpu_event) { + cl_gpgpu_event_update_status(queue->last_event->gpgpu_event, 
1); + } + + cl_event_set_status(*event, CL_COMPLETE); + return CL_SUCCESS; +} diff --git a/src/cl_event.h b/src/cl_event.h index 8523abe..7dde24b 100644 --- a/src/cl_event.h +++ b/src/cl_event.h @@ -22,9 +22,9 @@ #include -#include "cl_enqueue.h" #include "cl_internals.h" #include "cl_driver.h" +#include "cl_enqueue.h" #include "CL/cl.h" #define CL_ENQUEUE_EXECUTE_IMM 0 @@ -81,12 +81,14 @@ cl_int cl_event_set_callback(cl_event, cl_int, EVENT_NOTIFY, void *); /* Check events wait list for enqueue commonds */ cl_int cl_event_check_waitlist(cl_uint, const cl_event *, cl_event *, cl_context); /* Wait the all events in wait list complete */ -cl_int cl_event_wait_events(cl_uint, const cl_event *); +cl_int cl_event_wait_events(cl_uint, const cl_event *, cl_command_queue); /* New a enqueue suspend task */ void cl_event_new_enqueue_callback(cl_event, enqueue_data *, cl_uint, const cl_event *); /* Set the event status and call all callbacks */ void cl_event_set_status(cl_event, cl_int); /* Check and update event status */ void cl_event_update_status(cl_event); +/* Create the marker event */ +cl_int cl_event_marker(cl_command_queue, cl_event*); #endif /* __CL_EVENT_H__ */ -- 2.7.4