From 79270b1bec2d85e86eab077c397bf05cbc4922a8 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Thu, 7 Nov 2013 16:44:39 +0800 Subject: [PATCH] Implement the clCreateSubBuffer API Signed-off-by: Junyan He Reviewed-by: Homer Hsing Reviewed-by: Zhigang Gong --- src/cl_api.c | 44 ++++++++++++++---- src/cl_command_queue.c | 7 ++- src/cl_driver.h | 2 +- src/cl_enqueue.c | 29 ++++++++---- src/cl_mem.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++-- src/cl_mem.h | 10 ++++- 6 files changed, 189 insertions(+), 23 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 033e279..6acc1a2 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -482,13 +482,17 @@ clCreateSubBuffer(cl_mem buffer, const void * buffer_create_info, cl_int * errcode_ret) { -#if 0 + cl_mem mem = NULL; cl_int err = CL_SUCCESS; - CHECK_MEM (buffer); - NOT_IMPLEMENTED; + + CHECK_MEM(buffer); + + mem = cl_mem_new_sub_buffer(buffer, flags, buffer_create_type, + buffer_create_info, &err); error: -#endif - return NULL; + if (errcode_ret) + *errcode_ret = err; + return mem; } cl_mem @@ -1594,7 +1598,7 @@ clEnqueueCopyBuffer(cl_command_queue command_queue, err = CL_INVALID_VALUE; goto error; } - if (dst_offset < 0 || dst_offset + cb > src_buffer->size) { + if (dst_offset < 0 || dst_offset + cb > dst_buffer->size) { err = CL_INVALID_VALUE; goto error; } @@ -1607,7 +1611,22 @@ clEnqueueCopyBuffer(cl_command_queue command_queue, goto error; } - // TODO: Need to check the sub buffer cases. 
+ /* Reject the copy when both sub-buffers share a parent and their absolute byte ranges intersect. */ + if (src_buffer->type == CL_MEM_SUBBUFFER_TYPE && dst_buffer->type == CL_MEM_SUBBUFFER_TYPE ) { + struct _cl_mem_buffer* src_b = (struct _cl_mem_buffer*)src_buffer; + struct _cl_mem_buffer* dst_b = (struct _cl_mem_buffer*)dst_buffer; + size_t src_sub_offset = src_b->sub_offset; + size_t dst_sub_offset = dst_b->sub_offset; + + if (src_b->parent == dst_b->parent && ((src_offset + src_sub_offset <= dst_offset + dst_sub_offset /* dst starts inside src range */ + && dst_offset + dst_sub_offset <= src_offset + src_sub_offset + cb - 1) + || (dst_offset + dst_sub_offset <= src_offset + src_sub_offset /* src starts inside dst range */ + && src_offset + src_sub_offset <= dst_offset + dst_sub_offset + cb - 1))) { + err = CL_MEM_COPY_OVERLAP; + goto error; + } + } + err = cl_mem_copy(command_queue, src_buffer, dst_buffer, src_offset, dst_offset, cb); TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_buffer->ctx); @@ -2066,15 +2085,22 @@ static cl_int _cl_map_mem(cl_mem mem, void **ptr, void **mem_ptr, size_t offset, { cl_int slot = -1; int err = CL_SUCCESS; + size_t sub_offset = 0; + + if(mem->type == CL_MEM_SUBBUFFER_TYPE) { + struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; + sub_offset = buffer->sub_offset; + } + if (!(*ptr = cl_mem_map_gtt_unsync(mem))) { err = CL_MAP_FAILURE; goto error; } - *ptr = (char*)(*ptr) + offset; + *ptr = (char*)(*ptr) + offset + sub_offset; if(mem->flags & CL_MEM_USE_HOST_PTR) { assert(mem->host_ptr); //only calc ptr here, will do memcpy in enqueue - *mem_ptr = mem->host_ptr + offset; + *mem_ptr = mem->host_ptr + offset + sub_offset; } else { *mem_ptr = *ptr; } diff --git a/src/cl_command_queue.c b/src/cl_command_queue.c index 768ef0c..3f9d95c 100644 --- a/src/cl_command_queue.c +++ b/src/cl_command_queue.c @@ -145,7 +145,12 @@ cl_command_queue_bind_surface(cl_command_queue queue, cl_kernel k) if (arg_type != GBE_ARG_GLOBAL_PTR || !k->args[i].mem) continue; offset = gbe_kernel_get_curbe_offset(k->opaque, GBE_CURBE_KERNEL_ARGUMENT, i); - cl_gpgpu_bind_buf(queue->gpgpu, 
k->args[i].mem->bo, offset, cc_llc_l3); + if (k->args[i].mem->type == CL_MEM_SUBBUFFER_TYPE) { + struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)k->args[i].mem; + cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, buffer->sub_offset, cc_llc_l3); + } else { + cl_gpgpu_bind_buf(queue->gpgpu, k->args[i].mem->bo, offset, 0, cc_llc_l3); + } } return CL_SUCCESS; diff --git a/src/cl_driver.h b/src/cl_driver.h index 1b18e87..8efe1e7 100644 --- a/src/cl_driver.h +++ b/src/cl_driver.h @@ -99,7 +99,7 @@ typedef cl_gpgpu (cl_gpgpu_sync_cb)(cl_gpgpu); extern cl_gpgpu_sync_cb *cl_gpgpu_sync; /* Bind a regular unformatted buffer */ -typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t cchint); +typedef void (cl_gpgpu_bind_buf_cb)(cl_gpgpu, cl_buffer, uint32_t offset, uint32_t internal_offset, uint32_t cchint); extern cl_gpgpu_bind_buf_cb *cl_gpgpu_bind_buf; /* bind samplers defined in both kernel and kernel args. */ diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index 0330691..070fd98 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -30,14 +30,18 @@ cl_int cl_enqueue_read_buffer(enqueue_data* data) { cl_int err = CL_SUCCESS; + cl_mem mem = data->mem_obj; + assert(mem->type == CL_MEM_BUFFER_TYPE || + mem->type == CL_MEM_SUBBUFFER_TYPE); void* src_ptr; + struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; if (!(src_ptr = cl_mem_map_auto(data->mem_obj))) { err = CL_MAP_FAILURE; goto error; } - memcpy(data->ptr, (char*)src_ptr + data->offset, data->size); + memcpy(data->ptr, (char*)src_ptr + data->offset + buffer->sub_offset, data->size); err = cl_mem_unmap_auto(data->mem_obj); @@ -95,6 +99,10 @@ error: cl_int cl_enqueue_write_buffer(enqueue_data *data) { cl_int err = CL_SUCCESS; + cl_mem mem = data->mem_obj; + assert(mem->type == CL_MEM_BUFFER_TYPE || + mem->type == CL_MEM_SUBBUFFER_TYPE); + struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; void* dst_ptr; if (!(dst_ptr = cl_mem_map_auto(data->mem_obj))) { @@ 
-102,7 +110,7 @@ cl_int cl_enqueue_write_buffer(enqueue_data *data) goto error; } - memcpy((char*)dst_ptr + data->offset, data->const_ptr, data->size); + memcpy((char*)dst_ptr + data->offset + buffer->sub_offset, data->const_ptr, data->size); err = cl_mem_unmap_auto(data->mem_obj); @@ -231,19 +239,23 @@ cl_int cl_enqueue_map_buffer(enqueue_data *data) { void *ptr = NULL; cl_int err = CL_SUCCESS; - cl_mem buffer = data->mem_obj; + cl_mem mem = data->mem_obj; + assert(mem->type == CL_MEM_BUFFER_TYPE || + mem->type == CL_MEM_SUBBUFFER_TYPE); + struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; + //because using unsync map in clEnqueueMapBuffer, so force use map_gtt here - if (!(ptr = cl_mem_map_gtt(buffer))) { + if (!(ptr = cl_mem_map_gtt(mem))) { err = CL_MAP_FAILURE; goto error; } - ptr = (char*)ptr + data->offset; + ptr = (char*)ptr + data->offset + buffer->sub_offset; assert(data->ptr == ptr); - if(buffer->flags & CL_MEM_USE_HOST_PTR) { - assert(buffer->host_ptr); - memcpy(buffer->host_ptr + data->offset, ptr, data->size); + if(mem->flags & CL_MEM_USE_HOST_PTR) { + assert(mem->host_ptr); + memcpy(mem->host_ptr + data->offset + buffer->sub_offset, ptr, data->size); /* same host address _cl_map_mem returned: host_ptr + offset + sub_offset */ } error: @@ -386,6 +398,7 @@ cl_int cl_enqueue_handle(enqueue_data* data) case EnqueueUnmapMemObject: return cl_enqueue_unmap_mem_object(data); case EnqueueCopyBufferRect: + case EnqueueCopyBuffer: case EnqueueCopyImage: case EnqueueCopyBufferToImage: case EnqueueCopyImageToBuffer: diff --git a/src/cl_mem.c b/src/cl_mem.c index b4c709d..00ef30d 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -52,7 +52,9 @@ static cl_mem_object_type cl_get_mem_object_type(cl_mem mem) { switch (mem->type) { - case CL_MEM_BUFFER_TYPE: return CL_MEM_OBJECT_BUFFER; + case CL_MEM_BUFFER_TYPE: + case CL_MEM_SUBBUFFER_TYPE: + return CL_MEM_OBJECT_BUFFER; case CL_MEM_IMAGE_TYPE: case CL_MEM_GL_IMAGE_TYPE: { @@ -329,6 +331,102 @@ error: goto exit; } +LOCAL cl_mem +cl_mem_new_sub_buffer(cl_mem buffer, + cl_mem_flags flags, + 
cl_buffer_create_type create_type, + const void *create_info, + cl_int *errcode_ret) +{ + cl_int err = CL_SUCCESS; + cl_mem mem = NULL; + struct _cl_mem_buffer *sub_buf = NULL; + + if (buffer->type != CL_MEM_BUFFER_TYPE) { /* only a plain buffer may be a parent, never a sub-buffer or image */ + err = CL_INVALID_MEM_OBJECT; + goto error; + } + + if (flags && (((buffer->flags & CL_MEM_WRITE_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_READ_ONLY))) + || ((buffer->flags & CL_MEM_READ_ONLY) && (flags & (CL_MEM_READ_WRITE|CL_MEM_WRITE_ONLY))) + || (flags & (CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR)))) { /* access flags must not contradict the parent; host-pointer flags are never accepted here */ + err = CL_INVALID_VALUE; + goto error; + } + + if (create_type != CL_BUFFER_CREATE_TYPE_REGION) { + err = CL_INVALID_VALUE; + goto error; + } + + if (!create_info) { + err = CL_INVALID_VALUE; + goto error; + } + + const cl_buffer_region *info = (const cl_buffer_region *)create_info; + + if (!info->size) { + err = CL_INVALID_BUFFER_SIZE; + goto error; + } + + if (info->origin > buffer->size || info->size > buffer->size - info->origin) { /* overflow-safe form of origin + size > buffer->size */ + err = CL_INVALID_VALUE; + goto error; + } + + if (info->origin & (buffer->ctx->device->mem_base_addr_align - 1)) { /* NOTE(review): CL_DEVICE_MEM_BASE_ADDR_ALIGN is specified in bits; confirm the units stored in mem_base_addr_align */ + err = CL_MISALIGNED_SUB_BUFFER_OFFSET; + goto error; + } + + /* Now create the sub buffer and link it to the buffer. 
*/ + TRY_ALLOC (sub_buf, CALLOC(struct _cl_mem_buffer)); + mem = &sub_buf->base; + mem->type = CL_MEM_SUBBUFFER_TYPE; + SET_ICD(mem->dispatch) + mem->ref_n = 1; + mem->magic = CL_MAGIC_MEM_HEADER; + mem->flags = flags; + sub_buf->parent = (struct _cl_mem_buffer*)buffer; + + cl_mem_add_ref(buffer); + /* Append the buffer in the parent buffer list */ + pthread_mutex_lock(&((struct _cl_mem_buffer*)buffer)->sub_lock); + sub_buf->sub_next = ((struct _cl_mem_buffer*)buffer)->subs; + if (((struct _cl_mem_buffer*)buffer)->subs != NULL) + ((struct _cl_mem_buffer*)buffer)->subs->sub_prev = sub_buf; + ((struct _cl_mem_buffer*)buffer)->subs = sub_buf; + pthread_mutex_unlock(&((struct _cl_mem_buffer*)buffer)->sub_lock); + + mem->bo = buffer->bo; + mem->size = info->size; + sub_buf->sub_offset = info->origin; + if (buffer->flags & CL_MEM_USE_HOST_PTR || buffer->flags & CL_MEM_COPY_HOST_PTR) { + mem->host_ptr = buffer->host_ptr; + } + + cl_context_add_ref(buffer->ctx); + mem->ctx = buffer->ctx; + /* Append the buffer in the context buffer list */ + pthread_mutex_lock(&buffer->ctx->buffer_lock); + mem->next = buffer->ctx->buffers; + if (buffer->ctx->buffers != NULL) + buffer->ctx->buffers->prev = mem; + buffer->ctx->buffers = mem; + pthread_mutex_unlock(&buffer->ctx->buffer_lock); + +exit: + if (errcode_ret) + *errcode_ret = err; + return mem; +error: + cl_mem_delete(mem); + mem = NULL; + goto exit; +} + void cl_mem_copy_image_region(const size_t *origin, const size_t *region, void *dst, size_t dst_row_pitch, size_t dst_slice_pitch, @@ -546,8 +644,6 @@ cl_mem_delete(cl_mem mem) cl_mem_gl_delete(cl_mem_gl_image(mem)); } #endif - if (LIKELY(mem->bo != NULL)) - cl_buffer_unreference(mem->bo); /* Remove it from the list */ assert(mem->ctx); @@ -586,6 +682,24 @@ cl_mem_delete(cl_mem mem) } } + /* Iff we are sub, do nothing for bo release. 
*/ + if (mem->type == CL_MEM_SUBBUFFER_TYPE) { + struct _cl_mem_buffer* buffer = (struct _cl_mem_buffer*)mem; + /* Remove it from the parent's list */ + assert(buffer->parent); + pthread_mutex_lock(&buffer->parent->sub_lock); + if (buffer->sub_prev) + buffer->sub_prev->sub_next = buffer->sub_next; + if (buffer->sub_next) + buffer->sub_next->sub_prev = buffer->sub_prev; + if (buffer->parent->subs == buffer) + buffer->parent->subs = buffer->sub_next; + pthread_mutex_unlock(&buffer->parent->sub_lock); + cl_mem_delete((cl_mem )(buffer->parent)); + } else if (LIKELY(mem->bo != NULL)) { + cl_buffer_unreference(mem->bo); + } + cl_free(mem); } diff --git a/src/cl_mem.h b/src/cl_mem.h index 75d5cf4..e325fa1 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -66,6 +66,7 @@ typedef struct _cl_mem_dstr_cb { /* Used for buffers and images */ enum cl_mem_type { CL_MEM_BUFFER_TYPE, + CL_MEM_SUBBUFFER_TYPE, CL_MEM_IMAGE_TYPE, CL_MEM_GL_IMAGE_TYPE, }; @@ -137,7 +138,11 @@ cl_mem_image_init(struct _cl_mem_image *image, size_t w, size_t h, struct _cl_mem_buffer { _cl_mem base; - size_t offset; + struct _cl_mem_buffer* subs; /* Sub buf objects. */ + size_t sub_offset; /* The sub start offset. */ + struct _cl_mem_buffer* sub_prev, *sub_next;/* We chain the sub memory buffers together */ + pthread_mutex_t sub_lock; /* Sub buffers list lock*/ + struct _cl_mem_buffer* parent; /* Point to the parent buffer if is sub-buffer */ }; inline static struct _cl_mem_image * @@ -170,6 +175,9 @@ extern cl_int cl_get_image_info(cl_mem, cl_image_info, size_t, void *, size_t *) /* Create a new memory object and initialize it with possible user data */ extern cl_mem cl_mem_new_buffer(cl_context, cl_mem_flags, size_t, void*, cl_int*); +/* Create a new sub memory object */ +extern cl_mem cl_mem_new_sub_buffer(cl_mem, cl_mem_flags, cl_buffer_create_type, const void *, cl_int *); + /* Idem but this is an image */ extern cl_mem cl_mem_new_image(cl_context context, -- 2.7.4