From 1ea196653abe759239896580ec89702c2e5f9a28 Mon Sep 17 00:00:00 2001 From: Junyan He Date: Fri, 12 Jul 2013 14:31:14 +0800 Subject: [PATCH] Improve the clEnqueueMapBuffer and clCreateBuffer API In the clCreateBuffer API, add support for the CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR flags. The CL_MEM_ALLOC_HOST_PTR flag seems to require nothing special. The CL_MEM_USE_HOST_PTR flag places the following requirements on the clEnqueueMapBuffer API: 1> The host_ptr specified in clCreateBuffer is guaranteed to contain the latest bits in the region being mapped when the clEnqueueMapBuffer command has completed. 2> The pointer value returned by clEnqueueMapBuffer will be derived from the host_ptr specified when the buffer object is created. We improve clEnqueueMapBuffer to set up a map of the mapped addresses and perform the data synchronization based on those addresses at map and unmap time. Signed-off-by: Junyan He Tested-by: Zhigang Gong --- src/cl_api.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- src/cl_mem.c | 43 ++++++++++++++++---- src/cl_mem.h | 10 +++++ 3 files changed, 169 insertions(+), 9 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 277f7a9..20cbc1e 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -1470,7 +1470,9 @@ clEnqueueMapBuffer(cl_command_queue command_queue, cl_int * errcode_ret) { void *ptr = NULL; + void *mem_ptr = NULL; cl_int err = CL_SUCCESS; + int slot = -1; CHECK_QUEUE(command_queue); CHECK_MEM(buffer); @@ -1503,10 +1505,66 @@ clEnqueueMapBuffer(cl_command_queue command_queue, ptr = (char*)ptr + offset; + if(buffer->flags & CL_MEM_USE_HOST_PTR) { + assert(buffer->host_ptr); + memcpy(buffer->host_ptr + offset, ptr, size); + mem_ptr = buffer->host_ptr + offset; + } else { + mem_ptr = ptr; + } + + /* Record the mapped address. 
*/ + if (!buffer->mapped_ptr_sz) { + buffer->mapped_ptr_sz = 16; + buffer->mapped_ptr = (cl_mapped_ptr *)malloc( + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz); + if (!buffer->mapped_ptr) { + cl_mem_unmap_auto (buffer); + err = CL_OUT_OF_HOST_MEMORY; + ptr = NULL; + goto error; + } + + memset(buffer->mapped_ptr, 0, buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); + slot = 0; + } else { + int i = 0; + for (; i < buffer->mapped_ptr_sz; i++) { + if (buffer->mapped_ptr[i].ptr == NULL) { + slot = i; + break; + } + } + + if (i == buffer->mapped_ptr_sz) { + cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( + sizeof(cl_mapped_ptr) * buffer->mapped_ptr_sz * 2); + if (!new_ptr) { + cl_mem_unmap_auto (buffer); + err = CL_OUT_OF_HOST_MEMORY; + ptr = NULL; + goto error; + } + memset(new_ptr, 0, 2 * buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); + memcpy(new_ptr, buffer->mapped_ptr, + buffer->mapped_ptr_sz * sizeof(cl_mapped_ptr)); + slot = buffer->mapped_ptr_sz; + buffer->mapped_ptr_sz *= 2; + free(buffer->mapped_ptr); + buffer->mapped_ptr = new_ptr; + } + } + + assert(slot != -1); + buffer->mapped_ptr[slot].ptr = mem_ptr; + buffer->mapped_ptr[slot].v_ptr = ptr; + buffer->mapped_ptr[slot].size = size; + buffer->map_ref++; + error: if (errcode_ret) *errcode_ret = err; - return ptr; + return mem_ptr; } void * @@ -1581,7 +1639,70 @@ clEnqueueUnmapMemObject(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - return cl_mem_unmap_auto(memobj); + cl_int err = CL_SUCCESS; + int i; + size_t mapped_size = 0; + void * v_ptr = NULL; + + CHECK_QUEUE(command_queue); + CHECK_MEM(memobj); + if (command_queue->ctx != memobj->ctx) { + err = CL_INVALID_CONTEXT; + goto error; + } + + assert(memobj->mapped_ptr_sz >= memobj->map_ref); + INVALID_VALUE_IF(!mapped_ptr); + for (i = 0; i < memobj->mapped_ptr_sz; i++) { + if (memobj->mapped_ptr[i].ptr == mapped_ptr) { + memobj->mapped_ptr[i].ptr = NULL; + mapped_size = memobj->mapped_ptr[i].size; + v_ptr = 
memobj->mapped_ptr[i].v_ptr; + memobj->mapped_ptr[i].size = 0; + memobj->mapped_ptr[i].v_ptr = NULL; + memobj->map_ref--; + break; + } + } + /* can not find a mapped address? */ + INVALID_VALUE_IF(i == memobj->mapped_ptr_sz); + + if (memobj->flags & CL_MEM_USE_HOST_PTR) { + assert(mapped_ptr >= memobj->host_ptr && + mapped_ptr + mapped_size <= memobj->host_ptr + memobj->size); + /* Sync the data. */ + memcpy(v_ptr, mapped_ptr, mapped_size); + } else { + assert(v_ptr == mapped_ptr); + } + + cl_mem_unmap_auto(memobj); + + /* shrink the mapped slot. */ + if (memobj->mapped_ptr_sz/2 > memobj->map_ref) { + int j = 0; + cl_mapped_ptr *new_ptr = (cl_mapped_ptr *)malloc( + sizeof(cl_mapped_ptr) * (memobj->mapped_ptr_sz/2)); + if (!new_ptr) { + /* Just do nothing. */ + goto error; + } + memset(new_ptr, 0, (memobj->mapped_ptr_sz/2) * sizeof(cl_mapped_ptr)); + + for (i = 0; i < memobj->mapped_ptr_sz; i++) { + if (memobj->mapped_ptr[i].ptr) { + new_ptr[j] = memobj->mapped_ptr[i]; + j++; + assert(j < memobj->mapped_ptr_sz/2); + } + } + memobj->mapped_ptr_sz = memobj->mapped_ptr_sz/2; + free(memobj->mapped_ptr); + memobj->mapped_ptr = new_ptr; + } + +error: + return err; } cl_int diff --git a/src/cl_mem.c b/src/cl_mem.c index 064ecb3..9691ba3 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -157,10 +157,6 @@ cl_mem_allocate(cl_context ctx, cl_ulong max_mem_size; assert(ctx); - FATAL_IF (flags & CL_MEM_ALLOC_HOST_PTR, - "CL_MEM_ALLOC_HOST_PTR unsupported"); /* XXX */ - FATAL_IF (flags & CL_MEM_USE_HOST_PTR, - "CL_MEM_USE_HOST_PTR unsupported"); /* XXX */ if ((err = cl_get_device_info(ctx->device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, @@ -223,11 +219,35 @@ cl_mem_new(cl_context ctx, void *data, cl_int *errcode_ret) { + /* Possible mem type combination: + CL_MEM_ALLOC_HOST_PTR + CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR + CL_MEM_USE_HOST_PTR + CL_MEM_COPY_HOST_PTR */ + cl_int err = CL_SUCCESS; cl_mem mem = NULL; - /* Check flags consistency */ - if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR 
&& data == NULL)) { + /* This flag is valid only if host_ptr is not NULL */ + if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR || + flags & CL_MEM_USE_HOST_PTR) && + data == NULL)) { + err = CL_INVALID_HOST_PTR; + goto error; + } + + /* CL_MEM_ALLOC_HOST_PTR and CL_MEM_USE_HOST_PTR + are mutually exclusive. */ + if (UNLIKELY(flags & CL_MEM_ALLOC_HOST_PTR && + flags & CL_MEM_USE_HOST_PTR)) { + err = CL_INVALID_HOST_PTR; + goto error; + } + + /* CL_MEM_COPY_HOST_PTR and CL_MEM_USE_HOST_PTR + are mutually exclusive. */ + if (UNLIKELY(flags & CL_MEM_COPY_HOST_PTR && + flags & CL_MEM_USE_HOST_PTR)) { err = CL_INVALID_HOST_PTR; goto error; } @@ -238,9 +258,12 @@ cl_mem_new(cl_context ctx, goto error; /* Copy the data if required */ - if (flags & CL_MEM_COPY_HOST_PTR) /* TODO check other flags too */ + if (flags & CL_MEM_COPY_HOST_PTR || flags & CL_MEM_USE_HOST_PTR) cl_buffer_subdata(mem->bo, 0, sz, data); + if (flags & CL_MEM_USE_HOST_PTR) + mem->host_ptr = data; + exit: if (errcode_ret) *errcode_ret = err; @@ -469,6 +492,12 @@ cl_mem_delete(cl_mem mem) pthread_mutex_unlock(&mem->ctx->buffer_lock); cl_context_delete(mem->ctx); + /* Someone still mapped? 
*/ + assert(!mem->map_ref); + + if (mem->mapped_ptr) + free(mem->mapped_ptr); + cl_free(mem); } diff --git a/src/cl_mem.h b/src/cl_mem.h index c63bf6c..66518a6 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -49,6 +49,12 @@ typedef enum cl_image_tiling { CL_TILE_Y = 2 } cl_image_tiling_t; +typedef struct _cl_mapped_ptr { + void * ptr; + void * v_ptr; + size_t size; +}cl_mapped_ptr; + /* Used for buffers and images */ struct _cl_mem { DEFINE_ICD(dispatch) @@ -68,6 +74,10 @@ struct _cl_mem { uint32_t intel_fmt; /* format to provide in the surface state */ uint32_t bpp; /* number of bytes per pixel */ cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */ + void * host_ptr; /* Pointer of the host mem specified by CL_MEM_ALLOC_HOST_PTR */ + cl_mapped_ptr* mapped_ptr;/* Store the mapped addresses and size by caller. */ + int mapped_ptr_sz; /* The array size of mapped_ptr. */ + int map_ref; /* The mapped count. */ }; /* Query information about a memory object */ -- 2.7.4