From 6a0a20aa37fd62866245c8e92b255c17f92f67c5 Mon Sep 17 00:00:00 2001 From: Zhigang Gong Date: Fri, 13 Sep 2013 16:54:12 +0800 Subject: [PATCH] Runtime: Implement CL_MEM_USE_HOST_PTR flag for image. Signed-off-by: Zhigang Gong Reviewed-by: "Yang, Rong R" --- src/cl_enqueue.c | 37 +++++++++++---------------------- src/cl_mem.c | 63 ++++++++++++++++++++++++++++++++++++-------------------- src/cl_mem.h | 6 ++++++ 3 files changed, 59 insertions(+), 47 deletions(-) diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index 585fcd7..0330691 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -210,37 +210,16 @@ cl_int cl_enqueue_write_image(enqueue_data *data) cl_mem mem = data->mem_obj; CHECK_IMAGE(mem, image); - const size_t *origin = data->origin; - const size_t *region = data->region; if (!(dst_ptr = cl_mem_map_auto(mem))) { err = CL_MAP_FAILURE; goto error; } - size_t offset = image->bpp*origin[0] + image->row_pitch*origin[1] + image->slice_pitch*origin[2]; - dst_ptr = (char*)dst_ptr + offset; - - if (!origin[0] && region[0] == image->w && data->row_pitch == image->row_pitch && - (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch))) - { - memcpy(dst_ptr, data->const_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); - } - else { - cl_uint y, z; - for (z = 0; z < region[2]; z++) { - const char* src = data->const_ptr; - char* dst = dst_ptr; - for (y = 0; y < region[1]; y++) { - memcpy(dst, src, image->bpp*region[0]); - src += data->row_pitch; - dst += image->row_pitch; - } - data->const_ptr = (char*)data->const_ptr + data->slice_pitch; - dst_ptr = (char*)dst_ptr + image->slice_pitch; - } - } - + cl_mem_copy_image_region(data->origin, data->region, dst_ptr, + image->row_pitch, image->slice_pitch, + data->const_ptr, data->row_pitch, + data->slice_pitch, image); err = cl_mem_unmap_auto(mem); error: @@ -276,6 +255,7 @@ cl_int cl_enqueue_map_image(enqueue_data *data) cl_int err = CL_SUCCESS; cl_mem mem = data->mem_obj; void *ptr = NULL; + CHECK_IMAGE(mem, image); if (!(ptr = cl_mem_map_gtt(mem))) { err = CL_MAP_FAILURE; @@ -284,6 +264,13 @@ cl_int cl_enqueue_map_image(enqueue_data *data) assert(data->ptr == (char*)ptr + data->offset); + if(mem->flags & CL_MEM_USE_HOST_PTR) { + assert(mem->host_ptr); + cl_mem_copy_image_region(data->origin, data->region, + mem->host_ptr, image->host_row_pitch, image->host_slice_pitch, + data->ptr, data->row_pitch, data->slice_pitch, image); + } + error: return err; } diff --git a/src/cl_mem.c b/src/cl_mem.c index 25dcc15..d901f9f 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -329,34 +329,47 @@ error: goto exit; } -static void -cl_mem_copy_image(struct _cl_mem_image *image, - size_t row_pitch, - size_t slice_pitch, - void* host_ptr) +void +cl_mem_copy_image_region(const size_t *origin, const size_t *region, + void *dst, size_t dst_row_pitch, size_t dst_slice_pitch, + const void *src, size_t src_row_pitch, size_t src_slice_pitch, + const struct _cl_mem_image *image) { - char* dst_ptr = cl_mem_map_auto((cl_mem)image); - - if (row_pitch == image->row_pitch && - (image->depth == 1 || slice_pitch == image->slice_pitch)) + size_t offset = image->bpp * origin[0] + dst_row_pitch * origin[1] + dst_slice_pitch * origin[2]; + dst = (char*)dst + offset; + if (!origin[0] && region[0] == image->w && dst_row_pitch == src_row_pitch && + (region[2] == 1 || (!origin[1] && region[1] == image->h && dst_slice_pitch == src_slice_pitch))) { - memcpy(dst_ptr, host_ptr, image->depth == 1 ? row_pitch*image->h : slice_pitch*image->depth); + memcpy(dst, src, region[2] == 1 ? src_row_pitch*region[1] : src_slice_pitch*region[2]); } else { - size_t y, z; - for (z = 0; z < image->depth; z++) { - const char* src = host_ptr; - char* dst = dst_ptr; - for (y = 0; y < image->h; y++) { - memcpy(dst, src, image->bpp*image->w); - src += row_pitch; - dst += image->row_pitch; + cl_uint y, z; + for (z = 0; z < region[2]; z++) { + const char* src_ptr = src; + char* dst_ptr = dst; + for (y = 0; y < region[1]; y++) { + memcpy(dst_ptr, src_ptr, image->bpp*region[0]); + src_ptr += src_row_pitch; + dst_ptr += dst_row_pitch; } - host_ptr = (char*)host_ptr + slice_pitch; - dst_ptr = (char*)dst_ptr + image->slice_pitch; + src = (char*)src + src_slice_pitch; + dst = (char*)dst + dst_slice_pitch; } } +} + +static void +cl_mem_copy_image(struct _cl_mem_image *image, + size_t row_pitch, + size_t slice_pitch, + void* host_ptr) +{ + char* dst_ptr = cl_mem_map_auto((cl_mem)image); + size_t origin[3] = {0, 0, 0}; + size_t region[3] = {image->w, image->h, image->depth}; + cl_mem_copy_image_region(origin, region, dst_ptr, image->row_pitch, image->slice_pitch, + host_ptr, row_pitch, slice_pitch, image); cl_mem_unmap_auto((cl_mem)image); } @@ -386,7 +399,7 @@ _cl_mem_new_image(cl_context ctx, cl_image_tiling_t tiling = CL_NO_TILE; /* Check flags consistency */ - if (UNLIKELY((flags & CL_MEM_COPY_HOST_PTR) && data == NULL)) { + if (UNLIKELY((flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) && data == NULL)) { err = CL_INVALID_HOST_PTR; goto error; } @@ -473,8 +486,14 @@ _cl_mem_new_image(cl_context ctx, 0, 0, 0); /* Copy the data if required */ - if (flags & CL_MEM_COPY_HOST_PTR) + if (flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_USE_HOST_PTR)) { cl_mem_copy_image(cl_mem_image(mem), pitch, slice_pitch, data); + if (flags & CL_MEM_USE_HOST_PTR) { + mem->host_ptr = data; + cl_mem_image(mem)->host_row_pitch = pitch; + cl_mem_image(mem)->host_slice_pitch = slice_pitch; + } + } exit: if (errcode_ret) diff --git a/src/cl_mem.h b/src/cl_mem.h index 2619385..ac09c3b 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -97,6 +97,7 @@ struct _cl_mem_image { cl_mem_object_type image_type; /* only for images 1D/2D...*/ size_t w, h, depth; /* only for images (depth is only for 3D images) */ size_t row_pitch, slice_pitch; + size_t host_row_pitch, host_slice_pitch; cl_image_tiling_t tiling; /* only IVB+ supports TILE_[X,Y] (image only) */ size_t tile_x, tile_y; /* tile offset, used for mipmap images. */ size_t offset; @@ -236,6 +237,11 @@ cl_mem_allocate(enum cl_mem_type type, cl_int is_tiled, cl_int *errcode); +void +cl_mem_copy_image_region(const size_t *origin, const size_t *region, + void *dst, size_t dst_row_pitch, size_t dst_slice_pitch, + const void *src, size_t src_row_pitch, size_t src_slice_pitch, + const struct _cl_mem_image *image); #endif /* __CL_MEM_H__ */ -- 2.7.4