From cca4b253d88cbdbe9737c9c6071bde81f7b85e9e Mon Sep 17 00:00:00 2001 From: Luo Date: Tue, 24 Jun 2014 10:09:12 +0800 Subject: [PATCH] add cpu copy for 1Darray and 2darray related copy APIs. detail cases: 1Darray, 2Darray, 2Darrayto2D, 2Darrayto3D, 2Dto2Darray, 3Dto2Darray. 1d used gpu copy. v2: fixed 1d array to 1d array copy, don't need to switch depth and height. Signed-off-by: Luo Reviewed-by: Zhigang Gong Signed-off-by: Zhigang Gong --- src/CMakeLists.txt | 4 +- src/cl_context.h | 1 + src/cl_mem.c | 69 ++++++++++++++++++++++++-- src/cl_mem.h | 4 ++ src/kernels/cl_internal_copy_image_1d_to_1d.cl | 19 +++++++ 5 files changed, 91 insertions(+), 6 deletions(-) create mode 100644 src/kernels/cl_internal_copy_image_1d_to_1d.cl diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7ae84fe..46426d9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,8 +41,8 @@ set (KERNEL_STR_FILES) set (KERNEL_NAMES cl_internal_copy_buf_align4 cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset -cl_internal_copy_buf_rect cl_internal_copy_image_2d_to_2d cl_internal_copy_image_3d_to_2d -cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d +cl_internal_copy_buf_rect cl_internal_copy_image_1d_to_1d cl_internal_copy_image_2d_to_2d +cl_internal_copy_image_3d_to_2d cl_internal_copy_image_2d_to_3d cl_internal_copy_image_3d_to_3d cl_internal_copy_image_2d_to_buffer cl_internal_copy_image_3d_to_buffer cl_internal_copy_buffer_to_image_2d cl_internal_copy_buffer_to_image_3d cl_internal_fill_buf_align8 cl_internal_fill_buf_align4 diff --git a/src/cl_context.h b/src/cl_context.h index 0e4db73..75afbf6 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -47,6 +47,7 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_BUFFER_UNALIGN_DST_OFFSET, CL_ENQUEUE_COPY_BUFFER_UNALIGN_SRC_OFFSET, CL_ENQUEUE_COPY_BUFFER_RECT, + CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, //copy image 1d to image 1d CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, //copy image 2d to image 2d CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, //copy image 3d to image 2d CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, //copy image 2d to image 3d diff --git a/src/cl_mem.c b/src/cl_mem.c index f860b38..05ca9f1 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -546,6 +546,34 @@ cl_mem_copy_image_region(const size_t *origin, const size_t *region, } } +void +cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, const size_t *region, + const struct _cl_mem_image *dst_image, const struct _cl_mem_image *src_image) +{ + char* dst= cl_mem_map_auto((cl_mem)dst_image); + char* src= cl_mem_map_auto((cl_mem)src_image); + size_t dst_offset = dst_image->bpp * dst_origin[0] + dst_image->row_pitch * dst_origin[1] + dst_image->slice_pitch * dst_origin[2]; + size_t src_offset = src_image->bpp * src_origin[0] + src_image->row_pitch * src_origin[1] + src_image->slice_pitch * src_origin[2]; + dst= (char*)dst+ dst_offset; + src= (char*)src+ src_offset; + cl_uint y, z; + for (z = 0; z < region[2]; z++) { + const char* src_ptr = src; + char* dst_ptr = dst; + for (y = 0; y < region[1]; y++) { + memcpy(dst_ptr, src_ptr, src_image->bpp*region[0]); + src_ptr += src_image->row_pitch; + dst_ptr += dst_image->row_pitch; + } + src = (char*)src + src_image->slice_pitch; + dst = (char*)dst + dst_image->slice_pitch; + } + + cl_mem_unmap_auto((cl_mem)src_image); + cl_mem_unmap_auto((cl_mem)dst_image); + +} + static void cl_mem_copy_image(struct _cl_mem_image *image, size_t row_pitch, @@ -1447,33 +1475,66 @@ cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image assert(src_image->base.ctx == dst_image->base.ctx); /* setup the kernel and run. */ - if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) { + if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D) { + if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D) { + extern char cl_internal_copy_image_1d_to_1d_str[]; + extern int cl_internal_copy_image_1d_to_1d_str_size; + + ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_1D_TO_1D, + cl_internal_copy_image_1d_to_1d_str, (size_t)cl_internal_copy_image_1d_to_1d_str_size, NULL); + } + } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { extern char cl_internal_copy_image_2d_to_2d_str[]; extern int cl_internal_copy_image_2d_to_2d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_2D, cl_internal_copy_image_2d_to_2d_str, (size_t)cl_internal_copy_image_2d_to_2d_str_size, NULL); - }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_copy_image_2d_to_3d_str[]; extern int cl_internal_copy_image_2d_to_3d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_2D_TO_3D, cl_internal_copy_image_2d_to_3d_str, (size_t)cl_internal_copy_image_2d_to_3d_str_size, NULL); + } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + + cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); + return CL_SUCCESS; } - }else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + if(dst_image->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY) { + + cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); + return CL_SUCCESS; + } + } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + + cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); + return CL_SUCCESS; + } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { + cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); + return CL_SUCCESS; + } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); + return CL_SUCCESS; + } + } else if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D) { extern char cl_internal_copy_image_3d_to_2d_str[]; extern int cl_internal_copy_image_3d_to_2d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_2D, cl_internal_copy_image_3d_to_2d_str, (size_t)cl_internal_copy_image_3d_to_2d_str_size, NULL); - }else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { extern char cl_internal_copy_image_3d_to_3d_str[]; extern int cl_internal_copy_image_3d_to_3d_str_size; ker = cl_context_get_static_kernel_from_bin(queue->ctx, CL_ENQUEUE_COPY_IMAGE_3D_TO_3D, cl_internal_copy_image_3d_to_3d_str, (size_t)cl_internal_copy_image_3d_to_3d_str_size, NULL); + } else if(dst_image->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY) { + cl_mem_copy_image_to_image(dst_origin, src_origin, region, dst_image, src_image); + return CL_SUCCESS; } } diff --git a/src/cl_mem.h b/src/cl_mem.h index 8ed8e2d..a2fb851 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -264,6 +264,10 @@ cl_mem_copy_image_region(const size_t *origin, const size_t *region, const void *src, size_t src_row_pitch, size_t src_slice_pitch, const struct _cl_mem_image *image); +void +cl_mem_copy_image_to_image(const size_t *dst_origin,const size_t *src_origin, const size_t *region, + const struct _cl_mem_image *dst_image, const struct _cl_mem_image *src_image); + extern cl_mem cl_mem_new_libva_buffer(cl_context ctx, unsigned int bo_name, cl_int *errcode); diff --git a/src/kernels/cl_internal_copy_image_1d_to_1d.cl b/src/kernels/cl_internal_copy_image_1d_to_1d.cl new file mode 100644 index 0000000..dca82b2 --- /dev/null +++ b/src/kernels/cl_internal_copy_image_1d_to_1d.cl @@ -0,0 +1,19 @@ +kernel void __cl_copy_image_1d_to_1d(__read_only image1d_t src_image, __write_only image1d_t dst_image, + unsigned int region0, unsigned int region1, unsigned int region2, + unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, + unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) +{ + int i = get_global_id(0); + int j = get_global_id(1); + int k = get_global_id(2); + int4 color; + const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; + int src_coord; + int dst_coord; + if((i >= region0) || (j>= region1) || (k>=region2)) + return; + src_coord = src_origin0 + i; + dst_coord = dst_origin0 + i; + color = read_imagei(src_image, sampler, src_coord); + write_imagei(dst_image, dst_coord, color); +} -- 2.7.4