From: Junyan He Date: Wed, 26 Mar 2014 10:27:48 +0000 (+0800) Subject: Add three copy cl files for Enqueue Copy usage. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=da6fdcd1e21d710c5253417634a9b23543ea0271;p=contrib%2Fbeignet.git Add three copy cl files for Enqueue Copy usage. Add these three cl files, one for src and dst are not aligned but have same offset to 4. second for src's %4 offset is bigger than the dst's third for src's %4 offset is small than the dst's Signed-off-by: Junyan He Reviewed-by: Zhigang Gong --- diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4c34235..d690d9a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -18,7 +18,9 @@ endforeach (KF) endmacro (MakeKernelBinStr) set (KERNEL_STR_FILES) -set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 cl_internal_copy_buf_align16) +set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 +cl_internal_copy_buf_align16 cl_internal_copy_buf_unalign_same_offset +cl_internal_copy_buf_unalign_dst_offset cl_internal_copy_buf_unalign_src_offset) MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels/" "${KERNEL_NAMES}") set(OPENCL_SRC diff --git a/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl new file mode 100644 index 0000000..13f4162 --- /dev/null +++ b/src/kernels/cl_internal_copy_buf_unalign_dst_offset.cl @@ -0,0 +1,28 @@ +kernel void __cl_cpy_region_unalign_dst_offset ( global int* src, unsigned int src_offset, + global int* dst, unsigned int dst_offset, + unsigned int size, + unsigned int first_mask, unsigned int last_mask, + unsigned int shift, unsigned int dw_mask) +{ + int i = get_global_id(0); + unsigned int tmp = 0; + + if (i > size -1) + return; + + /* last dw, need to be careful, not to overflow the source. */ + if ((i == size - 1) && ((last_mask & (~(~dw_mask >> shift))) == 0)) { + tmp = ((src[src_offset + i] & ~dw_mask) >> shift); + } else { + tmp = ((src[src_offset + i] & ~dw_mask) >> shift) + | ((src[src_offset + i + 1] & dw_mask) << (32 - shift)); + } + + if (i == 0) { + dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask)); + } else if (i == size - 1) { + dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask)); + } else { + dst[i+dst_offset] = tmp; + } +} diff --git a/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl new file mode 100644 index 0000000..8510246 --- /dev/null +++ b/src/kernels/cl_internal_copy_buf_unalign_same_offset.cl @@ -0,0 +1,19 @@ +kernel void __cl_cpy_region_unalign_same_offset ( global int* src, unsigned int src_offset, + global int* dst, unsigned int dst_offset, + unsigned int size, + unsigned int first_mask, unsigned int last_mask) +{ + int i = get_global_id(0); + if (i > size -1) + return; + + if (i == 0) { + dst[dst_offset] = (dst[dst_offset] & first_mask) + | (src[src_offset] & (~first_mask)); + } else if (i == size - 1) { + dst[i+dst_offset] = (src[i+src_offset] & last_mask) + | (dst[i+dst_offset] & (~last_mask)); + } else { + dst[i+dst_offset] = src[i+src_offset]; + } +} diff --git a/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl new file mode 100644 index 0000000..f98368a --- /dev/null +++ b/src/kernels/cl_internal_copy_buf_unalign_src_offset.cl @@ -0,0 +1,29 @@ +kernel void __cl_cpy_region_unalign_src_offset ( global int* src, unsigned int src_offset, + global int* dst, unsigned int dst_offset, + unsigned int size, + unsigned int first_mask, unsigned int last_mask, + unsigned int shift, unsigned int dw_mask, int src_less) +{ + int i = get_global_id(0); + unsigned int tmp = 0; + + if (i > size -1) + return; + + if (i == 0) { + tmp = ((src[src_offset + i] & dw_mask) << shift); + } else if (src_less && i == size - 1) { // not exceed the bound of source + tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift)); + } else { + tmp = ((src[src_offset + i - 1] & ~dw_mask) >> (32 - shift)) + | ((src[src_offset + i] & dw_mask) << shift); + } + + if (i == 0) { + dst[dst_offset] = (dst[dst_offset] & first_mask) | (tmp & (~first_mask)); + } else if (i == size - 1) { + dst[i+dst_offset] = (tmp & last_mask) | (dst[i+dst_offset] & (~last_mask)); + } else { + dst[i+dst_offset] = tmp; + } +}