Add the internal used kernels for buffer copy
authorJunyan He <junyan.he@linux.intel.com>
Thu, 10 Oct 2013 04:28:41 +0000 (12:28 +0800)
committerZhigang Gong <zhigang.gong@linux.intel.com>
Thu, 10 Oct 2013 05:33:23 +0000 (13:33 +0800)
Add internally used kernels for buffer copy. The align
1, 4 and 16 cases are separated into three kernels to improve
performance. The CMakeLists.txt is also updated.

Signed-off-by: Junyan He <junyan.he@linux.intel.com>
Reviewed-by: "Yang, Rong R" <rong.r.yang@intel.com>
src/CMakeLists.txt
src/kernels/cl_internal_copy_buf_align1.cl [new file with mode: 0644]
src/kernels/cl_internal_copy_buf_align16.cl [new file with mode: 0644]
src/kernels/cl_internal_copy_buf_align4.cl [new file with mode: 0644]

index 3fc8689..1e28c6c 100644 (file)
@@ -4,7 +4,25 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR}
                     ${CMAKE_CURRENT_SOURCE_DIR}/../include
                     ${MESA_SOURCE_INCLUDES})
 
+macro (MakeKernelBinStr KERNEL_PATH KERNEL_FILES) # for each name in KERNEL_FILES: rule turning <KERNEL_PATH>/<name>.cl into <name>_str.c
+foreach (KF ${KERNEL_FILES})
+  set (input_file ${KERNEL_PATH}/${KF}.cl)
+  set (output_file ${KERNEL_PATH}/${KF}_str.c)
+  list (APPEND KERNEL_STR_FILES ${output_file}) # a macro has no scope of its own, so this list grows in the caller
+  add_custom_command(OUTPUT ${output_file}
+    COMMAND ${CMAKE_COMMAND} -E remove -f ${output_file} # portable stand-in for `rm -rf`; drops any stale output first
+    COMMAND ${CMAKE_CURRENT_BINARY_DIR}/../backend/src/gbe_bin_generater -s ${input_file} -o${output_file}
+    DEPENDS ${input_file} ${CMAKE_CURRENT_BINARY_DIR}/../backend/src/gbe_bin_generater # regenerate when the kernel or the generator changes
+    VERBATIM) # escape the command arguments the same way on every platform
+endforeach (KF)
+endmacro (MakeKernelBinStr)
+
+set (KERNEL_STR_FILES) # start empty; MakeKernelBinStr appends one generated _str.c path per kernel
+set (KERNEL_NAMES cl_internal_copy_buf_align1 cl_internal_copy_buf_align4 cl_internal_copy_buf_align16)
+MakeKernelBinStr ("${CMAKE_CURRENT_SOURCE_DIR}/kernels" "${KERNEL_NAMES}") # no trailing '/': the macro inserts the separator; quoting passes the list as one argument
+
 set(OPENCL_SRC
+    ${KERNEL_STR_FILES}
     cl_api.c
     cl_alloc.c
     cl_kernel.c
diff --git a/src/kernels/cl_internal_copy_buf_align1.cl b/src/kernels/cl_internal_copy_buf_align1.cl
new file mode 100644 (file)
index 0000000..cd3ec7b
--- /dev/null
@@ -0,0 +1,8 @@
+kernel void __cl_cpy_region_align1 ( global char* src, unsigned int src_offset,
+                                     global char* dst, unsigned int dst_offset,
+                                     unsigned int size)
+{   // Work-item n copies the single char src[src_offset + n] to dst[dst_offset + n].
+    int idx = get_global_id(0);
+    if (idx >= size) return;  // work-items at or past size copy nothing
+    dst[idx+dst_offset] = src[idx+src_offset];
+}
diff --git a/src/kernels/cl_internal_copy_buf_align16.cl b/src/kernels/cl_internal_copy_buf_align16.cl
new file mode 100644 (file)
index 0000000..75b1a4a
--- /dev/null
@@ -0,0 +1,12 @@
+kernel void __cl_cpy_region_align16 ( global float* src, unsigned int src_offset,
+                                      global float* dst, unsigned int dst_offset,
+                                      unsigned int size)
+{   // Work-item n copies the four consecutive floats that start 4*n elements past each offset.
+    int base = get_global_id(0) * 4;
+    if (base >= size*4) return;  // this work-item lies beyond the requested range
+    const unsigned int from = base + src_offset, to = base + dst_offset;  // first element read / written
+    dst[to] = src[from];
+    dst[to + 1] = src[from + 1];
+    dst[to + 2] = src[from + 2];
+    dst[to + 3] = src[from + 3];
+}
diff --git a/src/kernels/cl_internal_copy_buf_align4.cl b/src/kernels/cl_internal_copy_buf_align4.cl
new file mode 100644 (file)
index 0000000..44a0f81
--- /dev/null
@@ -0,0 +1,8 @@
+kernel void __cl_cpy_region_align4 ( global float* src, unsigned int src_offset,
+                                     global float* dst, unsigned int dst_offset,
+                                     unsigned int size)
+{   // Work-item n copies the single float src[src_offset + n] to dst[dst_offset + n].
+    int idx = get_global_id(0);
+    if (idx >= size) return;  // work-items at or past size copy nothing
+    dst[idx+dst_offset] = src[idx+src_offset];
+}