From c7a8921ab687c917f574400e87bf5e1c9eb3579d Mon Sep 17 00:00:00 2001 From: Yang Rong Date: Mon, 9 Sep 2013 16:10:09 +0800 Subject: [PATCH] Add api clEnqueueCopyImage. Also do some mirror changes: 1. Add a image var name to macro CHECK_IMAGE. 2. Fix local size error in cl_mem_copy_buffer_rect. 3. Fix cl_enqueue_write_image typo. Reviewed-by: Zhigang Gong --- src/cl_api.c | 75 +++++++++++++++++++++++++++++---- src/cl_context.c | 25 +++++++++++ src/cl_context.h | 9 ++-- src/cl_enqueue.c | 9 ++-- src/cl_mem.c | 123 +++++++++++++++++++++++++++++++++++++++++++++---------- src/cl_mem.h | 6 ++- src/cl_utils.h | 8 ++-- 7 files changed, 214 insertions(+), 41 deletions(-) diff --git a/src/cl_api.c b/src/cl_api.c index 67e6db4..4bc53ca 100644 --- a/src/cl_api.c +++ b/src/cl_api.c @@ -1635,7 +1635,7 @@ clEnqueueReadImage(cl_command_queue command_queue, enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); - CHECK_IMAGE(mem); + CHECK_IMAGE(mem, image); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; @@ -1717,7 +1717,7 @@ clEnqueueWriteImage(cl_command_queue command_queue, enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); - CHECK_IMAGE(mem); + CHECK_IMAGE(mem, image); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; @@ -1784,8 +1784,8 @@ error: cl_int clEnqueueCopyImage(cl_command_queue command_queue, - cl_mem src_image, - cl_mem dst_image, + cl_mem src_mem, + cl_mem dst_mem, const size_t * src_origin, const size_t * dst_origin, const size_t * region, @@ -1793,8 +1793,69 @@ clEnqueueCopyImage(cl_command_queue command_queue, const cl_event * event_wait_list, cl_event * event) { - NOT_IMPLEMENTED; - return 0; + cl_int err = CL_SUCCESS; + enqueue_data *data, no_wait_data = { 0 }; + cl_bool overlap = CL_TRUE; + cl_int i = 0; + + CHECK_QUEUE(command_queue); + CHECK_IMAGE(src_mem, src_image); + CHECK_IMAGE(dst_mem, dst_image); + if (command_queue->ctx != src_mem->ctx || + command_queue->ctx != dst_mem->ctx) { + err = CL_INVALID_CONTEXT; + goto error; + } + + if (src_image->fmt.image_channel_order != dst_image->fmt.image_channel_order || + src_image->fmt.image_channel_data_type != dst_image->fmt.image_channel_data_type) { + err = CL_IMAGE_FORMAT_MISMATCH; + goto error; + } + + if (!src_origin || !region || src_origin[0] + region[0] > src_image->w || + src_origin[1] + region[1] > src_image->h || src_origin[2] + region[2] > src_image->depth) { + err = CL_INVALID_VALUE; + goto error; + } + + if (!dst_origin || !region || dst_origin[0] + region[0] > dst_image->w || + dst_origin[1] + region[1] > dst_image->h || dst_origin[2] + region[2] > dst_image->depth) { + err = CL_INVALID_VALUE; + goto error; + } + + if ((src_image->image_type == CL_MEM_OBJECT_IMAGE2D && (src_origin[2] != 0 || region[2] != 1)) || + (dst_image->image_type == CL_MEM_OBJECT_IMAGE2D && (dst_origin[2] != 0 || region[2] != 1))) { + err = CL_INVALID_VALUE; + goto error; + } + + if (src_image == dst_image) { + for(i = 0; i < 3; i++) + overlap = overlap && (src_origin[i] < dst_origin[i] + region[i]) + && (dst_origin[i] < src_origin[i] + region[i]); + if(overlap == CL_TRUE) { + err = CL_MEM_COPY_OVERLAP; + goto error; + } + } + + cl_mem_kernel_copy_image(command_queue, src_image, dst_image, src_origin, dst_origin, region); + + TRY(cl_event_check_waitlist, num_events_in_wait_list, event_wait_list, event, src_mem->ctx); + + data = &no_wait_data; + data->type = EnqueueCopyImage; + data->queue = command_queue; + + if(handle_events(command_queue, num_events_in_wait_list, event_wait_list, + event, data, CL_COMMAND_COPY_IMAGE) == CL_ENQUEUE_EXECUTE_IMM) { + err = cl_command_queue_flush(command_queue); + } + +error: + return err; } cl_int @@ -1978,7 +2039,7 @@ clEnqueueMapImage(cl_command_queue command_queue, enqueue_data *data, no_wait_data = { 0 }; CHECK_QUEUE(command_queue); - CHECK_IMAGE(mem); + CHECK_IMAGE(mem, image); if (command_queue->ctx != mem->ctx) { err = CL_INVALID_CONTEXT; goto error; diff --git a/src/cl_context.c b/src/cl_context.c index 822fdf5..4f1c611 100644 --- a/src/cl_context.c +++ b/src/cl_context.c @@ -26,6 +26,8 @@ #include "cl_utils.h" #include "cl_driver.h" #include "cl_khr_icd.h" +#include "cl_kernel.h" +#include "cl_program.h" #include "CL/cl.h" #include "CL/cl_gl.h" @@ -243,3 +245,26 @@ cl_context_get_bufmgr(cl_context ctx) return cl_driver_get_bufmgr(ctx->drv); } +cl_kernel +cl_context_get_static_kernel(cl_context ctx, cl_int index, const char * str_kernel, const char * str_option) +{ + cl_int ret; + if (!ctx->internal_prgs[index]) + { + size_t length = strlen(str_kernel) + 1; + ctx->internal_prgs[index] = cl_program_create_from_source(ctx, 1, &str_kernel, &length, NULL); + + if (!ctx->internal_prgs[index]) + return NULL; + + ret = cl_program_build(ctx->internal_prgs[index], str_option); + if (ret != CL_SUCCESS) + return NULL; + + ctx->internal_prgs[index]->is_built = 1; + + ctx->internel_kernels[index] = cl_kernel_dup(ctx->internal_prgs[index]->ker[0]); + } + + return ctx->internel_kernels[index]; +} diff --git a/src/cl_context.h b/src/cl_context.h index 461113a..0342ef4 100644 --- a/src/cl_context.h +++ b/src/cl_context.h @@ -43,9 +43,9 @@ enum _cl_internal_ker_type { CL_ENQUEUE_COPY_BUFFER = 0, CL_ENQUEUE_COPY_BUFFER_RECT = 1, CL_ENQUEUE_COPY_IMAGE_0 = 2, //copy image 2d to image 2d - CL_ENQUEUE_COPY_IMAGE_1 = 3, //copy image 2d to image 2d - CL_ENQUEUE_COPY_IMAGE_2 = 4, //copy image 2d to image 2d - CL_ENQUEUE_COPY_IMAGE_3 = 5, //copy image 2d to image 2d + CL_ENQUEUE_COPY_IMAGE_1 = 3, //copy image 3d to image 2d + CL_ENQUEUE_COPY_IMAGE_2 = 4, //copy image 2d to image 3d + CL_ENQUEUE_COPY_IMAGE_3 = 5, //copy image 3d to image 3d CL_ENQUEUE_COPY_IMAGE_TO_BUFFER = 6, CL_ENQUEUE_COPY_BUFFER_TO_IMAGE = 7, CL_INTERNAL_KERNEL_MAX = 8 @@ -132,5 +132,8 @@ extern cl_int cl_context_ND_kernel(cl_context, /* Used for allocation */ extern cl_buffer_mgr cl_context_get_bufmgr(cl_context ctx); +/* Get the internal used kernel */ +extern cl_kernel cl_context_get_static_kernel(cl_context ctx, cl_int index, const char *str_kernel, const char * str_option); + #endif /* __CL_CONTEXT_H__ */ diff --git a/src/cl_enqueue.c b/src/cl_enqueue.c index 3446ac3..989b044 100644 --- a/src/cl_enqueue.c +++ b/src/cl_enqueue.c @@ -164,7 +164,7 @@ cl_int cl_enqueue_read_image(enqueue_data *data) void* src_ptr; cl_mem mem = data->mem_obj; - CHECK_IMAGE(mem); + CHECK_IMAGE(mem, image); const size_t* origin = data->origin; const size_t* region = data->region; @@ -209,7 +209,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data) void* dst_ptr; cl_mem mem = data->mem_obj; - CHECK_IMAGE(mem); + CHECK_IMAGE(mem, image); const size_t *origin = data->origin; const size_t *region = data->region; @@ -224,7 +224,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data) if (!origin[0] && region[0] == image->w && data->row_pitch == image->row_pitch && (region[2] == 1 || (!origin[1] && region[1] == image->h && data->slice_pitch == image->slice_pitch))) { - memcpy(dst_ptr, data->ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); + memcpy(dst_ptr, data->const_ptr, region[2] == 1 ? data->row_pitch*region[1] : data->slice_pitch*region[2]); } else { cl_uint y, z; @@ -236,7 +236,7 @@ cl_int cl_enqueue_write_image(enqueue_data *data) src += data->row_pitch; dst += image->row_pitch; } - data->ptr = (char*)data->ptr + data->slice_pitch; + data->const_ptr = (char*)data->const_ptr + data->slice_pitch; dst_ptr = (char*)dst_ptr + image->slice_pitch; } } @@ -373,6 +373,7 @@ cl_int cl_enqueue_handle(enqueue_data* data) case EnqueueUnmapMemObject: return cl_enqueue_unmap_mem_object(data); case EnqueueCopyBufferRect: + case EnqueueCopyImage: case EnqueueNDRangeKernel: cl_gpgpu_event_resume((cl_gpgpu_event)data->ptr); //goto default default: diff --git a/src/cl_mem.c b/src/cl_mem.c index 886af8c..203f47e 100644 --- a/src/cl_mem.c +++ b/src/cl_mem.c @@ -1,4 +1,4 @@ -/* +/* * Copyright © 2012 Intel Corporation * * This library is free software; you can redistribute it and/or @@ -25,7 +25,6 @@ #include "cl_device_id.h" #include "cl_driver.h" #include "cl_khr_icd.h" -#include "cl_program.h" #include "cl_kernel.h" #include "cl_command_queue.h" @@ -135,7 +134,7 @@ cl_get_image_info(cl_mem mem, size_t *param_value_size_ret) { int err; - CHECK_IMAGE(mem); + CHECK_IMAGE(mem, image); switch(param_name) { @@ -581,7 +580,7 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, cl_kernel ker; size_t global_off[] = {0,0,0}; size_t global_sz[] = {1,1,1}; - size_t local_sz[] = {LOCAL_SZ_2,LOCAL_SZ_1,LOCAL_SZ_0}; + size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_1}; if(region[1] == 1) local_sz[1] = 1; if(region[2] == 1) local_sz[2] = 1; global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; @@ -591,7 +590,7 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, cl_int src_offset = src_origin[2]*src_slice_pitch + src_origin[1]*src_row_pitch + src_origin[0]; cl_int dst_offset = dst_origin[2]*dst_slice_pitch + dst_origin[1]*dst_row_pitch + dst_origin[0]; - static const char *kernel_str = + static const char *str_kernel = "kernel void __cl_cpy_buffer_rect ( \n" " global char* src, global char* dst, \n" " unsigned int region0, unsigned int region1, unsigned int region2, \n" @@ -611,25 +610,9 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, /* We use one kernel to copy the data. The kernel is lazily created. */ assert(src_buf->ctx == dst_buf->ctx); - if (!src_buf->ctx->internal_prgs[index]) - { - size_t length = strlen(kernel_str) + 1; - src_buf->ctx->internal_prgs[index] = cl_program_create_from_source(src_buf->ctx, 1, &kernel_str, &length, NULL); - - if (!src_buf->ctx->internal_prgs[index]) - return CL_OUT_OF_RESOURCES; - - ret = cl_program_build(src_buf->ctx->internal_prgs[index], NULL); - if (ret != CL_SUCCESS) - return CL_OUT_OF_RESOURCES; - - src_buf->ctx->internal_prgs[index]->is_built = 1; - - src_buf->ctx->internel_kernels[index] = cl_kernel_dup(src_buf->ctx->internal_prgs[index]->ker[0]); - } /* setup the kernel and run. */ - ker = src_buf->ctx->internel_kernels[index]; + ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, NULL); if (!ker) return CL_OUT_OF_RESOURCES; @@ -651,6 +634,102 @@ cl_mem_copy_buffer_rect(cl_command_queue queue, cl_mem src_buf, cl_mem dst_buf, return ret; } +LOCAL cl_int +cl_mem_kernel_copy_image(cl_command_queue queue, struct _cl_mem_image* src_image, struct _cl_mem_image* dst_image, + const size_t *src_origin, const size_t *dst_origin, const size_t *region) { + cl_int ret; + cl_kernel ker; + size_t global_off[] = {0,0,0}; + size_t global_sz[] = {1,1,1}; + size_t local_sz[] = {LOCAL_SZ_0,LOCAL_SZ_1,LOCAL_SZ_2}; + cl_int index = CL_ENQUEUE_COPY_IMAGE_0; + char option[40] = ""; + + if(region[1] == 1) local_sz[1] = 1; + if(region[2] == 1) local_sz[2] = 1; + global_sz[0] = ((region[0] + local_sz[0] - 1) / local_sz[0]) * local_sz[0]; + global_sz[1] = ((region[1] + local_sz[1] - 1) / local_sz[1]) * local_sz[1]; + global_sz[2] = ((region[2] + local_sz[2] - 1) / local_sz[2]) * local_sz[2]; + + if(src_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + strcat(option, "-D SRC_IMAGE_3D"); + index += 1; + } + if(dst_image->image_type == CL_MEM_OBJECT_IMAGE3D) { + strcat(option, " -D DST_IMAGE_3D"); + index += 2; + } + + static const char *str_kernel = + "#ifdef SRC_IMAGE_3D \n" + " #define SRC_IMAGE_TYPE image3d_t \n" + " #define SRC_COORD_TYPE int3 \n" + "#else \n" + " #define SRC_IMAGE_TYPE image2d_t \n" + " #define SRC_COORD_TYPE int2 \n" + "#endif \n" + "#ifdef DST_IMAGE_3D \n" + " #define DST_IMAGE_TYPE image3d_t \n" + " #define DST_COORD_TYPE int3 \n" + "#else \n" + " #define DST_IMAGE_TYPE image2d_t \n" + " #define DST_COORD_TYPE int2 \n" + "#endif \n" + "kernel void __cl_copy_image ( \n" + " __read_only SRC_IMAGE_TYPE src_image, __write_only DST_IMAGE_TYPE dst_image, \n" + " unsigned int region0, unsigned int region1, unsigned int region2, \n" + " unsigned int src_origin0, unsigned int src_origin1, unsigned int src_origin2, \n" + " unsigned int dst_origin0, unsigned int dst_origin1, unsigned int dst_origin2) { \n" + " int i = get_global_id(0); \n" + " int j = get_global_id(1); \n" + " int k = get_global_id(2); \n" + " int4 color; \n" + " const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST; \n" + " SRC_COORD_TYPE src_coord; \n" + " DST_COORD_TYPE dst_coord; \n" + " if((i >= region0) || (j>= region1) || (k>=region2)) \n" + " return; \n" + " src_coord.x = src_origin0 + i; \n" + " src_coord.y = src_origin1 + j; \n" + "#ifdef SRC_IMAGE_3D \n" + " src_coord.z = src_origin2 + k; \n" + "#endif \n" + " dst_coord.x = dst_origin0 + i; \n" + " dst_coord.y = dst_origin1 + j; \n" + "#ifdef SRC_IMAGE_3D \n" + " dst_coord.z = dst_origin2 + k; \n" + "#endif \n" + " color = read_imagei(src_image, sampler, src_coord); \n" + " write_imagei(dst_image, src_coord, color); \n" + "}"; + + + /* We use one kernel to copy the data. The kernel is lazily created. */ + assert(src_image->base.ctx == dst_image->base.ctx); + + /* setup the kernel and run. */ + ker = cl_context_get_static_kernel(queue->ctx, index, str_kernel, option); + if (!ker) + return CL_OUT_OF_RESOURCES; + + cl_kernel_set_arg(ker, 0, sizeof(cl_mem), &src_image); + cl_kernel_set_arg(ker, 1, sizeof(cl_mem), &dst_image); + cl_kernel_set_arg(ker, 2, sizeof(cl_int), ®ion[0]); + cl_kernel_set_arg(ker, 3, sizeof(cl_int), ®ion[1]); + cl_kernel_set_arg(ker, 4, sizeof(cl_int), ®ion[2]); + cl_kernel_set_arg(ker, 5, sizeof(cl_int), &src_origin[0]); + cl_kernel_set_arg(ker, 6, sizeof(cl_int), &src_origin[1]); + cl_kernel_set_arg(ker, 7, sizeof(cl_int), &src_origin[2]); + cl_kernel_set_arg(ker, 8, sizeof(cl_int), &dst_origin[0]); + cl_kernel_set_arg(ker, 9, sizeof(cl_int), &dst_origin[1]); + cl_kernel_set_arg(ker, 10, sizeof(cl_int), &dst_origin[2]); + + ret = cl_command_queue_ND_range(queue, ker, 1, global_off, global_sz, local_sz); + cl_command_queue_finish(queue); + + return ret; +} + LOCAL void* cl_mem_map(cl_mem mem) { diff --git a/src/cl_mem.h b/src/cl_mem.h index b9b3c0a..530fe79 100644 --- a/src/cl_mem.h +++ b/src/cl_mem.h @@ -186,10 +186,14 @@ extern void cl_mem_gl_delete(struct _cl_mem_gl_image *); /* Add one more reference to this object */ extern void cl_mem_add_ref(cl_mem); -/* api clEnqueueCopy buffer rect help function */ +/* api clEnqueueCopyBufferRect help function */ extern cl_int cl_mem_copy_buffer_rect(cl_command_queue, cl_mem, cl_mem, const size_t *, const size_t *, const size_t *, size_t, size_t, size_t, size_t); + +/* api clEnqueueCopyImage help function */ +extern cl_int cl_mem_kernel_copy_image(cl_command_queue, struct _cl_mem_image*, struct _cl_mem_image*, + const size_t *, const size_t *, const size_t *); /* Directly map a memory object */ extern void *cl_mem_map(cl_mem); diff --git a/src/cl_utils.h b/src/cl_utils.h index 5c523b2..fa900a7 100644 --- a/src/cl_utils.h +++ b/src/cl_utils.h @@ -138,7 +138,7 @@ do { \ } \ } while (0) -#define CHECK_IMAGE(MEM) \ +#define CHECK_IMAGE(MEM, IMAGE) \ CHECK_MEM(MEM); \ do { \ if (UNLIKELY(!IS_IMAGE(MEM))) { \ @@ -146,13 +146,13 @@ do { \ goto error; \ } \ } while (0); \ -struct _cl_mem_image *image; \ -image = cl_mem_image(MEM); \ +struct _cl_mem_image *IMAGE; \ +IMAGE = cl_mem_image(MEM); \ #define CHECK_EVENT(EVENT) \ do { \ if (UNLIKELY(EVENT == NULL)) { \ - err = CL_INVALID_EVENT; \ + err = CL_INVALID_EVENT; \ goto error; \ } \ if (UNLIKELY(EVENT->magic != CL_MAGIC_EVENT_HEADER)) { \ -- 2.7.4