Merge pull request #1263 from abidrahmank:pyCLAHE_24
[profile/ivi/opencv.git] / modules / ocl / src / mcwutil.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                           License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // @Authors
18 //    Peng Xiao, pengxiao@multicorewareinc.com
19 //
20 // Redistribution and use in source and binary forms, with or without modification,
21 // are permitted provided that the following conditions are met:
22 //
23 //   * Redistribution's of source code must retain the above copyright notice,
24 //     this list of conditions and the following disclaimer.
25 //
26 //   * Redistribution's in binary form must reproduce the above copyright notice,
27 //     this list of conditions and the following disclaimer in the documentation
28 //     and/or other materials provided with the distribution.
29 //
30 //   * The name of the copyright holders may not be used to endorse or promote products
31 //     derived from this software without specific prior written permission.
32 //
33 // This software is provided by the copyright holders and contributors as is and
34 // any express or implied warranties, including, but not limited to, the implied
35 // warranties of merchantability and fitness for a particular purpose are disclaimed.
36 // In no event shall the Intel Corporation or contributors be liable for any direct,
37 // indirect, incidental, special, exemplary, or consequential damages
38 // (including, but not limited to, procurement of substitute goods or services;
39 // loss of use, data, or profits; or business interruption) however caused
40 // and on any theory of liability, whether in contract, strict liability,
41 // or tort (including negligence or otherwise) arising in any way out of
42 // the use of this software, even if advised of the possibility of such damage.
43 //
44 //M*/
45
46 #include "precomp.hpp"
47
// Helpers for switching a single GCC warning off around a region of code:
//
//     GCC_DIAG_OFF(some-warning)
//     ... code that would trigger -Wsome-warning ...
//     GCC_DIAG_ON(some-warning)
//
// The argument is the warning name without its "-W" prefix.
// Nothing is defined at all when __GNUC__ is not defined, so uses must be
// guarded by #ifdef __GNUC__.
#ifdef __GNUC__
#  if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 402
     // Building blocks: stringify, paste-then-stringify, and emit a
     // "#pragma GCC diagnostic ..." through _Pragma.
#    define GCC_DIAG_STR(s) #s
#    define GCC_DIAG_JOINSTR(x,y) GCC_DIAG_STR(x ## y)
#    define GCC_DIAG_DO_PRAGMA(x) _Pragma (#x)
#    define GCC_DIAG_PRAGMA(x) GCC_DIAG_DO_PRAGMA(GCC diagnostic x)
#    if ((__GNUC__ * 100) + __GNUC_MINOR__) >= 406
       // 4.6 and later: OFF saves the diagnostic state and ignores the
       // warning, ON restores the saved state.
#      define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(push) \
GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
#      define GCC_DIAG_ON(x) GCC_DIAG_PRAGMA(pop)
#    else
       // 4.2 .. 4.5: no push/pop is used; ON sets the diagnostic back to
       // "warning" explicitly.
#      define GCC_DIAG_OFF(x) GCC_DIAG_PRAGMA(ignored GCC_DIAG_JOINSTR(-W,x))
#      define GCC_DIAG_ON(x)  GCC_DIAG_PRAGMA(warning GCC_DIAG_JOINSTR(-W,x))
#    endif
#  else
     // Older GCC: both macros expand to nothing.
#    define GCC_DIAG_OFF(x)
#    define GCC_DIAG_ON(x)
#  endif
#endif /* __GNUC__ */
67
68 using namespace std;
69
70 namespace cv
71 {
72     namespace ocl
73     {
74
75         inline int divUp(int total, int grain)
76         {
77             return (total + grain - 1) / grain;
78         }
79
80         // provide additional methods for the user to interact with the command queue after a task is fired
81         static void openCLExecuteKernel_2(Context *clCxt , const char **source, string kernelName, size_t globalThreads[3],
82                                    size_t localThreads[3],  vector< pair<size_t, const void *> > &args, int channels,
83                                    int depth, char *build_options, FLUSH_MODE finish_mode)
84         {
85             //construct kernel name
86             //The rule is functionName_Cn_Dn, C represent Channels, D Represent DataType Depth, n represent an integer number
87             //for exmaple split_C2_D2, represent the split kernel with channels =2 and dataType Depth = 2(Data type is char)
88             stringstream idxStr;
89             if(channels != -1)
90                 idxStr << "_C" << channels;
91             if(depth != -1)
92                 idxStr << "_D" << depth;
93             kernelName += idxStr.str();
94
95             cl_kernel kernel;
96             kernel = openCLGetKernelFromSource(clCxt, source, kernelName, build_options);
97
98             if ( localThreads != NULL)
99             {
100                 globalThreads[0] = divUp(globalThreads[0], localThreads[0]) * localThreads[0];
101                 globalThreads[1] = divUp(globalThreads[1], localThreads[1]) * localThreads[1];
102                 globalThreads[2] = divUp(globalThreads[2], localThreads[2]) * localThreads[2];
103
104                 //size_t blockSize = localThreads[0] * localThreads[1] * localThreads[2];
105                 cv::ocl::openCLVerifyKernel(clCxt, kernel,  localThreads);
106             }
107             for(size_t i = 0; i < args.size(); i ++)
108                 openCLSafeCall(clSetKernelArg(kernel, i, args[i].first, args[i].second));
109
110             openCLSafeCall(clEnqueueNDRangeKernel((cl_command_queue)clCxt->oclCommandQueue(), kernel, 3, NULL, globalThreads,
111                                                   localThreads, 0, NULL, NULL));
112
113             switch(finish_mode)
114             {
115             case CLFINISH:
116                 clFinish((cl_command_queue)clCxt->oclCommandQueue());
117             case CLFLUSH:
118                 clFlush((cl_command_queue)clCxt->oclCommandQueue());
119                 break;
120             case DISABLE:
121             default:
122                 break;
123             }
124             openCLSafeCall(clReleaseKernel(kernel));
125         }
126
127         void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
128                                   size_t globalThreads[3], size_t localThreads[3],
129                                   vector< pair<size_t, const void *> > &args, int channels, int depth, FLUSH_MODE finish_mode)
130         {
131             openCLExecuteKernel2(clCxt, source, kernelName, globalThreads, localThreads, args,
132                                  channels, depth, NULL, finish_mode);
133         }
134         void openCLExecuteKernel2(Context *clCxt , const char **source, string kernelName,
135                                   size_t globalThreads[3], size_t localThreads[3],
136                                   vector< pair<size_t, const void *> > &args, int channels, int depth, char *build_options, FLUSH_MODE finish_mode)
137
138         {
139             openCLExecuteKernel_2(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth,
140                                   build_options, finish_mode);
141         }
142
143 #ifdef __GNUC__
144         GCC_DIAG_OFF(deprecated-declarations)
145 #endif
146         cl_mem bindTexture(const oclMat &mat)
147         {
148             cl_mem texture;
149             cl_image_format format;
150             int err;
151             int depth    = mat.depth();
152             int channels = mat.oclchannels();
153
154             switch(depth)
155             {
156             case CV_8U:
157                 format.image_channel_data_type = CL_UNSIGNED_INT8;
158                 break;
159             case CV_32S:
160                 format.image_channel_data_type = CL_UNSIGNED_INT32;
161                 break;
162             case CV_32F:
163                 format.image_channel_data_type = CL_FLOAT;
164                 break;
165             default:
166                 CV_Error(-1, "Image forma is not supported");
167                 break;
168             }
169             switch(channels)
170             {
171             case 1:
172                 format.image_channel_order     = CL_R;
173                 break;
174             case 3:
175                 format.image_channel_order     = CL_RGB;
176                 break;
177             case 4:
178                 format.image_channel_order     = CL_RGBA;
179                 break;
180             default:
181                 CV_Error(-1, "Image format is not supported");
182                 break;
183             }
184 #ifdef CL_VERSION_1_2
185             //this enables backwards portability to
186             //run on OpenCL 1.1 platform if library binaries are compiled with OpenCL 1.2 support
187             if(Context::getContext()->supportsFeature(Context::CL_VER_1_2))
188             {
189                 cl_image_desc desc;
190                 desc.image_type       = CL_MEM_OBJECT_IMAGE2D;
191                 desc.image_width      = mat.cols;
192                 desc.image_height     = mat.rows;
193                 desc.image_depth      = 0;
194                 desc.image_array_size = 1;
195                 desc.image_row_pitch  = 0;
196                 desc.image_slice_pitch = 0;
197                 desc.buffer           = NULL;
198                 desc.num_mip_levels   = 0;
199                 desc.num_samples      = 0;
200                 texture = clCreateImage((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_WRITE, &format, &desc, NULL, &err);            
201             }
202             else
203 #endif
204             {
205                 texture = clCreateImage2D(
206                     (cl_context)mat.clCxt->oclContext(),
207                     CL_MEM_READ_WRITE,
208                     &format,
209                     mat.cols,
210                     mat.rows,
211                     0,
212                     NULL,
213                     &err);
214             }
215             size_t origin[] = { 0, 0, 0 };
216             size_t region[] = { mat.cols, mat.rows, 1 };
217
218             cl_mem devData;
219             if (mat.cols * mat.elemSize() != mat.step)
220             {
221                 devData = clCreateBuffer((cl_context)mat.clCxt->oclContext(), CL_MEM_READ_ONLY, mat.cols * mat.rows
222                     * mat.elemSize(), NULL, NULL);
223                 const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1};
224                 clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin,
225                     regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL);
226                 clFlush((cl_command_queue)mat.clCxt->oclCommandQueue()); 
227             }
228             else
229             {
230                 devData = (cl_mem)mat.data;
231             }
232
233             clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0);
234             if ((mat.cols * mat.elemSize() != mat.step))
235             {
236                 clFlush((cl_command_queue)mat.clCxt->oclCommandQueue());
237                 clReleaseMemObject(devData);
238             }
239
240             openCLSafeCall(err);
241             return texture;
242         }
243 #ifdef __GNUC__
244         GCC_DIAG_ON(deprecated-declarations)
245 #endif
246
247         Ptr<TextureCL> bindTexturePtr(const oclMat &mat)
248         {
249             return Ptr<TextureCL>(new TextureCL(bindTexture(mat), mat.rows, mat.cols, mat.type()));
250         }
251         void releaseTexture(cl_mem& texture)
252         {
253             openCLFree(texture);
254         }
255
256         bool support_image2d(Context *clCxt)
257         {
258             static const char * _kernel_string = "__kernel void test_func(image2d_t img) {}";
259             static bool _isTested = false;
260             static bool _support = false;
261             if(_isTested)
262             {
263                 return _support;
264             }
265             try
266             {
267                 cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func");
268                 finish();
269                 _support = true;
270             }
271             catch (const cv::Exception& e)
272             {
273                 if(e.code == -217)
274                 {
275                     _support = false;
276                 }
277                 else
278                 {
279                     // throw e once again
280                     throw e;
281                 }
282             }
283             _isTested = true;
284             return _support;
285         }
286     }//namespace ocl
287
288 }//namespace cv