1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
14 // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
18 // Wang Weiyan, wangweiyanster@gmail.com
19 // Peng Xiao, pengxiao@multicorewareinc.com
21 // Redistribution and use in source and binary forms, with or without modification,
22 // are permitted provided that the following conditions are met:
24 // * Redistribution's of source code must retain the above copyright notice,
25 // this list of conditions and the following disclaimer.
27 // * Redistribution's in binary form must reproduce the above copyright notice,
28 // this list of conditions and the following disclaimer in the documentation
29 // and/or other materials provided with the distribution.
31 // * The name of the copyright holders may not be used to endorse or promote products
32 // derived from this software without specific prior written permission.
34 // This software is provided by the copyright holders and contributors "as is" and
35 // any express or implied warranties, including, but not limited to, the implied
36 // warranties of merchantability and fitness for a particular purpose are disclaimed.
37 // In no event shall the Intel Corporation or contributors be liable for any direct,
38 // indirect, incidental, special, exemplary, or consequential damages
39 // (including, but not limited to, procurement of substitute goods or services;
40 // loss of use, data, or profits; or business interruption) however caused
41 // and on any theory of liability, whether in contract, strict liability,
42 // or tort (including negligence or otherwise) arising in any way out of
43 // the use of this software, even if advised of the possibility of such damage.
47 #include "precomp.hpp"
48 #include "opencl_kernels.hpp"
51 using namespace cv::ocl;
// Launches the kernel `kernelName` from the cvt_color program for an
// "RGB -> other colour space" style conversion.
//   src, dst          - input / output matrices. dst is not allocated here; the
//                       callers in this file call dst.create() before invoking.
//   bidx              - forwarded unchanged to the kernel as an int argument
//                       (callers pass 0, 2 or -1).
//   kernelName        - name of the kernel to run.
//   additionalOptions - appended verbatim to the build options; callers that use
//                       it supply their own leading space (e.g. " -D hrange=%d").
//   data1, data2      - optional extra buffers passed to the kernel after the
//                       fixed arguments; both default to an empty oclMat.
53 static void fromRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
54 const std::string & additionalOptions = std::string(),
55 const oclMat & data1 = oclMat(), const oclMat & data2 = oclMat())
// Offsets are divided by the single-channel element size (elemSize1) and steps
// come from step1(), so both are presumably expressed in channel elements
// rather than bytes - TODO confirm against oclMat.
57 int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
58 int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
// The source depth is exposed to the kernel as a -D DEPTH_<n> define.
60 std::string build_options = format("-D DEPTH_%d", src.depth());
61 if (!additionalOptions.empty())
62 build_options += additionalOptions;
// Kernel arguments as (size, pointer) pairs. The pointers refer to locals and
// to members of src/dst, so they must stay alive until the launch below.
// The order presumably has to match the kernel's parameter list - verify
// against the cvt_color kernel source.
64 vector<pair<size_t , const void *> > args;
65 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
66 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
67 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
68 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
69 args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
70 args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
71 args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
72 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
73 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
// Extra buffers (lookup tables / coefficients supplied by the caller).
// NOTE(review): the embedded line numbers skip the line directly before each
// of the two pushes below, so a guarding condition (for instance a check that
// the matrix is non-empty) may exist there - confirm against the full file.
76 args.push_back( make_pair( sizeof(cl_mem) , (void *)&data1.data ));
78 args.push_back( make_pair( sizeof(cl_mem) , (void *)&data2.data ));
// gt spans the destination size (cols x rows x 1); lt is fixed at 16 x 16 x 1.
// Presumably these are the global and local work sizes - TODO confirm with
// openCLExecuteKernel.
80 size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
81 openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the kernel `kernelName` from the cvt_color program for an
// "other colour space -> RGB" style conversion.
//   src, dst          - input / output matrices. dst is not allocated here; the
//                       callers in this file call dst.create() before invoking.
//   bidx              - forwarded unchanged to the kernel as an int argument.
//   kernelName        - name of the kernel to run.
//   additionalOptions - appended verbatim to the build options; callers that use
//                       it supply their own leading space.
//   data              - optional extra buffer passed after the fixed arguments;
//                       defaults to an empty oclMat.
84 static void toRGB_caller(const oclMat &src, oclMat &dst, int bidx, const std::string & kernelName,
85 const std::string & additionalOptions = std::string(), const oclMat & data = oclMat())
// Besides the source depth, the destination channel count is exposed to the
// kernel as the `dcn` define.
87 std::string build_options = format("-D DEPTH_%d -D dcn=%d", src.depth(), dst.channels());
88 if (!additionalOptions.empty())
89 build_options += additionalOptions;
// Offsets are divided by the single-channel element size and steps come from
// step1(); presumably both are in channel elements rather than bytes -
// TODO confirm against oclMat.
91 int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
92 int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
// Kernel arguments as (size, pointer) pairs; the pointed-to values must stay
// alive until the launch below. The order presumably has to match the kernel's
// parameter list - verify against the cvt_color kernel source.
94 vector<pair<size_t , const void *> > args;
95 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
96 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
97 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
98 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
99 args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
100 args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
101 args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
102 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
103 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
// Extra buffer (e.g. conversion coefficients supplied by the caller).
// NOTE(review): the embedded line numbers skip the lines directly before this
// push, so a guarding condition (for instance a non-empty check) may exist
// there - confirm against the full file.
106 args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data ));
// gt spans the destination size (cols x rows x 1); lt is fixed at 16 x 16 x 1.
// Presumably global and local work sizes - TODO confirm with openCLExecuteKernel.
108 size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
109 openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the fixed kernel "RGB" from the cvt_color program. The variant is
// chosen entirely through build-time defines, so no bidx argument is passed.
//   src, dst - input / output matrices. dst is not allocated here; the caller
//              in this file creates it first.
//   reverse  - selects the REVERSE define when true and the ORDER define when
//              false (presumably whether the channel order is swapped -
//              confirm in the kernel source).
112 static void RGB_caller(const oclMat &src, oclMat &dst, bool reverse)
// Defines handed to the kernel: DEPTH_<n>, dcn (destination channels),
// scn (source channels) and exactly one of REVERSE / ORDER.
114 std::string build_options = format("-D DEPTH_%d -D dcn=%d -D scn=%d -D %s", src.depth(),
115 dst.channels(), src.channels(), reverse ? "REVERSE" : "ORDER");
// Offsets are divided by the single-channel element size and steps come from
// step1(); presumably both are in channel elements rather than bytes -
// TODO confirm against oclMat.
116 int src_offset = src.offset / src.elemSize1(), src_step = src.step1();
117 int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step1();
// Kernel arguments as (size, pointer) pairs; the pointed-to values must stay
// alive until the launch below.
119 vector<pair<size_t , const void *> > args;
120 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
121 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
122 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
123 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
124 args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
125 args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
126 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
127 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
// gt spans the destination size (cols x rows x 1); lt is fixed at 16 x 16 x 1.
// Presumably global and local work sizes - TODO confirm with openCLExecuteKernel.
129 size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
130 openCLExecuteKernel(src.clCxt, &cvt_color, "RGB", gt, lt, args, -1, -1, build_options.c_str());
// Launches the kernel `kernelName` from the cvt_color program for conversions
// whose source is a packed 5x5 image (the callers in this file assert
// scn == 2 and depth == CV_8U before calling).
//   src, dst   - input / output matrices. dst is not allocated here.
//   bidx       - forwarded unchanged to the kernel (callers pass 0, 2 or -1).
//   greenbits  - exposed to the kernel as the `greenbits` define (callers pass
//                6 for the 565 codes and 5 for the 555 codes).
//   kernelName - name of the kernel to run.
133 static void fromRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
135 std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D dcn=%d",
136 src.depth(), greenbits, dst.channels());
// Source offset and step are halved (>> 1). With the 2-channel 8-bit source
// the callers guarantee, this presumably converts byte units into 16-bit
// pixel units - TODO confirm against the kernel.
137 int src_offset = src.offset >> 1, src_step = src.step >> 1;
// Destination offset and step are divided by the single-channel element size.
138 int dst_offset = dst.offset / dst.elemSize1(), dst_step = dst.step / dst.elemSize1();
// Kernel arguments as (size, pointer) pairs; the pointed-to values must stay
// alive until the launch below.
140 vector<pair<size_t , const void *> > args;
141 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
142 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
143 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
144 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
145 args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
146 args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
147 args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
148 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
149 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
// gt spans the destination size (cols x rows x 1); lt is fixed at 16 x 16 x 1.
// Presumably global and local work sizes - TODO confirm with openCLExecuteKernel.
151 size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
152 openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Launches the kernel `kernelName` from the cvt_color program for conversions
// whose destination is a packed 5x5 image (the callers in this file create
// dst as CV_8UC2 and assert depth == CV_8U before calling).
//   src, dst   - input / output matrices. dst is not allocated here.
//   bidx       - forwarded unchanged to the kernel (callers pass 0, 2 or -1).
//   greenbits  - exposed to the kernel as the `greenbits` define (callers pass
//                6 for the 565 codes and 5 for the 555 codes).
//   kernelName - name of the kernel to run.
155 static void toRGB5x5_caller(const oclMat &src, oclMat &dst, int bidx, int greenbits, const std::string & kernelName)
157 std::string build_options = format("-D DEPTH_%d -D greenbits=%d -D scn=%d",
158 src.depth(), greenbits, src.channels());
// Source offset and step are passed through unscaled (only narrowed to int).
159 int src_offset = (int)src.offset, src_step = (int)src.step;
// Destination offset and step are halved (>> 1). With the CV_8UC2 destination
// the callers create, this presumably converts byte units into 16-bit pixel
// units - TODO confirm against the kernel.
160 int dst_offset = dst.offset >> 1, dst_step = dst.step >> 1;
// Kernel arguments as (size, pointer) pairs; the pointed-to values must stay
// alive until the launch below.
162 vector<pair<size_t , const void *> > args;
163 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.cols));
164 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.rows));
165 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_step));
166 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_step));
167 args.push_back( make_pair( sizeof(cl_int) , (void *)&bidx));
168 args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data));
169 args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data));
170 args.push_back( make_pair( sizeof(cl_int) , (void *)&src_offset ));
171 args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_offset ));
// gt spans the destination size (cols x rows x 1); lt is fixed at 16 x 16 x 1.
// Presumably global and local work sizes - TODO confirm with openCLExecuteKernel.
173 size_t gt[3] = { dst.cols, dst.rows, 1 }, lt[3] = { 16, 16, 1 };
174 openCLExecuteKernel(src.clCxt, &cvt_color, kernelName.c_str(), gt, lt, args, -1, -1, build_options.c_str());
// Dispatches a colour conversion identified by `code`: validates the source
// layout, allocates dst with the matching size/type and forwards to one of the
// *_caller helpers that launches the corresponding kernel.
//   src  - input matrix; depth must be CV_8U, CV_16U or CV_32F (individual
//          conversions narrow this further).
//   dst  - output matrix, (re)created here via dst.create().
//   code - CV_* colour conversion code.
//   dcn  - requested number of destination channels; several branches
//          overwrite it based on `code`.
// Fails through CV_Assert on an unsupported layout and through CV_Error on an
// unhandled code.
// NOTE(review): in this listing the statement that selects on `code`, the
// statements ending each case, and several short statements/declarations are
// on lines that are not shown (gaps in the embedded numbering). Comments below
// describe only what is visible.
177 static void cvtColor_caller(const oclMat &src, oclMat &dst, int code, int dcn)
179 Size sz = src.size();
180 int scn = src.channels(), depth = src.depth(), bidx;
182 CV_Assert(depth == CV_8U || depth == CV_16U || depth == CV_32F);
// --- Add/drop alpha and/or reorder channels between 3/4-channel images. ---
186 case CV_BGR2BGRA: case CV_RGB2BGRA: case CV_BGRA2BGR:
187 case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
189 CV_Assert(scn == 3 || scn == 4);
// Four output channels for the codes listed below, three otherwise; the
// incoming dcn is ignored.
190 dcn = code == CV_BGR2BGRA || code == CV_RGB2BGRA || code == CV_BGRA2RGBA ? 4 : 3;
// reverse is false only for BGR2BGRA and BGRA2BGR.
191 bool reverse = !(code == CV_BGR2BGRA || code == CV_BGRA2BGR);
192 dst.create(sz, CV_MAKE_TYPE(depth, dcn));
193 RGB_caller(src, dst, reverse);
// --- 3/4-channel 8-bit image -> packed BGR565 / BGR555 (stored as CV_8UC2). ---
196 case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
197 case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
199 CV_Assert((scn == 3 || scn == 4) && depth == CV_8U );
// bidx is 0 for BGR/BGRA sources and 2 for RGB/RGBA sources.
200 bidx = code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
201 code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2;
// greenbits is 6 for the 565 codes and 5 for the 555 codes.
202 int greenbits = code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
203 code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5;
204 dst.create(sz, CV_8UC2);
205 toRGB5x5_caller(src, dst, bidx, greenbits, "RGB2RGB5x5");
// --- Packed BGR565 / BGR555 -> 3/4-channel 8-bit image. ---
208 case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
209 case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
// dcn is derived from the code (4 for the *A targets, else 3), so the dcn
// part of the assertion below always holds; it effectively checks scn/depth.
211 dcn = code == CV_BGR5652BGRA || code == CV_BGR5552BGRA || code == CV_BGR5652RGBA || code == CV_BGR5552RGBA ? 4 : 3;
212 CV_Assert((dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U);
// bidx is 0 for BGR/BGRA targets and 2 for RGB/RGBA targets.
213 bidx = code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
214 code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2;
215 int greenbits = code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
216 code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5;
217 dst.create(sz, CV_MAKETYPE(depth, dcn));
218 fromRGB5x5_caller(src, dst, bidx, greenbits, "RGB5x52RGB");
// --- Packed BGR565 / BGR555 -> single-channel gray; bidx is passed as -1. ---
221 case CV_BGR5652GRAY: case CV_BGR5552GRAY:
223 CV_Assert(scn == 2 && depth == CV_8U);
224 dst.create(sz, CV_8UC1);
225 int greenbits = code == CV_BGR5652GRAY ? 6 : 5;
226 fromRGB5x5_caller(src, dst, -1, greenbits, "BGR5x52Gray");
// --- Single-channel gray -> packed BGR565 / BGR555; bidx is passed as -1. ---
229 case CV_GRAY2BGR565: case CV_GRAY2BGR555:
231 CV_Assert(scn == 1 && depth == CV_8U);
232 dst.create(sz, CV_8UC2);
233 int greenbits = code == CV_GRAY2BGR565 ? 6 : 5;
234 toRGB5x5_caller(src, dst, -1, greenbits, "Gray2BGR5x5");
// --- 3/4-channel colour -> single-channel gray of the same depth. ---
237 case CV_RGB2GRAY: case CV_BGR2GRAY: case CV_RGBA2GRAY: case CV_BGRA2GRAY:
239 CV_Assert(scn == 3 || scn == 4);
// bidx is 0 for BGR/BGRA sources and 2 for RGB/RGBA sources.
240 bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2;
241 dst.create(sz, CV_MAKETYPE(depth, 1));
242 fromRGB_caller(src, dst, bidx, "RGB2Gray");
// --- Gray -> 3-channel (GRAY2BGR) or 4-channel (GRAY2BGRA); bidx fixed at 0. ---
245 case CV_GRAY2BGR: case CV_GRAY2BGRA:
248 dcn = code == CV_GRAY2BGRA ? 4 : 3;
249 dst.create(sz, CV_MAKETYPE(depth, dcn));
250 toRGB_caller(src, dst, 0, "Gray2RGB");
// --- BGR/RGB (3 or 4 channels) -> 3-channel YUV. ---
253 case CV_BGR2YUV: case CV_RGB2YUV:
255 CV_Assert(scn == 3 || scn == 4);
256 bidx = code == CV_BGR2YUV ? 0 : 2;
257 dst.create(sz, CV_MAKETYPE(depth, 3));
258 fromRGB_caller(src, dst, bidx, "RGB2YUV");
// --- 3-channel YUV -> BGR/RGB with dcn (3 or 4) output channels. ---
// NOTE(review): dcn is the caller-supplied value here unless it is adjusted on
// lines not shown in this listing - confirm how non-positive dcn is handled.
261 case CV_YUV2BGR: case CV_YUV2RGB:
265 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
266 bidx = code == CV_YUV2BGR ? 0 : 2;
267 dst.create(sz, CV_MAKETYPE(depth, dcn));
268 toRGB_caller(src, dst, bidx, "YUV2RGB");
// --- NV12 YUV -> BGR/RGB(A), 8-bit only. ---
271 case CV_YUV2RGB_NV12: case CV_YUV2BGR_NV12:
272 case CV_YUV2RGBA_NV12: case CV_YUV2BGRA_NV12:
// The source must have an even width and a height divisible by 3; the
// destination keeps the width and uses two thirds of the source height.
275 CV_Assert( sz.width % 2 == 0 && sz.height % 3 == 0 && depth == CV_8U );
276 dcn = code == CV_YUV2BGRA_NV12 || code == CV_YUV2RGBA_NV12 ? 4 : 3;
277 bidx = code == CV_YUV2BGRA_NV12 || code == CV_YUV2BGR_NV12 ? 0 : 2;
279 Size dstSz(sz.width, sz.height * 2 / 3);
280 dst.create(dstSz, CV_MAKETYPE(depth, dcn));
281 toRGB_caller(src, dst, bidx, "YUV2RGBA_NV12");
// --- BGR/RGB (3 or 4 channels) -> 3-channel YCrCb. ---
284 case CV_BGR2YCrCb: case CV_RGB2YCrCb:
286 CV_Assert(scn == 3 || scn == 4);
287 bidx = code == CV_BGR2YCrCb ? 0 : 2;
288 dst.create(sz, CV_MAKETYPE(depth, 3));
289 fromRGB_caller(src, dst, bidx, "RGB2YCrCb");
// --- 3-channel YCrCb -> BGR/RGB with dcn (3 or 4) output channels. ---
// NOTE(review): dcn may be adjusted on lines not shown before the assertion.
292 case CV_YCrCb2BGR: case CV_YCrCb2RGB:
296 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
297 bidx = code == CV_YCrCb2BGR ? 0 : 2;
298 dst.create(sz, CV_MAKETYPE(depth, dcn));
299 toRGB_caller(src, dst, bidx, "YCrCb2RGB");
// --- BGR/RGB (3 or 4 channels) -> 3-channel XYZ. ---
// A 9-element coefficient table is copied into `c` and handed to the kernel
// through oclCoeffs as the first extra buffer.
// NOTE(review): the declarations of `c`, `coeffs` and `oclCoeffs`, the
// depth/bidx conditions selecting between the two variants, and the values of
// the integer table are on lines not shown in this listing.
302 case CV_BGR2XYZ: case CV_RGB2XYZ:
304 CV_Assert(scn == 3 || scn == 4);
305 bidx = code == CV_BGR2XYZ ? 0 : 2;
306 dst.create(sz, CV_MAKE_TYPE(depth, 3));
// Floating-point variant of the table (three rows of three coefficients).
313 0.412453f, 0.357580f, 0.180423f,
314 0.212671f, 0.715160f, 0.072169f,
315 0.019334f, 0.119193f, 0.950227f
// Exchanges the first and third coefficient of every row; presumably applied
// only for one channel order (bidx) - the controlling condition is not shown.
319 std::swap(coeffs[0], coeffs[2]);
320 std::swap(coeffs[3], coeffs[5]);
321 std::swap(coeffs[6], coeffs[8]);
// Copies the table into `c` as a 1x9 CV_32FC1 matrix.
323 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
// Same column exchange for the integer variant of the table.
335 std::swap(coeffs[0], coeffs[2]);
336 std::swap(coeffs[3], coeffs[5]);
337 std::swap(coeffs[6], coeffs[8]);
// Copies the integer table into `c` as a 1x9 CV_32SC1 matrix.
339 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
343 fromRGB_caller(src, dst, bidx, "RGB2XYZ", "", oclCoeffs);
// --- 3-channel XYZ -> BGR/RGB with dcn (3 or 4) output channels. ---
// Mirrors the branch above with the inverse table; the same elision caveats
// apply (declarations, conditions and integer values are not shown).
346 case CV_XYZ2BGR: case CV_XYZ2RGB:
350 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4));
351 bidx = code == CV_XYZ2BGR ? 0 : 2;
352 dst.create(sz, CV_MAKE_TYPE(depth, dcn));
// Floating-point variant of the inverse table.
359 3.240479f, -1.53715f, -0.498535f,
360 -0.969256f, 1.875991f, 0.041556f,
361 0.055648f, -0.204043f, 1.057311f
// Exchanges the first and third row of the table (elements 0..2 with 6..8);
// presumably applied only for one channel order - the condition is not shown.
365 std::swap(coeffs[0], coeffs[6]);
366 std::swap(coeffs[1], coeffs[7]);
367 std::swap(coeffs[2], coeffs[8]);
369 Mat(1, 9, CV_32FC1, &coeffs[0]).copyTo(c);
// Same row exchange for the integer variant of the table.
381 std::swap(coeffs[0], coeffs[6]);
382 std::swap(coeffs[1], coeffs[7]);
383 std::swap(coeffs[2], coeffs[8]);
385 Mat(1, 9, CV_32SC1, &coeffs[0]).copyTo(c);
389 toRGB_caller(src, dst, bidx, "XYZ2RGB", "", oclCoeffs);
// --- BGR/RGB (3 or 4 channels) -> 3-channel HSV or HLS, 8-bit or float. ---
392 case CV_BGR2HSV: case CV_RGB2HSV: case CV_BGR2HSV_FULL: case CV_RGB2HSV_FULL:
393 case CV_BGR2HLS: case CV_RGB2HLS: case CV_BGR2HLS_FULL: case CV_RGB2HLS_FULL:
395 CV_Assert((scn == 3 || scn == 4) && (depth == CV_8U || depth == CV_32F));
396 bidx = code == CV_BGR2HSV || code == CV_BGR2HLS ||
397 code == CV_BGR2HSV_FULL || code == CV_BGR2HLS_FULL ? 0 : 2;
// hrange: 360 for float input; otherwise 180 for the plain codes and 256 for
// the *_FULL codes.
398 int hrange = depth == CV_32F ? 360 : code == CV_BGR2HSV || code == CV_RGB2HSV ||
399 code == CV_BGR2HLS || code == CV_RGB2HLS ? 180 : 256;
400 bool is_hsv = code == CV_BGR2HSV || code == CV_RGB2HSV || code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL;
401 dst.create(sz, CV_MAKETYPE(depth, 3));
// Kernel name is "RGB2HSV" or "RGB2HLS".
402 std::string kernelName = std::string("RGB2") + (is_hsv ? "HSV" : "HLS");
// 8-bit HSV uses precomputed integer division tables that are kept in
// function-local statics and uploaded to the device.
404 if (is_hsv && depth == CV_8U)
406 static oclMat sdiv_data;
407 static oclMat hdiv_data180;
408 static oclMat hdiv_data256;
409 static int sdiv_table[256];
410 static int hdiv_table180[256];
411 static int hdiv_table256[256];
// One "already built" flag per hue range.
// NOTE(review): `volatile` does not provide inter-thread synchronization, so
// concurrent first calls could race on these statics - confirm the threading
// expectations. The statements that test/set `initialized` are not shown.
412 static volatile bool initialized180 = false, initialized256 = false;
413 volatile bool & initialized = hrange == 180 ? initialized180 : initialized256;
// Note: this single declaration introduces both the table pointer and a plain
// int hsv_shift = 12.
417 int * const hdiv_table = hrange == 180 ? hdiv_table180 : hdiv_table256, hsv_shift = 12;
418 oclMat & hdiv_data = hrange == 180 ? hdiv_data180 : hdiv_data256;
// Entry 0 is set to 0 explicitly; the fill loops below start at index 1 and
// so never divide by zero.
420 sdiv_table[0] = hdiv_table180[0] = hdiv_table256[0] = 0;
422 int v = 255 << hsv_shift;
// The sdiv table does not depend on hrange, so it is built and uploaded only
// while neither hue-range flag has been set.
423 if (!initialized180 && !initialized256)
425 for(int i = 1; i < 256; i++ )
// sdiv_table[i] = (255 << 12) / i, saturated to int.
426 sdiv_table[i] = saturate_cast<int>(v/(1.*i));
427 sdiv_data.upload(Mat(1, 256, CV_32SC1, sdiv_table));
// hdiv_table[i] = (hrange << 12) / (6 * i), saturated to int, for the
// currently selected hue range.
430 v = hrange << hsv_shift;
431 for (int i = 1; i < 256; i++ )
432 hdiv_table[i] = saturate_cast<int>(v/(6.*i));
434 hdiv_data.upload(Mat(1, 256, CV_32SC1, hdiv_table));
// Table-driven launch: hrange is passed as a define and both tables as the
// extra buffers.
438 fromRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d", hrange), sdiv_data, hrange == 256 ? hdiv_data256 : hdiv_data180);
// Launch without tables: only the hue scale hrange/360 is passed as a define.
// Presumably this is the alternative (else) path of the condition above - the
// branching line is not shown.
442 fromRGB_caller(src, dst, bidx, kernelName, format(" -D hscale=%f", hrange*(1.f/360.f)));
// --- 3-channel HSV or HLS -> BGR/RGB with dcn (3 or 4) channels. ---
// NOTE(review): dcn may be adjusted on lines not shown before the assertion.
445 case CV_HSV2BGR: case CV_HSV2RGB: case CV_HSV2BGR_FULL: case CV_HSV2RGB_FULL:
446 case CV_HLS2BGR: case CV_HLS2RGB: case CV_HLS2BGR_FULL: case CV_HLS2RGB_FULL:
450 CV_Assert(scn == 3 && (dcn == 3 || dcn == 4) && (depth == CV_8U || depth == CV_32F));
451 bidx = code == CV_HSV2BGR || code == CV_HLS2BGR ||
452 code == CV_HSV2BGR_FULL || code == CV_HLS2BGR_FULL ? 0 : 2;
// hrange: 360 for float input; otherwise 180 for the plain codes and 255 for
// the *_FULL codes (note: 255 here versus 256 in the forward conversion).
453 int hrange = depth == CV_32F ? 360 : code == CV_HSV2BGR || code == CV_HSV2RGB ||
454 code == CV_HLS2BGR || code == CV_HLS2RGB ? 180 : 255;
455 bool is_hsv = code == CV_HSV2BGR || code == CV_HSV2RGB ||
456 code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL;
458 dst.create(sz, CV_MAKETYPE(depth, dcn));
// Kernel name is "HSV2RGB" or "HLS2RGB"; hscale is defined as 6 / hrange.
460 std::string kernelName = std::string(is_hsv ? "HSV" : "HLS") + "2RGB";
461 toRGB_caller(src, dst, bidx, kernelName, format(" -D hrange=%d -D hscale=%f", hrange, 6.f/hrange));
// --- RGBA <-> mRGBA, 4-channel 8-bit only; bidx is passed as 0. ---
464 case CV_RGBA2mRGBA: case CV_mRGBA2RGBA:
466 CV_Assert(scn == 4 && depth == CV_8U);
467 dst.create(sz, CV_MAKETYPE(depth, 4));
468 std::string kernelName = code == CV_RGBA2mRGBA ? "RGBA2mRGBA" : "mRGBA2RGBA";
470 fromRGB_caller(src, dst, 0, kernelName);
// Raised for conversion codes that are not handled above (the label
// introducing this statement is not shown - presumably the default case).
474 CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
// Public entry point of the ocl colour conversion: forwards all four arguments
// unchanged to the file-local cvtColor_caller, which validates the input,
// allocates dst and launches the matching kernel.
//   src  - input matrix.
//   dst  - output matrix, created by the callee.
//   code - CV_* colour conversion code.
//   dcn  - requested number of destination channels.
478 void cv::ocl::cvtColor(const oclMat &src, oclMat &dst, int code, int dcn)
480 cvtColor_caller(src, dst, code, dcn);