1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
44 #include "opencl_kernels_stitching.hpp"
// Forward declarations of the device-side helpers called from
// MultiBandBlender::feed (addSrcWeightGpu*) and MultiBandBlender::blend
// (normalizeUsingWeightMapGpu*). Each helper comes in two flavours matching
// the blender's weight type: 16S (short weights) and 32F (float weights).
47 namespace cv { namespace cuda { namespace device
// Presumably the GPU counterparts of the CPU accumulation loops in
// MultiBandBlender::feed (weighted src added into dst, src_weight added into
// dst_weight, over the region rc) -- TODO confirm against the kernel sources.
51 void addSrcWeightGpu16S(const PtrStep<short> src, const PtrStep<short> src_weight,
52 PtrStep<short> dst, PtrStep<short> dst_weight, cv::Rect &rc);
53 void addSrcWeightGpu32F(const PtrStep<short> src, const PtrStepf src_weight,
54 PtrStep<short> dst, PtrStepf dst_weight, cv::Rect &rc);
// Presumably the GPU counterparts of normalizeUsingWeightMap (src divided by
// weight over a width x height area) -- TODO confirm against the kernel sources.
55 void normalizeUsingWeightMapGpu16S(const PtrStep<short> weight, PtrStep<short> src,
56 const int width, const int height);
57 void normalizeUsingWeightMapGpu32F(const PtrStepf weight, PtrStep<short> src,
58 const int width, const int height);
// Small positive constant with two uses in this file: it is added to float
// weights before dividing in normalizeUsingWeightMap, and it is the threshold
// (weight > WEIGHT_EPS) used by FeatherBlender::blend and
// MultiBandBlender::blend to derive dst_mask_ from the accumulated weights.
66 static const float WEIGHT_EPS = 1e-5f;
// Factory for the blender implementations defined in this file.
//
// type    : blender selector. MULTI_BAND yields a MultiBandBlender; the other
//           returns below produce a plain Blender and a FeatherBlender.
// try_gpu : forwarded to the MultiBandBlender constructor only.
//
// An unsupported type is reported through CV_Error(Error::StsBadArg).
68 Ptr<Blender> Blender::createDefault(int type, bool try_gpu)
71 return makePtr<Blender>();
73 return makePtr<FeatherBlender>();
74 if (type == MULTI_BAND)
75 return makePtr<MultiBandBlender>(try_gpu);
76 CV_Error(Error::StsBadArg, "unsupported blending method");
// Follows the CV_Error call; presumably present only to give every path a
// return value.
77 return Ptr<Blender>();
// Convenience overload: derives the destination ROI from the image corners and
// sizes via resultRoi() and forwards to prepare(Rect).
81 void Blender::prepare(const std::vector<Point> &corners, const std::vector<Size> &sizes)
83 prepare(resultRoi(corners, sizes));
// Allocates the destination buffers for a blending session.
//
// dst_roi : rectangle of the final panorama; only its size is used by the
//           allocations below.
//
// dst_ becomes a zero-filled CV_16SC3 image and dst_mask_ a zero-filled CV_8U
// mask, both of size dst_roi.size().
87 void Blender::prepare(Rect dst_roi)
89 dst_.create(dst_roi.size(), CV_16SC3);
90 dst_.setTo(Scalar::all(0));
91 dst_mask_.create(dst_roi.size(), CV_8U);
92 dst_mask_.setTo(Scalar::all(0));
// Pastes one source image into the destination buffers without any blending.
//
// _img  : source image, must be CV_16SC3 (asserted).
// _mask : source mask, must be CV_8U (asserted).
// tl    : top-left corner of the image, in the same coordinate frame as
//         dst_roi_.
//
// Source pixels are written into dst_ and the source mask is OR-ed into
// dst_mask_, both at offset (dx, dy) relative to the destination origin.
// No bounds checks are visible here: the image placed at tl is assumed to lie
// inside the prepared destination.
97 void Blender::feed(InputArray _img, InputArray _mask, Point tl)
99 Mat img = _img.getMat();
100 Mat mask = _mask.getMat();
101 Mat dst = dst_.getMat(ACCESS_RW);
102 Mat dst_mask = dst_mask_.getMat(ACCESS_RW);
104 CV_Assert(img.type() == CV_16SC3);
105 CV_Assert(mask.type() == CV_8U);
// Offset of the source image inside the destination buffers.
106 int dx = tl.x - dst_roi_.x;
107 int dy = tl.y - dst_roi_.y;
109 for (int y = 0; y < img.rows; ++y)
111 const Point3_<short> *src_row = img.ptr<Point3_<short> >(y);
112 Point3_<short> *dst_row = dst.ptr<Point3_<short> >(dy + y);
113 const uchar *mask_row = mask.ptr<uchar>(y);
114 uchar *dst_mask_row = dst_mask.ptr<uchar>(dy + y);
116 for (int x = 0; x < img.cols; ++x)
119 dst_row[dx + x] = src_row[x];
120 dst_mask_row[dx + x] |= mask_row[x];
// Finalises the blended result.
//
// dst, dst_mask : output arrays for the blended image and its mask.
//
// Pixels of dst_ whose dst_mask_ entry equals 0 are cleared to zero, and
// dst_mask_ is handed to the caller through dst_mask.
126 void Blender::blend(InputOutputArray dst, InputOutputArray dst_mask)
// mask is non-zero exactly where dst_mask_ == 0, i.e. where nothing was fed.
129 compare(dst_mask_, 0, mask, CMP_EQ);
130 dst_.setTo(Scalar::all(0), mask);
132 dst_mask.assign(dst_mask_);
// Prepares a feather-blending session: runs the base-class preparation and
// allocates dst_weight_map_, a zero-filled CV_32F accumulator of size
// dst_roi.size() for the per-pixel weight sums.
138 void FeatherBlender::prepare(Rect dst_roi)
140 Blender::prepare(dst_roi);
141 dst_weight_map_.create(dst_roi.size(), CV_32F);
142 dst_weight_map_.setTo(0);
// Accumulates one source image into the feather-blended destination.
//
// _img : source image, must be CV_16SC3 (asserted).
// mask : source mask, must be CV_8U (asserted).
// tl   : top-left corner of the image, in the same coordinate frame as
//        dst_roi_.
//
// A float weight map is derived from the mask with createWeightMap() using
// sharpness_. For every pixel, the weighted source value is added to dst_ and
// the weight is added to dst_weight_map_; FeatherBlender::blend later divides
// by the accumulated weights.
146 void FeatherBlender::feed(InputArray _img, InputArray mask, Point tl)
148 Mat img = _img.getMat();
149 Mat dst = dst_.getMat(ACCESS_RW);
151 CV_Assert(img.type() == CV_16SC3);
152 CV_Assert(mask.type() == CV_8U);
154 createWeightMap(mask, sharpness_, weight_map_);
155 Mat weight_map = weight_map_.getMat(ACCESS_READ);
156 Mat dst_weight_map = dst_weight_map_.getMat(ACCESS_RW);
// Offset of the source image inside the destination buffers.
158 int dx = tl.x - dst_roi_.x;
159 int dy = tl.y - dst_roi_.y;
161 for (int y = 0; y < img.rows; ++y)
163 const Point3_<short>* src_row = img.ptr<Point3_<short> >(y);
164 Point3_<short>* dst_row = dst.ptr<Point3_<short> >(dy + y);
165 const float* weight_row = weight_map.ptr<float>(y);
166 float* dst_weight_row = dst_weight_map.ptr<float>(dy + y);
168 for (int x = 0; x < img.cols; ++x)
// The float product is converted to short by static_cast before being
// accumulated.
170 dst_row[dx + x].x += static_cast<short>(src_row[x].x * weight_row[x]);
171 dst_row[dx + x].y += static_cast<short>(src_row[x].y * weight_row[x]);
172 dst_row[dx + x].z += static_cast<short>(src_row[x].z * weight_row[x]);
173 dst_weight_row[dx + x] += weight_row[x];
// Produces the final feather-blended image.
//
// dst, dst_mask : output arrays, passed through to Blender::blend.
//
// dst_ is divided by the accumulated weights, dst_mask_ is rebuilt as
// "accumulated weight > WEIGHT_EPS", and the base class then finishes the job.
179 void FeatherBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
181 normalizeUsingWeightMap(dst_weight_map_, dst_);
182 compare(dst_weight_map_, WEIGHT_EPS, dst_mask_, CMP_GT);
183 Blender::blend(dst, dst_mask);
// Builds mutually normalised feather weight maps for a set of masks.
//
// masks       : one mask per image.
// corners     : top-left corner of each mask in the common coordinate frame.
// weight_maps : output, resized to masks.size(); entry i is the weight map of
//               masks[i] divided by the sum of all weight maps covering the
//               same pixels.
//
// Returns a Rect; presumably dst_roi, the ROI computed by resultRoi() below
// -- TODO confirm.
187 Rect FeatherBlender::createWeightMaps(const std::vector<UMat> &masks, const std::vector<Point> &corners,
188 std::vector<UMat> &weight_maps)
190 weight_maps.resize(masks.size());
191 for (size_t i = 0; i < masks.size(); ++i)
192 createWeightMap(masks[i], sharpness_, weight_maps[i]);
194 Rect dst_roi = resultRoi(corners, masks);
195 Mat weights_sum(dst_roi.size(), CV_32F);
196 weights_sum.setTo(0);
// First pass: accumulate every weight map into the shared sum image.
198 for (size_t i = 0; i < weight_maps.size(); ++i)
200 Rect roi(corners[i].x - dst_roi.x, corners[i].y - dst_roi.y,
201 weight_maps[i].cols, weight_maps[i].rows);
202 add(weights_sum(roi), weight_maps[i], weights_sum(roi));
// Second pass: divide each map by the sum over its own region.
205 for (size_t i = 0; i < weight_maps.size(); ++i)
207 Rect roi(corners[i].x - dst_roi.x, corners[i].y - dst_roi.y,
208 weight_maps[i].cols, weight_maps[i].rows);
209 Mat tmp = weights_sum(roi);
// Sums below float epsilon are replaced by 1 so the division below never
// divides by (near) zero.
210 tmp.setTo(1, tmp < std::numeric_limits<float>::epsilon());
211 divide(weight_maps[i], tmp, weight_maps[i]);
// Constructs a multi-band blender.
//
// try_gpu     : request for the CUDA implementation; only honoured when the
//               CUDA arithm and warping modules are compiled in and at least
//               one CUDA device is reported.
// num_bands   : requested number of pyramid bands, passed to setNumBands().
// weight_type : type of the weight maps; must be CV_32F or CV_16S (asserted).
218 MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type)
221 setNumBands(num_bands);
223 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
224 can_use_gpu_ = try_gpu && cuda::getCudaEnabledDeviceCount();
// Presumably the fallback for builds without the CUDA modules -- TODO confirm.
227 can_use_gpu_ = false;
230 CV_Assert(weight_type == CV_32F || weight_type == CV_16S);
231 weight_type_ = weight_type;
// Prepares the destination Laplacian pyramid and band-weight pyramid.
//
// dst_roi : rectangle of the final panorama. The unpadded rectangle is
//           remembered in dst_roi_final_; the local copy is then padded.
//
// The number of bands is capped by ceil(log2(max(width, height))), and the
// width/height are rounded up to a multiple of (1 << num_bands_). All pyramid
// levels are allocated zero-filled: level i has ((rows + 1) / 2, (cols + 1) / 2)
// of level i - 1. Image levels are CV_16SC3 and weight levels use weight_type_.
235 void MultiBandBlender::prepare(Rect dst_roi)
237 dst_roi_final_ = dst_roi;
239 // Crop unnecessary bands
240 double max_len = static_cast<double>(std::max(dst_roi.width, dst_roi.height));
241 num_bands_ = std::min(actual_num_bands_, static_cast<int>(ceil(std::log(max_len) / std::log(2.0))));
243 // Add border to the final image, to ensure sizes are divided by (1 << num_bands_)
244 dst_roi.width += ((1 << num_bands_) - dst_roi.width % (1 << num_bands_)) % (1 << num_bands_);
245 dst_roi.height += ((1 << num_bands_) - dst_roi.height % (1 << num_bands_)) % (1 << num_bands_);
247 Blender::prepare(dst_roi);
// CUDA build: allocate the pyramids as GPU matrices (presumably only when
// can_use_gpu_ is set -- TODO confirm).
249 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
252 gpu_dst_pyr_laplace_.resize(num_bands_ + 1);
253 gpu_dst_pyr_laplace_[0].create(dst_roi.size(), CV_16SC3);
254 gpu_dst_pyr_laplace_[0].setTo(Scalar::all(0));
256 gpu_dst_band_weights_.resize(num_bands_ + 1);
257 gpu_dst_band_weights_[0].create(dst_roi.size(), weight_type_);
258 gpu_dst_band_weights_[0].setTo(0);
260 for (int i = 1; i <= num_bands_; ++i)
262 gpu_dst_pyr_laplace_[i].create((gpu_dst_pyr_laplace_[i - 1].rows + 1) / 2,
263 (gpu_dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
264 gpu_dst_band_weights_[i].create((gpu_dst_band_weights_[i - 1].rows + 1) / 2,
265 (gpu_dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
266 gpu_dst_pyr_laplace_[i].setTo(Scalar::all(0));
267 gpu_dst_band_weights_[i].setTo(0);
// Host/UMat pyramids. Level 0 of the image pyramid is dst_ itself, which
// Blender::prepare has just allocated and zeroed.
273 dst_pyr_laplace_.resize(num_bands_ + 1);
274 dst_pyr_laplace_[0] = dst_;
276 dst_band_weights_.resize(num_bands_ + 1);
277 dst_band_weights_[0].create(dst_roi.size(), weight_type_);
278 dst_band_weights_[0].setTo(0);
280 for (int i = 1; i <= num_bands_; ++i)
282 dst_pyr_laplace_[i].create((dst_pyr_laplace_[i - 1].rows + 1) / 2,
283 (dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
284 dst_band_weights_[i].create((dst_band_weights_[i - 1].rows + 1) / 2,
285 (dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
286 dst_pyr_laplace_[i].setTo(Scalar::all(0));
287 dst_band_weights_[i].setTo(0);
// OpenCL implementation of one accumulation step of MultiBandBlender::feed.
//
// _src        : source pyramid level.
// _weight     : weight pyramid level for the source.
// _dst        : destination pyramid level region (read and written).
// _dst_weight : destination weight region (read and written).
//
// Builds the "feed" kernel from multibandblend_oclsrc, with build options that
// describe the four matrices, and launches it over a src.cols x src.rows grid.
// Returns the result of Kernel::run, so the caller can fall back to the CPU
// loops when it is false.
293 static bool ocl_MultiBandBlender_feed(InputArray _src, InputArray _weight,
294 InputOutputArray _dst, InputOutputArray _dst_weight)
296 String buildOptions = "-D DEFINE_feed";
297 ocl::buildOptionsAddMatrixDescription(buildOptions, "src", _src);
298 ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
299 ocl::buildOptionsAddMatrixDescription(buildOptions, "dst", _dst);
300 ocl::buildOptionsAddMatrixDescription(buildOptions, "dstWeight", _dst_weight);
301 ocl::Kernel k("feed", ocl::stitching::multibandblend_oclsrc, buildOptions);
305 UMat src = _src.getUMat();
307 k.args(ocl::KernelArg::ReadOnly(src),
308 ocl::KernelArg::ReadOnly(_weight.getUMat()),
309 ocl::KernelArg::ReadWrite(_dst.getUMat()),
310 ocl::KernelArg::ReadWrite(_dst_weight.getUMat())
// One work item per source pixel.
313 size_t globalsize[2] = {(size_t)src.cols, (size_t)src.rows };
314 return k.run(2, globalsize, NULL, false);
// Adds one source image to the destination Laplacian pyramid.
//
// _img : source image, CV_16SC3 or CV_8UC3 (asserted).
// mask : CV_8U mask of the source image (asserted).
// tl   : top-left corner of the image, in the same coordinate frame as
//        dst_roi_.
//
// Outline:
//   1. Compute a padded rectangle [tl_new, br_new) around the image. It is
//      grown by `gap`, clamped to dst_roi_, aligned to multiples of
//      (1 << num_bands_), and shifted back inside dst_roi_ if the alignment
//      pushed it past the bottom-right edge.
//   2. Build the Laplacian pyramid of the bordered image and the Gaussian
//      pyramid of the weight map derived from the mask.
//   3. For every level, add weight * laplacian to the destination pyramid and
//      add the weight to the destination band weights.
// A CUDA section and a UMat section (OpenCL kernel with CPU fallback) both
// implement steps 2-3; presumably can_use_gpu_ selects between them.
318 void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
321 int64 t = getTickCount();
324 UMat img = _img.getUMat();
325 CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3);
326 CV_Assert(mask.type() == CV_8U);
328 // Keep source image in memory with small border
329 int gap = 3 * (1 << num_bands_);
330 Point tl_new(std::max(dst_roi_.x, tl.x - gap),
331 std::max(dst_roi_.y, tl.y - gap));
332 Point br_new(std::min(dst_roi_.br().x, tl.x + img.cols + gap),
333 std::min(dst_roi_.br().y, tl.y + img.rows + gap));
335 // Ensure coordinates of top-left, bottom-right corners are divided by (1 << num_bands_).
336 // After that scale between layers is exactly 2.
338 // We do it to avoid interpolation problems when keeping sub-images only. There is no such problem when
339 // image is bordered to have size equal to the final image size, but this is too memory hungry approach.
340 tl_new.x = dst_roi_.x + (((tl_new.x - dst_roi_.x) >> num_bands_) << num_bands_);
341 tl_new.y = dst_roi_.y + (((tl_new.y - dst_roi_.y) >> num_bands_) << num_bands_);
342 int width = br_new.x - tl_new.x;
343 int height = br_new.y - tl_new.y;
344 width += ((1 << num_bands_) - width % (1 << num_bands_)) % (1 << num_bands_);
345 height += ((1 << num_bands_) - height % (1 << num_bands_)) % (1 << num_bands_);
346 br_new.x = tl_new.x + width;
347 br_new.y = tl_new.y + height;
// If rounding up pushed the rectangle past dst_roi_'s bottom-right corner,
// slide the whole rectangle back by the overshoot.
348 int dy = std::max(br_new.y - dst_roi_.br().y, 0);
349 int dx = std::max(br_new.x - dst_roi_.br().x, 0);
350 tl_new.x -= dx; br_new.x -= dx;
351 tl_new.y -= dy; br_new.y -= dy;
// Border widths that grow the image rectangle to [tl_new, br_new).
353 int top = tl.y - tl_new.y;
354 int left = tl.x - tl_new.x;
355 int bottom = br_new.y - tl.y - img.rows;
356 int right = br_new.x - tl.x - img.cols;
358 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
361 // Create the source image Laplacian pyramid
362 cuda::GpuMat gpu_img;
364 cuda::GpuMat img_with_border;
365 cuda::copyMakeBorder(gpu_img, img_with_border, top, bottom, left, right, BORDER_REFLECT);
366 std::vector<cuda::GpuMat> gpu_src_pyr_laplace(num_bands_ + 1);
367 img_with_border.convertTo(gpu_src_pyr_laplace[0], CV_16S);
368 for (int i = 0; i < num_bands_; ++i)
369 cuda::pyrDown(gpu_src_pyr_laplace[i], gpu_src_pyr_laplace[i + 1]);
370 for (int i = 0; i < num_bands_; ++i)
373 cuda::pyrUp(gpu_src_pyr_laplace[i + 1], up);
374 cuda::subtract(gpu_src_pyr_laplace[i], up, gpu_src_pyr_laplace[i]);
377 // Create the weight map Gaussian pyramid
378 cuda::GpuMat gpu_mask;
379 gpu_mask.upload(mask);
380 cuda::GpuMat weight_map;
381 std::vector<cuda::GpuMat> gpu_weight_pyr_gauss(num_bands_ + 1);
383 if (weight_type_ == CV_32F)
385 gpu_mask.convertTo(weight_map, CV_32F, 1. / 255.);
387 else // weight_type_ == CV_16S
// 16S weights: the mask value plus 1 wherever the mask is non-zero, so a mask
// value of 255 becomes 256 (the CPU accumulation below scales by >> 8).
389 gpu_mask.convertTo(weight_map, CV_16S);
390 cuda::GpuMat add_mask;
391 cuda::compare(gpu_mask, 0, add_mask, CMP_NE);
392 cuda::add(weight_map, Scalar::all(1), weight_map, add_mask);
394 cuda::copyMakeBorder(weight_map, gpu_weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT);
395 for (int i = 0; i < num_bands_; ++i)
396 cuda::pyrDown(gpu_weight_pyr_gauss[i], gpu_weight_pyr_gauss[i + 1]);
// Padded rectangle expressed relative to the destination origin.
398 int y_tl = tl_new.y - dst_roi_.y;
399 int y_br = br_new.y - dst_roi_.y;
400 int x_tl = tl_new.x - dst_roi_.x;
401 int x_br = br_new.x - dst_roi_.x;
403 // Add weighted layer of the source image to the final Laplacian pyramid layer
404 for (int i = 0; i <= num_bands_; ++i)
406 Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
407 cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace[i];
408 cuda::GpuMat _dst_pyr_laplace = gpu_dst_pyr_laplace_[i](rc);
409 cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss[i];
410 cuda::GpuMat _dst_band_weights = gpu_dst_band_weights_[i](rc);
412 using namespace cv::cuda::device::blend;
413 if (weight_type_ == CV_32F)
415 addSrcWeightGpu32F(_src_pyr_laplace, _weight_pyr_gauss, _dst_pyr_laplace, _dst_band_weights, rc);
419 addSrcWeightGpu16S(_src_pyr_laplace, _weight_pyr_gauss, _dst_pyr_laplace, _dst_band_weights, rc);
// The region coordinates halve with every pyramid level.
421 x_tl /= 2; y_tl /= 2;
422 x_br /= 2; y_br /= 2;
428 // Create the source image Laplacian pyramid
429 UMat img_with_border;
430 copyMakeBorder(_img, img_with_border, top, bottom, left, right,
432 LOGLN(" Add border to the source image, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
437 std::vector<UMat> src_pyr_laplace;
438 createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace);
440 LOGLN(" Create the source image Laplacian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
445 // Create the weight map Gaussian pyramid
447 std::vector<UMat> weight_pyr_gauss(num_bands_ + 1);
449 if(weight_type_ == CV_32F)
451 mask.getUMat().convertTo(weight_map, CV_32F, 1./255.);
453 else // weight_type_ == CV_16S
// Same 16S weight encoding as in the CUDA section: mask value + 1 where the
// mask is non-zero.
455 mask.getUMat().convertTo(weight_map, CV_16S);
457 compare(mask, 0, add_mask, CMP_NE);
458 add(weight_map, Scalar::all(1), weight_map, add_mask);
461 copyMakeBorder(weight_map, weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT);
463 for (int i = 0; i < num_bands_; ++i)
464 pyrDown(weight_pyr_gauss[i], weight_pyr_gauss[i + 1]);
466 LOGLN(" Create the weight map Gaussian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
// Padded rectangle expressed relative to the destination origin.
471 int y_tl = tl_new.y - dst_roi_.y;
472 int y_br = br_new.y - dst_roi_.y;
473 int x_tl = tl_new.x - dst_roi_.x;
474 int x_br = br_new.x - dst_roi_.x;
476 // Add weighted layer of the source image to the final Laplacian pyramid layer
477 for (int i = 0; i <= num_bands_; ++i)
479 Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
// Try the OpenCL kernel first; the CPU loops run when OpenCL is not active or
// the kernel call reports failure.
481 if ( !cv::ocl::isOpenCLActivated() ||
482 !ocl_MultiBandBlender_feed(src_pyr_laplace[i], weight_pyr_gauss[i],
483 dst_pyr_laplace_[i](rc), dst_band_weights_[i](rc)) )
486 Mat _src_pyr_laplace = src_pyr_laplace[i].getMat(ACCESS_READ);
487 Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW);
488 Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ);
489 Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW);
490 if(weight_type_ == CV_32F)
492 for (int y = 0; y < rc.height; ++y)
494 const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y);
495 Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y);
496 const float* weight_row = _weight_pyr_gauss.ptr<float>(y);
497 float* dst_weight_row = _dst_band_weights.ptr<float>(y);
499 for (int x = 0; x < rc.width; ++x)
501 dst_row[x].x += static_cast<short>(src_row[x].x * weight_row[x]);
502 dst_row[x].y += static_cast<short>(src_row[x].y * weight_row[x]);
503 dst_row[x].z += static_cast<short>(src_row[x].z * weight_row[x]);
504 dst_weight_row[x] += weight_row[x];
508 else // weight_type_ == CV_16S
// The bounds y_br - y_tl and x_br - x_tl equal rc.height and rc.width.
510 for (int y = 0; y < y_br - y_tl; ++y)
512 const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y);
513 Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y);
514 const short* weight_row = _weight_pyr_gauss.ptr<short>(y);
515 short* dst_weight_row = _dst_band_weights.ptr<short>(y);
517 for (int x = 0; x < x_br - x_tl; ++x)
// Fixed-point weighting: the product is shifted right by 8 bits.
519 dst_row[x].x += short((src_row[x].x * weight_row[x]) >> 8);
520 dst_row[x].y += short((src_row[x].y * weight_row[x]) >> 8);
521 dst_row[x].z += short((src_row[x].z * weight_row[x]) >> 8);
522 dst_weight_row[x] += weight_row[x];
530 CV_IMPL_ADD(CV_IMPL_OCL);
// The region coordinates halve with every pyramid level.
534 x_tl /= 2; y_tl /= 2;
535 x_br /= 2; y_br /= 2;
538 LOGLN(" Add weighted layer of the source image to the final Laplacian pyramid layer, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
// Produces the final multi-band blended image.
//
// dst, dst_mask : output arrays, passed through to Blender::blend.
//
// Every pyramid level is normalised by its accumulated weights and the image
// is then reconstructed from the Laplacian pyramid. The result is cropped to
// the unpadded size stored in dst_roi_final_, and dst_mask_ is rebuilt as
// "level-0 weight > WEIGHT_EPS". A CUDA section and a UMat section implement
// the same steps; presumably can_use_gpu_ selects between them. Both release
// their pyramids afterwards.
542 void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
544 cv::UMat dst_band_weights_0;
// Crop rectangle that removes the padding added in prepare().
545 Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height);
546 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
549 for (int i = 0; i <= num_bands_; ++i)
551 cuda::GpuMat dst_i = gpu_dst_pyr_laplace_[i];
552 cuda::GpuMat weight_i = gpu_dst_band_weights_[i];
554 using namespace ::cv::cuda::device::blend;
555 if (weight_type_ == CV_32F)
557 normalizeUsingWeightMapGpu32F(weight_i, dst_i, weight_i.cols, weight_i.rows);
561 normalizeUsingWeightMapGpu16S(weight_i, dst_i, weight_i.cols, weight_i.rows);
565 // Restore image from Laplacian pyramid
566 for (size_t i = num_bands_; i > 0; --i)
569 cuda::pyrUp(gpu_dst_pyr_laplace_[i], up);
570 cuda::add(up, gpu_dst_pyr_laplace_[i - 1], gpu_dst_pyr_laplace_[i - 1]);
573 gpu_dst_pyr_laplace_[0](dst_rc).download(dst_);
574 gpu_dst_band_weights_[0].download(dst_band_weights_0);
576 gpu_dst_pyr_laplace_.clear();
577 gpu_dst_band_weights_.clear();
582 for (int i = 0; i <= num_bands_; ++i)
583 normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]);
585 restoreImageFromLaplacePyr(dst_pyr_laplace_);
587 dst_ = dst_pyr_laplace_[0](dst_rc);
588 dst_band_weights_0 = dst_band_weights_[0];
590 dst_pyr_laplace_.clear();
591 dst_band_weights_.clear();
594 compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
596 Blender::blend(dst, dst_mask);
600 //////////////////////////////////////////////////////////////////////////////
601 // Auxiliary functions
// OpenCL implementation of normalizeUsingWeightMap.
//
// _weight : weight map (read only).
// _mat    : image to normalise in place (read and written).
//
// Builds the "normalizeUsingWeightMap" kernel from multibandblend_oclsrc, with
// build options that describe both matrices, and launches it over a
// mat.cols x mat.rows grid. Returns the result of Kernel::run, so the caller
// can fall back to the CPU implementation when it is false.
604 static bool ocl_normalizeUsingWeightMap(InputArray _weight, InputOutputArray _mat)
606 String buildOptions = "-D DEFINE_normalizeUsingWeightMap";
607 ocl::buildOptionsAddMatrixDescription(buildOptions, "mat", _mat);
608 ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
609 ocl::Kernel k("normalizeUsingWeightMap", ocl::stitching::multibandblend_oclsrc, buildOptions);
613 UMat mat = _mat.getUMat();
615 k.args(ocl::KernelArg::ReadWrite(mat),
616 ocl::KernelArg::ReadOnly(_weight.getUMat())
// One work item per image pixel.
619 size_t globalsize[2] = {(size_t)mat.cols, (size_t)mat.rows };
620 return k.run(2, globalsize, NULL, false);
// Divides every pixel of a CV_16SC3 image by its accumulated weight, in place.
//
// _weight : weight map, CV_32FC1 or CV_16SC1 (the latter is asserted in the
//           integer branch).
// _src    : image to normalise; must be CV_16SC3 (asserted).
//
// The OpenCL kernel is tried first; the CPU loops run when OpenCL is not
// active or the kernel call reports failure.
//   float weights : value / (weight + WEIGHT_EPS)
//   short weights : (value << 8) / (weight + 1), the inverse of the >> 8
//                   scaling applied when MultiBandBlender::feed accumulates
//                   16S weights.
624 void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
630 if ( !cv::ocl::isOpenCLActivated() ||
631 !ocl_normalizeUsingWeightMap(_weight, _src) )
635 weight = _weight.getMat();
637 CV_Assert(src.type() == CV_16SC3);
639 if (weight.type() == CV_32FC1)
641 for (int y = 0; y < src.rows; ++y)
643 Point3_<short> *row = src.ptr<Point3_<short> >(y);
644 const float *weight_row = weight.ptr<float>(y);
646 for (int x = 0; x < src.cols; ++x)
// WEIGHT_EPS keeps the divisor non-zero where no weight was accumulated.
648 row[x].x = static_cast<short>(row[x].x / (weight_row[x] + WEIGHT_EPS));
649 row[x].y = static_cast<short>(row[x].y / (weight_row[x] + WEIGHT_EPS));
650 row[x].z = static_cast<short>(row[x].z / (weight_row[x] + WEIGHT_EPS));
656 CV_Assert(weight.type() == CV_16SC1);
658 for (int y = 0; y < src.rows; ++y)
660 const short *weight_row = weight.ptr<short>(y);
661 Point3_<short> *row = src.ptr<Point3_<short> >(y);
663 for (int x = 0; x < src.cols; ++x)
// The + 1 keeps the divisor non-zero where the accumulated weight is 0.
665 int w = weight_row[x] + 1;
666 row[x].x = static_cast<short>((row[x].x << 8) / w);
667 row[x].y = static_cast<short>((row[x].y << 8) / w);
668 row[x].z = static_cast<short>((row[x].z << 8) / w);
676 CV_IMPL_ADD(CV_IMPL_OCL);
// Converts a binary mask into a feathering weight map.
//
// mask      : CV_8U mask (asserted).
// sharpness : multiplier applied to the distance transform; larger values make
//             the weight reach its cap of 1 closer to the mask border.
// weight    : output weight map.
//
// weight = min(sharpness * L1-distance-transform(mask), 1).
682 void createWeightMap(InputArray mask, float sharpness, InputOutputArray weight)
684 CV_Assert(mask.type() == CV_8U);
685 distanceTransform(mask, weight, DIST_L1, 3);
687 multiply(weight, sharpness, tmp);
// THRESH_TRUNC caps values above 1 at 1 and leaves smaller values unchanged.
688 threshold(tmp, weight, 1.f, 1.f, THRESH_TRUNC);
// Builds a Laplacian pyramid of an image.
//
// img        : source image.
// num_levels : number of band-pass levels.
// pyr        : output, resized to num_levels + 1 entries. Entries
//              0 .. num_levels - 1 hold "level minus upsampled next-coarser
//              level"; the last entry holds the coarsest downsampled image.
//
// 8-bit input takes a dedicated path whose outputs are written as CV_16S
// (via convertTo and the dtype argument of subtract). Other depths go through
// the generic path: a full pyrDown chain followed by in-place subtraction.
692 void createLaplacePyr(InputArray img, int num_levels, std::vector<UMat> &pyr)
694 pyr.resize(num_levels + 1);
696 if(img.depth() == CV_8U)
700 img.getUMat().convertTo(pyr[0], CV_16S);
705 UMat current = img.getUMat();
706 pyrDown(img, downNext);
708 for(int i = 1; i < num_levels; ++i)
713 pyrDown(downNext, lvl_down);
714 pyrUp(downNext, lvl_up, current.size());
715 subtract(current, lvl_up, pyr[i-1], noArray(), CV_16S);
723 pyrUp(downNext, lvl_up, current.size());
724 subtract(current, lvl_up, pyr[num_levels-1], noArray(), CV_16S);
726 downNext.convertTo(pyr[num_levels], CV_16S);
// Generic path: pyrDown chain first, then turn each level except the last
// into a band-pass level.
731 pyr[0] = img.getUMat();
732 for (int i = 0; i < num_levels; ++i)
733 pyrDown(pyr[i], pyr[i + 1]);
735 for (int i = 0; i < num_levels; ++i)
737 pyrUp(pyr[i + 1], tmp, pyr[i].size());
738 subtract(pyr[i], tmp, pyr[i]);
// CUDA variant of createLaplacePyr.
//
// img        : source image, uploaded to the GPU as level 0.
// num_levels : number of band-pass levels.
// pyr        : output, resized to num_levels + 1 entries and filled by
//              downloading each GPU level.
//
// When the CUDA arithm and warping modules are compiled in, the pyramid is
// built on the device with pyrDown / pyrUp / subtract. The CV_Error
// (StsNotImplemented) at the end is presumably the branch for builds without
// those modules -- TODO confirm.
744 void createLaplacePyrGpu(InputArray img, int num_levels, std::vector<UMat> &pyr)
746 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
747 pyr.resize(num_levels + 1);
749 std::vector<cuda::GpuMat> gpu_pyr(num_levels + 1);
750 gpu_pyr[0].upload(img);
751 for (int i = 0; i < num_levels; ++i)
752 cuda::pyrDown(gpu_pyr[i], gpu_pyr[i + 1]);
755 for (int i = 0; i < num_levels; ++i)
757 cuda::pyrUp(gpu_pyr[i + 1], tmp);
758 cuda::subtract(gpu_pyr[i], tmp, gpu_pyr[i]);
759 gpu_pyr[i].download(pyr[i]);
// The coarsest level is not subtracted; it is downloaded as is.
762 gpu_pyr[num_levels].download(pyr[num_levels]);
767 CV_Error(Error::StsNotImplemented, "CUDA optimization is unavailable");
// Collapses a Laplacian pyramid back into an image, in place.
//
// pyr : pyramid levels, finest first. Walking from the coarsest level down to
//       level 1, each level is upsampled to the size of the next finer level
//       and added into it. The reconstructed image ends up in pyr[0].
772 void restoreImageFromLaplacePyr(std::vector<UMat> &pyr)
777 for (size_t i = pyr.size() - 1; i > 0; --i)
779 pyrUp(pyr[i], tmp, pyr[i - 1].size());
780 add(tmp, pyr[i - 1], pyr[i - 1]);
// CUDA variant of restoreImageFromLaplacePyr.
//
// pyr : pyramid levels, finest first. All levels are uploaded to the GPU and
//       collapsed there from coarsest to finest; only the reconstructed
//       level 0 is downloaded back into pyr[0].
//
// The CV_Error (StsNotImplemented) at the end is presumably the branch for
// builds without the CUDA arithm and warping modules -- TODO confirm.
785 void restoreImageFromLaplacePyrGpu(std::vector<UMat> &pyr)
787 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
791 std::vector<cuda::GpuMat> gpu_pyr(pyr.size());
792 for (size_t i = 0; i < pyr.size(); ++i)
793 gpu_pyr[i].upload(pyr[i]);
796 for (size_t i = pyr.size() - 1; i > 0; --i)
798 cuda::pyrUp(gpu_pyr[i], tmp);
799 cuda::add(tmp, gpu_pyr[i - 1], gpu_pyr[i - 1]);
802 gpu_pyr[0].download(pyr[0]);
805 CV_Error(Error::StsNotImplemented, "CUDA optimization is unavailable");
809 } // namespace detail