next: drop HAVE_TEGRA_OPTIMIZATION/TADP
[platform/upstream/opencv.git] / modules / stitching / src / blenders.cpp
1 /*M///////////////////////////////////////////////////////////////////////////////////////
2 //
3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
4 //
5 //  By downloading, copying, installing or using the software you agree to this license.
6 //  If you do not agree to this license, do not download, install,
7 //  copy or use the software.
8 //
9 //
10 //                          License Agreement
11 //                For Open Source Computer Vision Library
12 //
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
16 //
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
19 //
20 //   * Redistribution's of source code must retain the above copyright notice,
21 //     this list of conditions and the following disclaimer.
22 //
23 //   * Redistribution's in binary form must reproduce the above copyright notice,
24 //     this list of conditions and the following disclaimer in the documentation
25 //     and/or other materials provided with the distribution.
26 //
27 //   * The name of the copyright holders may not be used to endorse or promote products
28 //     derived from this software without specific prior written permission.
29 //
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
40 //
41 //M*/
42
43 #include "precomp.hpp"
44 #include "opencl_kernels_stitching.hpp"
45
46 #ifdef HAVE_CUDA
47     namespace cv { namespace cuda { namespace device
48     {
49         namespace blend
50         {
51             void addSrcWeightGpu16S(const PtrStep<short> src, const PtrStep<short> src_weight,
52                                     PtrStep<short> dst, PtrStep<short> dst_weight, cv::Rect &rc);
53             void addSrcWeightGpu32F(const PtrStep<short> src, const PtrStepf src_weight,
54                                     PtrStep<short> dst, PtrStepf dst_weight, cv::Rect &rc);
55             void normalizeUsingWeightMapGpu16S(const PtrStep<short> weight, PtrStep<short> src,
56                                                const int width, const int height);
57             void normalizeUsingWeightMapGpu32F(const PtrStepf weight, PtrStep<short> src,
58                                                const int width, const int height);
59         }
60     }}}
61 #endif
62
63 namespace cv {
64 namespace detail {
65
66 static const float WEIGHT_EPS = 1e-5f;
67
68 Ptr<Blender> Blender::createDefault(int type, bool try_gpu)
69 {
70     if (type == NO)
71         return makePtr<Blender>();
72     if (type == FEATHER)
73         return makePtr<FeatherBlender>();
74     if (type == MULTI_BAND)
75         return makePtr<MultiBandBlender>(try_gpu);
76     CV_Error(Error::StsBadArg, "unsupported blending method");
77     return Ptr<Blender>();
78 }
79
80
81 void Blender::prepare(const std::vector<Point> &corners, const std::vector<Size> &sizes)
82 {
83     prepare(resultRoi(corners, sizes));
84 }
85
86
87 void Blender::prepare(Rect dst_roi)
88 {
89     dst_.create(dst_roi.size(), CV_16SC3);
90     dst_.setTo(Scalar::all(0));
91     dst_mask_.create(dst_roi.size(), CV_8U);
92     dst_mask_.setTo(Scalar::all(0));
93     dst_roi_ = dst_roi;
94 }
95
96
97 void Blender::feed(InputArray _img, InputArray _mask, Point tl)
98 {
99     Mat img = _img.getMat();
100     Mat mask = _mask.getMat();
101     Mat dst = dst_.getMat(ACCESS_RW);
102     Mat dst_mask = dst_mask_.getMat(ACCESS_RW);
103
104     CV_Assert(img.type() == CV_16SC3);
105     CV_Assert(mask.type() == CV_8U);
106     int dx = tl.x - dst_roi_.x;
107     int dy = tl.y - dst_roi_.y;
108
109     for (int y = 0; y < img.rows; ++y)
110     {
111         const Point3_<short> *src_row = img.ptr<Point3_<short> >(y);
112         Point3_<short> *dst_row = dst.ptr<Point3_<short> >(dy + y);
113         const uchar *mask_row = mask.ptr<uchar>(y);
114         uchar *dst_mask_row = dst_mask.ptr<uchar>(dy + y);
115
116         for (int x = 0; x < img.cols; ++x)
117         {
118             if (mask_row[x])
119                 dst_row[dx + x] = src_row[x];
120             dst_mask_row[dx + x] |= mask_row[x];
121         }
122     }
123 }
124
125
126 void Blender::blend(InputOutputArray dst, InputOutputArray dst_mask)
127 {
128     UMat mask;
129     compare(dst_mask_, 0, mask, CMP_EQ);
130     dst_.setTo(Scalar::all(0), mask);
131     dst.assign(dst_);
132     dst_mask.assign(dst_mask_);
133     dst_.release();
134     dst_mask_.release();
135 }
136
137
138 void FeatherBlender::prepare(Rect dst_roi)
139 {
140     Blender::prepare(dst_roi);
141     dst_weight_map_.create(dst_roi.size(), CV_32F);
142     dst_weight_map_.setTo(0);
143 }
144
145
146 void FeatherBlender::feed(InputArray _img, InputArray mask, Point tl)
147 {
148     Mat img = _img.getMat();
149     Mat dst = dst_.getMat(ACCESS_RW);
150
151     CV_Assert(img.type() == CV_16SC3);
152     CV_Assert(mask.type() == CV_8U);
153
154     createWeightMap(mask, sharpness_, weight_map_);
155     Mat weight_map = weight_map_.getMat(ACCESS_READ);
156     Mat dst_weight_map = dst_weight_map_.getMat(ACCESS_RW);
157
158     int dx = tl.x - dst_roi_.x;
159     int dy = tl.y - dst_roi_.y;
160
161     for (int y = 0; y < img.rows; ++y)
162     {
163         const Point3_<short>* src_row = img.ptr<Point3_<short> >(y);
164         Point3_<short>* dst_row = dst.ptr<Point3_<short> >(dy + y);
165         const float* weight_row = weight_map.ptr<float>(y);
166         float* dst_weight_row = dst_weight_map.ptr<float>(dy + y);
167
168         for (int x = 0; x < img.cols; ++x)
169         {
170             dst_row[dx + x].x += static_cast<short>(src_row[x].x * weight_row[x]);
171             dst_row[dx + x].y += static_cast<short>(src_row[x].y * weight_row[x]);
172             dst_row[dx + x].z += static_cast<short>(src_row[x].z * weight_row[x]);
173             dst_weight_row[dx + x] += weight_row[x];
174         }
175     }
176 }
177
178
179 void FeatherBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
180 {
181     normalizeUsingWeightMap(dst_weight_map_, dst_);
182     compare(dst_weight_map_, WEIGHT_EPS, dst_mask_, CMP_GT);
183     Blender::blend(dst, dst_mask);
184 }
185
186
187 Rect FeatherBlender::createWeightMaps(const std::vector<UMat> &masks, const std::vector<Point> &corners,
188                                       std::vector<UMat> &weight_maps)
189 {
190     weight_maps.resize(masks.size());
191     for (size_t i = 0; i < masks.size(); ++i)
192         createWeightMap(masks[i], sharpness_, weight_maps[i]);
193
194     Rect dst_roi = resultRoi(corners, masks);
195     Mat weights_sum(dst_roi.size(), CV_32F);
196     weights_sum.setTo(0);
197
198     for (size_t i = 0; i < weight_maps.size(); ++i)
199     {
200         Rect roi(corners[i].x - dst_roi.x, corners[i].y - dst_roi.y,
201                  weight_maps[i].cols, weight_maps[i].rows);
202         add(weights_sum(roi), weight_maps[i], weights_sum(roi));
203     }
204
205     for (size_t i = 0; i < weight_maps.size(); ++i)
206     {
207         Rect roi(corners[i].x - dst_roi.x, corners[i].y - dst_roi.y,
208                  weight_maps[i].cols, weight_maps[i].rows);
209         Mat tmp = weights_sum(roi);
210         tmp.setTo(1, tmp < std::numeric_limits<float>::epsilon());
211         divide(weight_maps[i], tmp, weight_maps[i]);
212     }
213
214     return dst_roi;
215 }
216
217
218 MultiBandBlender::MultiBandBlender(int try_gpu, int num_bands, int weight_type)
219 {
220     num_bands_ = 0;
221     setNumBands(num_bands);
222
223 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
224     can_use_gpu_ = try_gpu && cuda::getCudaEnabledDeviceCount();
225 #else
226     (void) try_gpu;
227     can_use_gpu_ = false;
228 #endif
229
230     CV_Assert(weight_type == CV_32F || weight_type == CV_16S);
231     weight_type_ = weight_type;
232 }
233
234
235 void MultiBandBlender::prepare(Rect dst_roi)
236 {
237     dst_roi_final_ = dst_roi;
238
239     // Crop unnecessary bands
240     double max_len = static_cast<double>(std::max(dst_roi.width, dst_roi.height));
241     num_bands_ = std::min(actual_num_bands_, static_cast<int>(ceil(std::log(max_len) / std::log(2.0))));
242
243     // Add border to the final image, to ensure sizes are divided by (1 << num_bands_)
244     dst_roi.width += ((1 << num_bands_) - dst_roi.width % (1 << num_bands_)) % (1 << num_bands_);
245     dst_roi.height += ((1 << num_bands_) - dst_roi.height % (1 << num_bands_)) % (1 << num_bands_);
246
247     Blender::prepare(dst_roi);
248
249 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
250     if (can_use_gpu_)
251     {
252         gpu_dst_pyr_laplace_.resize(num_bands_ + 1);
253         gpu_dst_pyr_laplace_[0].create(dst_roi.size(), CV_16SC3);
254         gpu_dst_pyr_laplace_[0].setTo(Scalar::all(0));
255
256         gpu_dst_band_weights_.resize(num_bands_ + 1);
257         gpu_dst_band_weights_[0].create(dst_roi.size(), weight_type_);
258         gpu_dst_band_weights_[0].setTo(0);
259
260         for (int i = 1; i <= num_bands_; ++i)
261         {
262             gpu_dst_pyr_laplace_[i].create((gpu_dst_pyr_laplace_[i - 1].rows + 1) / 2,
263                 (gpu_dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
264             gpu_dst_band_weights_[i].create((gpu_dst_band_weights_[i - 1].rows + 1) / 2,
265                 (gpu_dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
266             gpu_dst_pyr_laplace_[i].setTo(Scalar::all(0));
267             gpu_dst_band_weights_[i].setTo(0);
268         }
269     }
270     else
271 #endif
272     {
273         dst_pyr_laplace_.resize(num_bands_ + 1);
274         dst_pyr_laplace_[0] = dst_;
275
276         dst_band_weights_.resize(num_bands_ + 1);
277         dst_band_weights_[0].create(dst_roi.size(), weight_type_);
278         dst_band_weights_[0].setTo(0);
279
280         for (int i = 1; i <= num_bands_; ++i)
281         {
282             dst_pyr_laplace_[i].create((dst_pyr_laplace_[i - 1].rows + 1) / 2,
283                 (dst_pyr_laplace_[i - 1].cols + 1) / 2, CV_16SC3);
284             dst_band_weights_[i].create((dst_band_weights_[i - 1].rows + 1) / 2,
285                 (dst_band_weights_[i - 1].cols + 1) / 2, weight_type_);
286             dst_pyr_laplace_[i].setTo(Scalar::all(0));
287             dst_band_weights_[i].setTo(0);
288         }
289     }
290 }
291
292 #ifdef HAVE_OPENCL
293 static bool ocl_MultiBandBlender_feed(InputArray _src, InputArray _weight,
294         InputOutputArray _dst, InputOutputArray _dst_weight)
295 {
296     String buildOptions = "-D DEFINE_feed";
297     ocl::buildOptionsAddMatrixDescription(buildOptions, "src", _src);
298     ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
299     ocl::buildOptionsAddMatrixDescription(buildOptions, "dst", _dst);
300     ocl::buildOptionsAddMatrixDescription(buildOptions, "dstWeight", _dst_weight);
301     ocl::Kernel k("feed", ocl::stitching::multibandblend_oclsrc, buildOptions);
302     if (k.empty())
303         return false;
304
305     UMat src = _src.getUMat();
306
307     k.args(ocl::KernelArg::ReadOnly(src),
308            ocl::KernelArg::ReadOnly(_weight.getUMat()),
309            ocl::KernelArg::ReadWrite(_dst.getUMat()),
310            ocl::KernelArg::ReadWrite(_dst_weight.getUMat())
311            );
312
313     size_t globalsize[2] = {(size_t)src.cols, (size_t)src.rows };
314     return k.run(2, globalsize, NULL, false);
315 }
316 #endif
317
318 void MultiBandBlender::feed(InputArray _img, InputArray mask, Point tl)
319 {
320 #if ENABLE_LOG
321     int64 t = getTickCount();
322 #endif
323
324     UMat img = _img.getUMat();
325     CV_Assert(img.type() == CV_16SC3 || img.type() == CV_8UC3);
326     CV_Assert(mask.type() == CV_8U);
327
328     // Keep source image in memory with small border
329     int gap = 3 * (1 << num_bands_);
330     Point tl_new(std::max(dst_roi_.x, tl.x - gap),
331                  std::max(dst_roi_.y, tl.y - gap));
332     Point br_new(std::min(dst_roi_.br().x, tl.x + img.cols + gap),
333                  std::min(dst_roi_.br().y, tl.y + img.rows + gap));
334
335     // Ensure coordinates of top-left, bottom-right corners are divided by (1 << num_bands_).
336     // After that scale between layers is exactly 2.
337     //
338     // We do it to avoid interpolation problems when keeping sub-images only. There is no such problem when
339     // image is bordered to have size equal to the final image size, but this is too memory hungry approach.
340     tl_new.x = dst_roi_.x + (((tl_new.x - dst_roi_.x) >> num_bands_) << num_bands_);
341     tl_new.y = dst_roi_.y + (((tl_new.y - dst_roi_.y) >> num_bands_) << num_bands_);
342     int width = br_new.x - tl_new.x;
343     int height = br_new.y - tl_new.y;
344     width += ((1 << num_bands_) - width % (1 << num_bands_)) % (1 << num_bands_);
345     height += ((1 << num_bands_) - height % (1 << num_bands_)) % (1 << num_bands_);
346     br_new.x = tl_new.x + width;
347     br_new.y = tl_new.y + height;
348     int dy = std::max(br_new.y - dst_roi_.br().y, 0);
349     int dx = std::max(br_new.x - dst_roi_.br().x, 0);
350     tl_new.x -= dx; br_new.x -= dx;
351     tl_new.y -= dy; br_new.y -= dy;
352
353     int top = tl.y - tl_new.y;
354     int left = tl.x - tl_new.x;
355     int bottom = br_new.y - tl.y - img.rows;
356     int right = br_new.x - tl.x - img.cols;
357
358 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
359     if (can_use_gpu_)
360     {
361         // Create the source image Laplacian pyramid
362         cuda::GpuMat gpu_img;
363         gpu_img.upload(img);
364         cuda::GpuMat img_with_border;
365         cuda::copyMakeBorder(gpu_img, img_with_border, top, bottom, left, right, BORDER_REFLECT);
366         std::vector<cuda::GpuMat> gpu_src_pyr_laplace(num_bands_ + 1);
367         img_with_border.convertTo(gpu_src_pyr_laplace[0], CV_16S);
368         for (int i = 0; i < num_bands_; ++i)
369             cuda::pyrDown(gpu_src_pyr_laplace[i], gpu_src_pyr_laplace[i + 1]);
370         for (int i = 0; i < num_bands_; ++i)
371         {
372             cuda::GpuMat up;
373             cuda::pyrUp(gpu_src_pyr_laplace[i + 1], up);
374             cuda::subtract(gpu_src_pyr_laplace[i], up, gpu_src_pyr_laplace[i]);
375         }
376
377         // Create the weight map Gaussian pyramid
378         cuda::GpuMat gpu_mask;
379         gpu_mask.upload(mask);
380         cuda::GpuMat weight_map;
381         std::vector<cuda::GpuMat> gpu_weight_pyr_gauss(num_bands_ + 1);
382
383         if (weight_type_ == CV_32F)
384         {
385             gpu_mask.convertTo(weight_map, CV_32F, 1. / 255.);
386         }
387         else // weight_type_ == CV_16S
388         {
389             gpu_mask.convertTo(weight_map, CV_16S);
390             cuda::GpuMat add_mask;
391             cuda::compare(gpu_mask, 0, add_mask, CMP_NE);
392             cuda::add(weight_map, Scalar::all(1), weight_map, add_mask);
393         }
394         cuda::copyMakeBorder(weight_map, gpu_weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT);
395         for (int i = 0; i < num_bands_; ++i)
396             cuda::pyrDown(gpu_weight_pyr_gauss[i], gpu_weight_pyr_gauss[i + 1]);
397
398         int y_tl = tl_new.y - dst_roi_.y;
399         int y_br = br_new.y - dst_roi_.y;
400         int x_tl = tl_new.x - dst_roi_.x;
401         int x_br = br_new.x - dst_roi_.x;
402
403         // Add weighted layer of the source image to the final Laplacian pyramid layer
404         for (int i = 0; i <= num_bands_; ++i)
405         {
406             Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
407             cuda::GpuMat &_src_pyr_laplace = gpu_src_pyr_laplace[i];
408             cuda::GpuMat _dst_pyr_laplace = gpu_dst_pyr_laplace_[i](rc);
409             cuda::GpuMat &_weight_pyr_gauss = gpu_weight_pyr_gauss[i];
410             cuda::GpuMat _dst_band_weights = gpu_dst_band_weights_[i](rc);
411
412             using namespace cv::cuda::device::blend;
413             if (weight_type_ == CV_32F)
414             {
415                 addSrcWeightGpu32F(_src_pyr_laplace, _weight_pyr_gauss, _dst_pyr_laplace, _dst_band_weights, rc);
416             }
417             else
418             {
419                 addSrcWeightGpu16S(_src_pyr_laplace, _weight_pyr_gauss, _dst_pyr_laplace, _dst_band_weights, rc);
420             }
421             x_tl /= 2; y_tl /= 2;
422             x_br /= 2; y_br /= 2;
423         }
424         return;
425     }
426 #endif
427
428     // Create the source image Laplacian pyramid
429     UMat img_with_border;
430     copyMakeBorder(_img, img_with_border, top, bottom, left, right,
431                    BORDER_REFLECT);
432     LOGLN("  Add border to the source image, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
433 #if ENABLE_LOG
434     t = getTickCount();
435 #endif
436
437     std::vector<UMat> src_pyr_laplace;
438     createLaplacePyr(img_with_border, num_bands_, src_pyr_laplace);
439
440     LOGLN("  Create the source image Laplacian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
441 #if ENABLE_LOG
442     t = getTickCount();
443 #endif
444
445     // Create the weight map Gaussian pyramid
446     UMat weight_map;
447     std::vector<UMat> weight_pyr_gauss(num_bands_ + 1);
448
449     if(weight_type_ == CV_32F)
450     {
451         mask.getUMat().convertTo(weight_map, CV_32F, 1./255.);
452     }
453     else // weight_type_ == CV_16S
454     {
455         mask.getUMat().convertTo(weight_map, CV_16S);
456         UMat add_mask;
457         compare(mask, 0, add_mask, CMP_NE);
458         add(weight_map, Scalar::all(1), weight_map, add_mask);
459     }
460
461     copyMakeBorder(weight_map, weight_pyr_gauss[0], top, bottom, left, right, BORDER_CONSTANT);
462
463     for (int i = 0; i < num_bands_; ++i)
464         pyrDown(weight_pyr_gauss[i], weight_pyr_gauss[i + 1]);
465
466     LOGLN("  Create the weight map Gaussian pyramid, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
467 #if ENABLE_LOG
468     t = getTickCount();
469 #endif
470
471     int y_tl = tl_new.y - dst_roi_.y;
472     int y_br = br_new.y - dst_roi_.y;
473     int x_tl = tl_new.x - dst_roi_.x;
474     int x_br = br_new.x - dst_roi_.x;
475
476     // Add weighted layer of the source image to the final Laplacian pyramid layer
477     for (int i = 0; i <= num_bands_; ++i)
478     {
479         Rect rc(x_tl, y_tl, x_br - x_tl, y_br - y_tl);
480 #ifdef HAVE_OPENCL
481         if ( !cv::ocl::isOpenCLActivated() ||
482              !ocl_MultiBandBlender_feed(src_pyr_laplace[i], weight_pyr_gauss[i],
483                     dst_pyr_laplace_[i](rc), dst_band_weights_[i](rc)) )
484 #endif
485         {
486             Mat _src_pyr_laplace = src_pyr_laplace[i].getMat(ACCESS_READ);
487             Mat _dst_pyr_laplace = dst_pyr_laplace_[i](rc).getMat(ACCESS_RW);
488             Mat _weight_pyr_gauss = weight_pyr_gauss[i].getMat(ACCESS_READ);
489             Mat _dst_band_weights = dst_band_weights_[i](rc).getMat(ACCESS_RW);
490             if(weight_type_ == CV_32F)
491             {
492                 for (int y = 0; y < rc.height; ++y)
493                 {
494                     const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y);
495                     Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y);
496                     const float* weight_row = _weight_pyr_gauss.ptr<float>(y);
497                     float* dst_weight_row = _dst_band_weights.ptr<float>(y);
498
499                     for (int x = 0; x < rc.width; ++x)
500                     {
501                         dst_row[x].x += static_cast<short>(src_row[x].x * weight_row[x]);
502                         dst_row[x].y += static_cast<short>(src_row[x].y * weight_row[x]);
503                         dst_row[x].z += static_cast<short>(src_row[x].z * weight_row[x]);
504                         dst_weight_row[x] += weight_row[x];
505                     }
506                 }
507             }
508             else // weight_type_ == CV_16S
509             {
510                 for (int y = 0; y < y_br - y_tl; ++y)
511                 {
512                     const Point3_<short>* src_row = _src_pyr_laplace.ptr<Point3_<short> >(y);
513                     Point3_<short>* dst_row = _dst_pyr_laplace.ptr<Point3_<short> >(y);
514                     const short* weight_row = _weight_pyr_gauss.ptr<short>(y);
515                     short* dst_weight_row = _dst_band_weights.ptr<short>(y);
516
517                     for (int x = 0; x < x_br - x_tl; ++x)
518                     {
519                         dst_row[x].x += short((src_row[x].x * weight_row[x]) >> 8);
520                         dst_row[x].y += short((src_row[x].y * weight_row[x]) >> 8);
521                         dst_row[x].z += short((src_row[x].z * weight_row[x]) >> 8);
522                         dst_weight_row[x] += weight_row[x];
523                     }
524                 }
525             }
526         }
527 #ifdef HAVE_OPENCL
528         else
529         {
530             CV_IMPL_ADD(CV_IMPL_OCL);
531         }
532 #endif
533
534         x_tl /= 2; y_tl /= 2;
535         x_br /= 2; y_br /= 2;
536     }
537
538     LOGLN("  Add weighted layer of the source image to the final Laplacian pyramid layer, time: " << ((getTickCount() - t) / getTickFrequency()) << " sec");
539 }
540
541
542 void MultiBandBlender::blend(InputOutputArray dst, InputOutputArray dst_mask)
543 {
544     cv::UMat dst_band_weights_0;
545     Rect dst_rc(0, 0, dst_roi_final_.width, dst_roi_final_.height);
546 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
547     if (can_use_gpu_)
548     {
549         for (int i = 0; i <= num_bands_; ++i)
550         {
551             cuda::GpuMat dst_i = gpu_dst_pyr_laplace_[i];
552             cuda::GpuMat weight_i = gpu_dst_band_weights_[i];
553
554             using namespace ::cv::cuda::device::blend;
555             if (weight_type_ == CV_32F)
556             {
557                 normalizeUsingWeightMapGpu32F(weight_i, dst_i, weight_i.cols, weight_i.rows);
558             }
559             else
560             {
561                 normalizeUsingWeightMapGpu16S(weight_i, dst_i, weight_i.cols, weight_i.rows);
562             }
563         }
564
565         // Restore image from Laplacian pyramid
566         for (size_t i = num_bands_; i > 0; --i)
567         {
568             cuda::GpuMat up;
569             cuda::pyrUp(gpu_dst_pyr_laplace_[i], up);
570             cuda::add(up, gpu_dst_pyr_laplace_[i - 1], gpu_dst_pyr_laplace_[i - 1]);
571         }
572
573         gpu_dst_pyr_laplace_[0](dst_rc).download(dst_);
574         gpu_dst_band_weights_[0].download(dst_band_weights_0);
575
576         gpu_dst_pyr_laplace_.clear();
577         gpu_dst_band_weights_.clear();
578     }
579     else
580 #endif
581     {
582         for (int i = 0; i <= num_bands_; ++i)
583             normalizeUsingWeightMap(dst_band_weights_[i], dst_pyr_laplace_[i]);
584
585         restoreImageFromLaplacePyr(dst_pyr_laplace_);
586
587         dst_ = dst_pyr_laplace_[0](dst_rc);
588         dst_band_weights_0 = dst_band_weights_[0];
589
590         dst_pyr_laplace_.clear();
591         dst_band_weights_.clear();
592     }
593
594     compare(dst_band_weights_0(dst_rc), WEIGHT_EPS, dst_mask_, CMP_GT);
595
596     Blender::blend(dst, dst_mask);
597 }
598
599
600 //////////////////////////////////////////////////////////////////////////////
601 // Auxiliary functions
602
603 #ifdef HAVE_OPENCL
604 static bool ocl_normalizeUsingWeightMap(InputArray _weight, InputOutputArray _mat)
605 {
606     String buildOptions = "-D DEFINE_normalizeUsingWeightMap";
607     ocl::buildOptionsAddMatrixDescription(buildOptions, "mat", _mat);
608     ocl::buildOptionsAddMatrixDescription(buildOptions, "weight", _weight);
609     ocl::Kernel k("normalizeUsingWeightMap", ocl::stitching::multibandblend_oclsrc, buildOptions);
610     if (k.empty())
611         return false;
612
613     UMat mat = _mat.getUMat();
614
615     k.args(ocl::KernelArg::ReadWrite(mat),
616            ocl::KernelArg::ReadOnly(_weight.getUMat())
617            );
618
619     size_t globalsize[2] = {(size_t)mat.cols, (size_t)mat.rows };
620     return k.run(2, globalsize, NULL, false);
621 }
622 #endif
623
624 void normalizeUsingWeightMap(InputArray _weight, InputOutputArray _src)
625 {
626     Mat src;
627     Mat weight;
628
629 #ifdef HAVE_OPENCL
630     if ( !cv::ocl::isOpenCLActivated() ||
631             !ocl_normalizeUsingWeightMap(_weight, _src) )
632 #endif
633     {
634         src = _src.getMat();
635         weight = _weight.getMat();
636
637         CV_Assert(src.type() == CV_16SC3);
638
639         if (weight.type() == CV_32FC1)
640         {
641             for (int y = 0; y < src.rows; ++y)
642             {
643                 Point3_<short> *row = src.ptr<Point3_<short> >(y);
644                 const float *weight_row = weight.ptr<float>(y);
645
646                 for (int x = 0; x < src.cols; ++x)
647                 {
648                     row[x].x = static_cast<short>(row[x].x / (weight_row[x] + WEIGHT_EPS));
649                     row[x].y = static_cast<short>(row[x].y / (weight_row[x] + WEIGHT_EPS));
650                     row[x].z = static_cast<short>(row[x].z / (weight_row[x] + WEIGHT_EPS));
651                 }
652             }
653         }
654         else
655         {
656             CV_Assert(weight.type() == CV_16SC1);
657
658             for (int y = 0; y < src.rows; ++y)
659             {
660                 const short *weight_row = weight.ptr<short>(y);
661                 Point3_<short> *row = src.ptr<Point3_<short> >(y);
662
663                 for (int x = 0; x < src.cols; ++x)
664                 {
665                     int w = weight_row[x] + 1;
666                     row[x].x = static_cast<short>((row[x].x << 8) / w);
667                     row[x].y = static_cast<short>((row[x].y << 8) / w);
668                     row[x].z = static_cast<short>((row[x].z << 8) / w);
669                 }
670             }
671         }
672     }
673 #ifdef HAVE_OPENCL
674     else
675     {
676         CV_IMPL_ADD(CV_IMPL_OCL);
677     }
678 #endif
679 }
680
681
682 void createWeightMap(InputArray mask, float sharpness, InputOutputArray weight)
683 {
684     CV_Assert(mask.type() == CV_8U);
685     distanceTransform(mask, weight, DIST_L1, 3);
686     UMat tmp;
687     multiply(weight, sharpness, tmp);
688     threshold(tmp, weight, 1.f, 1.f, THRESH_TRUNC);
689 }
690
691
692 void createLaplacePyr(InputArray img, int num_levels, std::vector<UMat> &pyr)
693 {
694     pyr.resize(num_levels + 1);
695
696     if(img.depth() == CV_8U)
697     {
698         if(num_levels == 0)
699         {
700             img.getUMat().convertTo(pyr[0], CV_16S);
701             return;
702         }
703
704         UMat downNext;
705         UMat current = img.getUMat();
706         pyrDown(img, downNext);
707
708         for(int i = 1; i < num_levels; ++i)
709         {
710             UMat lvl_up;
711             UMat lvl_down;
712
713             pyrDown(downNext, lvl_down);
714             pyrUp(downNext, lvl_up, current.size());
715             subtract(current, lvl_up, pyr[i-1], noArray(), CV_16S);
716
717             current = downNext;
718             downNext = lvl_down;
719         }
720
721         {
722             UMat lvl_up;
723             pyrUp(downNext, lvl_up, current.size());
724             subtract(current, lvl_up, pyr[num_levels-1], noArray(), CV_16S);
725
726             downNext.convertTo(pyr[num_levels], CV_16S);
727         }
728     }
729     else
730     {
731         pyr[0] = img.getUMat();
732         for (int i = 0; i < num_levels; ++i)
733             pyrDown(pyr[i], pyr[i + 1]);
734         UMat tmp;
735         for (int i = 0; i < num_levels; ++i)
736         {
737             pyrUp(pyr[i + 1], tmp, pyr[i].size());
738             subtract(pyr[i], tmp, pyr[i]);
739         }
740     }
741 }
742
743
744 void createLaplacePyrGpu(InputArray img, int num_levels, std::vector<UMat> &pyr)
745 {
746 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
747     pyr.resize(num_levels + 1);
748
749     std::vector<cuda::GpuMat> gpu_pyr(num_levels + 1);
750     gpu_pyr[0].upload(img);
751     for (int i = 0; i < num_levels; ++i)
752         cuda::pyrDown(gpu_pyr[i], gpu_pyr[i + 1]);
753
754     cuda::GpuMat tmp;
755     for (int i = 0; i < num_levels; ++i)
756     {
757         cuda::pyrUp(gpu_pyr[i + 1], tmp);
758         cuda::subtract(gpu_pyr[i], tmp, gpu_pyr[i]);
759         gpu_pyr[i].download(pyr[i]);
760     }
761
762     gpu_pyr[num_levels].download(pyr[num_levels]);
763 #else
764     (void)img;
765     (void)num_levels;
766     (void)pyr;
767     CV_Error(Error::StsNotImplemented, "CUDA optimization is unavailable");
768 #endif
769 }
770
771
772 void restoreImageFromLaplacePyr(std::vector<UMat> &pyr)
773 {
774     if (pyr.empty())
775         return;
776     UMat tmp;
777     for (size_t i = pyr.size() - 1; i > 0; --i)
778     {
779         pyrUp(pyr[i], tmp, pyr[i - 1].size());
780         add(tmp, pyr[i - 1], pyr[i - 1]);
781     }
782 }
783
784
785 void restoreImageFromLaplacePyrGpu(std::vector<UMat> &pyr)
786 {
787 #if defined(HAVE_OPENCV_CUDAARITHM) && defined(HAVE_OPENCV_CUDAWARPING)
788     if (pyr.empty())
789         return;
790
791     std::vector<cuda::GpuMat> gpu_pyr(pyr.size());
792     for (size_t i = 0; i < pyr.size(); ++i)
793         gpu_pyr[i].upload(pyr[i]);
794
795     cuda::GpuMat tmp;
796     for (size_t i = pyr.size() - 1; i > 0; --i)
797     {
798         cuda::pyrUp(gpu_pyr[i], tmp);
799         cuda::add(tmp, gpu_pyr[i - 1], gpu_pyr[i - 1]);
800     }
801
802     gpu_pyr[0].download(pyr[0]);
803 #else
804     (void)pyr;
805     CV_Error(Error::StsNotImplemented, "CUDA optimization is unavailable");
806 #endif
807 }
808
809 } // namespace detail
810 } // namespace cv