Fixed support of translation in the GPU-based plane warper

[profile/ivi/opencv.git] / modules / gpu / src / imgproc.cpp
diff --git a/modules/gpu/src/imgproc.cpp b/modules/gpu/src/imgproc.cpp

index 20782c6..8b86ce6 100644 (file)
--- a/modules/gpu/src/imgproc.cpp
+++ b/modules/gpu/src/imgproc.cpp
@@ -47,18 +47,18 @@ using namespace cv::gpu;
  \r
  #if !defined (HAVE_CUDA)\r
  \r
-void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, const Scalar&){ throw_nogpu(); }\r
+void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, const Scalar&, Stream&){ throw_nogpu(); }\r
  void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }\r
  void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, Stream&) { throw_nogpu(); }\r
  void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&) { throw_nogpu(); }\r
-void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
+void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_nogpu(); }\r
  void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
  void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int, Stream&) { throw_nogpu(); }\r
-void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, double, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
-void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
-void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, double, double, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
+void cv::gpu::buildWarpPlaneMaps(Size, Rect, const Mat&, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
+void cv::gpu::buildWarpCylindricalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
+void cv::gpu::buildWarpSphericalMaps(Size, Rect, const Mat&, const Mat&, float, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int, Stream&) { throw_nogpu(); }\r
  void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
@@ -90,8 +90,6 @@ void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int) { throw_nogpu(); }
  void cv::gpu::ConvolveBuf::create(Size, Size) { throw_nogpu(); }\r
  void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogpu(); }\r
  void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }\r
-void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
-void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }\r
  void cv::gpu::pyrDown(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  void cv::gpu::pyrUp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }\r
  void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }\r
@@ -110,36 +108,42 @@ void cv::gpu::CannyBuf::release() { throw_nogpu(); }
  namespace cv { namespace gpu {  namespace imgproc\r
  {\r
      template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, \r
-                                         int interpolation, int borderMode, const double borderValue[4]);\r
+                                         int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
  }}}\r
  \r
-void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue)\r
+void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue, Stream& stream)\r
  {\r
      using namespace cv::gpu::imgproc;\r
  \r
-    typedef void (*caller_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);;\r
+    typedef void (*caller_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream);\r
      static const caller_t callers[6][4] = \r
      {\r
-        {remap_gpu<uchar>, remap_gpu<uchar2>, remap_gpu<uchar3>, remap_gpu<uchar4>},\r
-        {remap_gpu<schar>, remap_gpu<char2>, remap_gpu<char3>, remap_gpu<char4>},\r
-        {remap_gpu<ushort>, remap_gpu<ushort2>, remap_gpu<ushort3>, remap_gpu<ushort4>},\r
-        {remap_gpu<short>, remap_gpu<short2>, remap_gpu<short3>, remap_gpu<short4>},\r
-        {remap_gpu<int>, remap_gpu<int2>, remap_gpu<int3>, remap_gpu<int4>},\r
-        {remap_gpu<float>, remap_gpu<float2>, remap_gpu<float3>, remap_gpu<float4>}\r
+        {remap_gpu<uchar>, 0/*remap_gpu<uchar2>*/, remap_gpu<uchar3>, remap_gpu<uchar4>},\r
+        {0/*remap_gpu<schar>*/, 0/*remap_gpu<char2>*/, 0/*remap_gpu<char3>*/, 0/*remap_gpu<char4>*/},\r
+        {remap_gpu<ushort>, 0/*remap_gpu<ushort2>*/, remap_gpu<ushort3>, remap_gpu<ushort4>},\r
+        {remap_gpu<short>, 0/*remap_gpu<short2>*/, remap_gpu<short3>, remap_gpu<short4>},\r
+        {0/*remap_gpu<int>*/, 0/*remap_gpu<int2>*/, 0/*remap_gpu<int3>*/, 0/*remap_gpu<int4>*/},\r
+        {remap_gpu<float>, 0/*remap_gpu<float2>*/, remap_gpu<float3>, remap_gpu<float4>}\r
      };\r
  \r
      CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
      CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());\r
  \r
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);\r
+    caller_t func = callers[src.depth()][src.channels() - 1];\r
+    CV_Assert(func != 0);\r
  \r
-    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT);\r
+    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);\r
+\r
+    CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP);\r
      int gpuBorderType;\r
      CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));\r
  \r
      dst.create(xmap.size(), src.type());\r
+    \r
+    Scalar_<float> borderValueFloat;\r
+    borderValueFloat = borderValue;\r
  \r
-    callers[src.depth()][src.channels() - 1](src, xmap, ymap, dst, interpolation, gpuBorderType, borderValue.val);\r
+    func(src, xmap, ymap, dst, interpolation, gpuBorderType, borderValueFloat.val, StreamAccessor::getStream(stream));\r
  }\r
  \r
  ////////////////////////////////////////////////////////////////////////\r
@@ -269,14 +273,15 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q,
  ////////////////////////////////////////////////////////////////////////\r
  // resize\r
  \r
-void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)\r
+namespace cv { namespace gpu {  namespace imgproc\r
  {\r
-    static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR/*, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS*/};\r
-\r
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);\r
-    CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR/* || interpolation == INTER_CUBIC || interpolation == INTER_LANCZOS4*/);\r
+    template <typename T> void resize_gpu(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+}}}\r
  \r
-    CV_Assert( src.size().area() > 0 );\r
+void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)\r
+{\r
+    CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );\r
+    CV_Assert( interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC );\r
      CV_Assert( !(dsize == Size()) || (fx > 0 && fy > 0) );\r
  \r
      if( dsize == Size() )\r
@@ -291,93 +296,163 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
  \r
      dst.create(dsize, src.type());\r
  \r
-    NppiSize srcsz;\r
-    srcsz.width  = src.cols;\r
-    srcsz.height = src.rows;\r
-    NppiRect srcrect;\r
-    srcrect.x = srcrect.y = 0;\r
-    srcrect.width  = src.cols;\r
-    srcrect.height = src.rows;\r
-    NppiSize dstsz;\r
-    dstsz.width  = dst.cols;\r
-    dstsz.height = dst.rows;\r
+    if (dsize == src.size())\r
+    {\r
+        if (s)\r
+            s.enqueueCopy(src, dst);\r
+        else\r
+            src.copyTo(dst);\r
+        return;\r
+    }\r
  \r
      cudaStream_t stream = StreamAccessor::getStream(s);\r
  \r
-    NppStreamHandler h(stream);\r
-\r
-    if (src.type() == CV_8UC1)\r
+    if ((src.type() == CV_8UC1 || src.type() == CV_8UC4) && (interpolation == INTER_NEAREST || interpolation == INTER_LINEAR))\r
      {\r
-        nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
+        static const int npp_inter[] = {NPPI_INTER_NN, NPPI_INTER_LINEAR, NPPI_INTER_CUBIC, 0, NPPI_INTER_LANCZOS};\r
+\r
+        NppiSize srcsz;\r
+        srcsz.width  = src.cols;\r
+        srcsz.height = src.rows;\r
+        NppiRect srcrect;\r
+        srcrect.x = srcrect.y = 0;\r
+        srcrect.width  = src.cols;\r
+        srcrect.height = src.rows;\r
+        NppiSize dstsz;\r
+        dstsz.width  = dst.cols;\r
+        dstsz.height = dst.rows;\r
+\r
+        NppStreamHandler h(stream);\r
+\r
+        if (src.type() == CV_8UC1)\r
+        {\r
+            nppSafeCall( nppiResize_8u_C1R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
+        }\r
+        else\r
+        {\r
+            nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
+                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
+        }\r
+\r
+        if (stream == 0)\r
+            cudaSafeCall( cudaDeviceSynchronize() );\r
      }\r
      else\r
      {\r
-        nppSafeCall( nppiResize_8u_C4R(src.ptr<Npp8u>(), srcsz, static_cast<int>(src.step), srcrect,\r
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, fx, fy, npp_inter[interpolation]) );\r
-    }\r
+        using namespace cv::gpu::imgproc;\r
  \r
-    if (stream == 0)\r
-        cudaSafeCall( cudaDeviceSynchronize() );\r
+        typedef void (*caller_t)(const DevMem2D& src, float fx, float fy, const DevMem2D& dst, int interpolation, cudaStream_t stream);\r
+        static const caller_t callers[6][4] = \r
+        {\r
+            {resize_gpu<uchar>, 0/*resize_gpu<uchar2>*/, resize_gpu<uchar3>, resize_gpu<uchar4>},\r
+            {0/*resize_gpu<schar>*/, 0/*resize_gpu<char2>*/, 0/*resize_gpu<char3>*/, 0/*resize_gpu<char4>*/},\r
+            {resize_gpu<ushort>, 0/*resize_gpu<ushort2>*/, resize_gpu<ushort3>, resize_gpu<ushort4>},\r
+            {resize_gpu<short>, 0/*resize_gpu<short2>*/, resize_gpu<short3>, resize_gpu<short4>},\r
+            {0/*resize_gpu<int>*/, 0/*resize_gpu<int2>*/, 0/*resize_gpu<int3>*/, 0/*resize_gpu<int4>*/},\r
+            {resize_gpu<float>, 0/*resize_gpu<float2>*/, resize_gpu<float3>, resize_gpu<float4>}\r
+        };\r
+\r
+        callers[src.depth()][src.channels() - 1](src, static_cast<float>(fx), static_cast<float>(fy), dst, interpolation, stream);\r
+    }\r
  }\r
  \r
  ////////////////////////////////////////////////////////////////////////\r
  // copyMakeBorder\r
  \r
-void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, const Scalar& value, Stream& s)\r
+namespace cv { namespace gpu {  namespace imgproc\r
  {\r
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1);\r
+    template <typename T, int cn> void copyMakeBorder_gpu(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderMode, const T* borderValue, cudaStream_t stream);\r
+}}}\r
  \r
-    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());\r
+namespace\r
+{\r
+    template <typename T, int cn> void copyMakeBorder_caller(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream)\r
+    {\r
+        Scalar_<T> val(saturate_cast<T>(value[0]), saturate_cast<T>(value[1]), saturate_cast<T>(value[2]), saturate_cast<T>(value[3]));\r
  \r
-    NppiSize srcsz;\r
-    srcsz.width  = src.cols;\r
-    srcsz.height = src.rows;\r
-    NppiSize dstsz;\r
-    dstsz.width  = dst.cols;\r
-    dstsz.height = dst.rows;\r
+        imgproc::copyMakeBorder_gpu<T, cn>(src, dst, top, left, borderType, val.val, stream);\r
+    }\r
+}\r
  \r
-    cudaStream_t stream = StreamAccessor::getStream(s);\r
+void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s)\r
+{\r
+    CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
+    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
  \r
-    NppStreamHandler h(stream);\r
+    dst.create(src.rows + top + bottom, src.cols + left + right, src.type());\r
  \r
-    switch (src.type())\r
+    cudaStream_t stream = StreamAccessor::getStream(s);\r
+\r
+    if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1))\r
      {\r
-    case CV_8UC1:\r
-        {\r
-            Npp8u nVal = static_cast<Npp8u>(value[0]);\r
-            nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
-                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
-            break;\r
-        }\r
-    case CV_8UC4:\r
-        {\r
-            Npp8u nVal[] = {static_cast<Npp8u>(value[0]), static_cast<Npp8u>(value[1]), static_cast<Npp8u>(value[2]), static_cast<Npp8u>(value[3])};\r
-            nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
-                dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
-            break;\r
-        }\r
-    case CV_32SC1:\r
-        {\r
-            Npp32s nVal = static_cast<Npp32s>(value[0]);\r
-            nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
-                dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
-            break;\r
-        }\r
-    case CV_32FC1:\r
+        NppiSize srcsz;\r
+        srcsz.width  = src.cols;\r
+        srcsz.height = src.rows;\r
+\r
+        NppiSize dstsz;\r
+        dstsz.width  = dst.cols;\r
+        dstsz.height = dst.rows;\r
+\r
+        NppStreamHandler h(stream);\r
+\r
+        switch (src.type())\r
          {\r
-            Npp32f val = static_cast<Npp32f>(value[0]);\r
-            Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));\r
-            nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
-                dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
-            break;\r
+        case CV_8UC1:\r
+            {\r
+                Npp8u nVal = saturate_cast<Npp8u>(value[0]);\r
+                nppSafeCall( nppiCopyConstBorder_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
+                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
+                break;\r
+            }\r
+        case CV_8UC4:\r
+            {\r
+                Npp8u nVal[] = {saturate_cast<Npp8u>(value[0]), saturate_cast<Npp8u>(value[1]), saturate_cast<Npp8u>(value[2]), saturate_cast<Npp8u>(value[3])};\r
+                nppSafeCall( nppiCopyConstBorder_8u_C4R(src.ptr<Npp8u>(), static_cast<int>(src.step), srcsz,\r
+                    dst.ptr<Npp8u>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
+                break;\r
+            }\r
+        case CV_32SC1:\r
+            {\r
+                Npp32s nVal = saturate_cast<Npp32s>(value[0]);\r
+                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
+                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
+                break;\r
+            }\r
+        case CV_32FC1:\r
+            {\r
+                Npp32f val = saturate_cast<Npp32f>(value[0]);\r
+                Npp32s nVal = *(reinterpret_cast<Npp32s*>(&val));\r
+                nppSafeCall( nppiCopyConstBorder_32s_C1R(src.ptr<Npp32s>(), static_cast<int>(src.step), srcsz,\r
+                    dst.ptr<Npp32s>(), static_cast<int>(dst.step), dstsz, top, left, nVal) );\r
+                break;\r
+            }\r
          }\r
-    default:\r
-        CV_Assert(!"Unsupported source type");\r
+\r
+        if (stream == 0)\r
+            cudaSafeCall( cudaDeviceSynchronize() );\r
      }\r
+    else\r
+    {\r
+        typedef void (*caller_t)(const DevMem2D& src, const DevMem2D& dst, int top, int left, int borderType, const Scalar& value, cudaStream_t stream);\r
+        static const caller_t callers[6][4] = \r
+        {\r
+            {   copyMakeBorder_caller<uchar, 1>  , 0/*copyMakeBorder_caller<uchar, 2>*/ ,    copyMakeBorder_caller<uchar, 3>  ,    copyMakeBorder_caller<uchar, 4>},\r
+            {0/*copyMakeBorder_caller<schar, 1>*/, 0/*copyMakeBorder_caller<schar, 2>*/ , 0/*copyMakeBorder_caller<schar, 3>*/, 0/*copyMakeBorder_caller<schar, 4>*/},\r
+            {   copyMakeBorder_caller<ushort, 1> , 0/*copyMakeBorder_caller<ushort, 2>*/,    copyMakeBorder_caller<ushort, 3> ,    copyMakeBorder_caller<ushort, 4>},\r
+            {   copyMakeBorder_caller<short, 1>  , 0/*copyMakeBorder_caller<short, 2>*/ ,    copyMakeBorder_caller<short, 3>  ,    copyMakeBorder_caller<short, 4>},\r
+            {0/*copyMakeBorder_caller<int, 1>*/  , 0/*copyMakeBorder_caller<int, 2>*/   , 0/*copyMakeBorder_caller<int, 3>*/  , 0/*copyMakeBorder_caller<int, 4>*/},\r
+            {   copyMakeBorder_caller<float, 1>  , 0/*copyMakeBorder_caller<float, 2>*/ ,    copyMakeBorder_caller<float, 3>  ,    copyMakeBorder_caller<float ,4>}\r
+        };\r
  \r
-    if (stream == 0)\r
-        cudaSafeCall( cudaDeviceSynchronize() );\r
+        caller_t func = callers[src.depth()][src.channels() - 1];\r
+        CV_Assert(func != 0);\r
+\r
+        int gpuBorderType;\r
+        CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
+\r
+        func(src, dst, top, left, gpuBorderType, value, stream);\r
+    }\r
  }\r
  \r
  ////////////////////////////////////////////////////////////////////////\r
@@ -548,22 +623,26 @@ void cv::gpu::warpPerspective(const GpuMat& src, GpuMat& dst, const Mat& M, Size
  namespace cv { namespace gpu { namespace imgproc\r
  {\r
      void buildWarpPlaneMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
-                            const float r[9], const float rinv[9], float f, float s, float dist,\r
-                            float half_w, float half_h, cudaStream_t stream);\r
+                            const float k_rinv[9], const float r_kinv[9], const float t[3], float scale,\r
+                            cudaStream_t stream);\r
  }}}\r
  \r
-void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
-                                 double dist, GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
+void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, const Mat &T, \r
+                                 float scale, GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
  {\r
-    CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
-    Mat Rinv = R.inv();\r
-    CV_Assert(Rinv.isContinuous());\r
+    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);\r
+    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);\r
+    CV_Assert((T.size() == Size(3,1) || T.size() == Size(1,3)) && T.type() == CV_32F && T.isContinuous());\r
+\r
+    Mat K_Rinv = K * R.t();\r
+    Mat R_Kinv = R * K.inv();\r
+    CV_Assert(K_Rinv.isContinuous());\r
+    CV_Assert(R_Kinv.isContinuous());\r
  \r
      map_x.create(dst_roi.size(), CV_32F);\r
      map_y.create(dst_roi.size(), CV_32F);\r
-    imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
-                                static_cast<float>(f), static_cast<float>(s), static_cast<float>(dist), \r
-                                0.5f*src_size.width, 0.5f*src_size.height, StreamAccessor::getStream(stream));\r
+    imgproc::buildWarpPlaneMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(), \r
+                                T.ptr<float>(), scale, StreamAccessor::getStream(stream));\r
  }\r
  \r
  //////////////////////////////////////////////////////////////////////////////\r
@@ -572,22 +651,25 @@ void cv::gpu::buildWarpPlaneMaps(Size src_size, Rect dst_roi, const Mat& R, doub
  namespace cv { namespace gpu { namespace imgproc\r
  {\r
      void buildWarpCylindricalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
-                                  const float r[9], const float rinv[9], float f, float s,\r
-                                  float half_w, float half_h, cudaStream_t stream);\r
+                                  const float k_rinv[9], const float r_kinv[9], float scale,\r
+                                  cudaStream_t stream);\r
  }}}\r
  \r
-void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
+void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,\r
                                         GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
  {\r
-    CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
-    Mat Rinv = R.inv();\r
-    CV_Assert(Rinv.isContinuous());\r
+    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);\r
+    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);\r
+\r
+    Mat K_Rinv = K * R.t();\r
+    Mat R_Kinv = R * K.inv();\r
+    CV_Assert(K_Rinv.isContinuous());\r
+    CV_Assert(R_Kinv.isContinuous());\r
  \r
      map_x.create(dst_roi.size(), CV_32F);\r
      map_y.create(dst_roi.size(), CV_32F);\r
-    imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
-                                      static_cast<float>(f), static_cast<float>(s), 0.5f*src_size.width, 0.5f*src_size.height, \r
-                                      StreamAccessor::getStream(stream));\r
+    imgproc::buildWarpCylindricalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),\r
+                                      scale, StreamAccessor::getStream(stream));\r
  }\r
  \r
  \r
@@ -597,22 +679,25 @@ void cv::gpu::buildWarpCylindricalMaps(Size src_size, Rect dst_roi, const Mat& R
  namespace cv { namespace gpu { namespace imgproc\r
  {\r
      void buildWarpSphericalMaps(int tl_u, int tl_v, DevMem2Df map_x, DevMem2Df map_y,\r
-                                const float r[9], const float rinv[9], float f, float s,\r
-                                float half_w, float half_h, cudaStream_t stream);\r
+                                const float k_rinv[9], const float r_kinv[9], float scale,\r
+                                cudaStream_t stream);\r
  }}}\r
  \r
-void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat& R, double f, double s,\r
+void cv::gpu::buildWarpSphericalMaps(Size src_size, Rect dst_roi, const Mat &K, const Mat& R, float scale,\r
                                       GpuMat& map_x, GpuMat& map_y, Stream& stream)\r
  {\r
-    CV_Assert(R.size() == Size(3,3) && R.isContinuous() && R.type() == CV_32F);\r
-    Mat Rinv = R.inv();\r
-    CV_Assert(Rinv.isContinuous());\r
+    CV_Assert(K.size() == Size(3,3) && K.type() == CV_32F);\r
+    CV_Assert(R.size() == Size(3,3) && R.type() == CV_32F);\r
+\r
+    Mat K_Rinv = K * R.t();\r
+    Mat R_Kinv = R * K.inv();\r
+    CV_Assert(K_Rinv.isContinuous());\r
+    CV_Assert(R_Kinv.isContinuous());\r
  \r
      map_x.create(dst_roi.size(), CV_32F);\r
      map_y.create(dst_roi.size(), CV_32F);\r
-    imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, R.ptr<float>(), Rinv.ptr<float>(),\r
-                                    static_cast<float>(f), static_cast<float>(s), 0.5f*src_size.width, 0.5f*src_size.height, \r
-                                    StreamAccessor::getStream(stream));\r
+    imgproc::buildWarpSphericalMaps(dst_roi.tl().x, dst_roi.tl().y, map_x, map_y, K_Rinv.ptr<float>(), R_Kinv.ptr<float>(),\r
+                                    scale, StreamAccessor::getStream(stream));\r
  }\r
  \r
  ////////////////////////////////////////////////////////////////////////\r
@@ -1228,24 +1313,26 @@ namespace
  \r
  bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)\r
  {\r
-    if (cpuBorderType == cv::BORDER_REFLECT101)\r
+    switch (cpuBorderType)\r
      {\r
+    case cv::BORDER_REFLECT101:\r
          gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;\r
          return true;\r
-    }\r
-\r
-    if (cpuBorderType == cv::BORDER_REPLICATE)\r
-    {\r
+    case cv::BORDER_REPLICATE:\r
          gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;\r
          return true;\r
-    }\r
-    \r
-    if (cpuBorderType == cv::BORDER_CONSTANT)\r
-    {\r
+    case cv::BORDER_CONSTANT:\r
          gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;\r
          return true;\r
-    }\r
-\r
+    case cv::BORDER_REFLECT:\r
+        gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;\r
+        return true;\r
+    case cv::BORDER_WRAP:\r
+        gpuBorderType = cv::gpu::BORDER_WRAP_GPU;\r
+        return true;\r
+    default:\r
+        return false;\r
+    };\r
      return false;\r
  }\r
  \r
@@ -1552,75 +1639,6 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
      cufftSafeCall(cufftDestroy(planC2R));\r
  }\r
  \r
-\r
-////////////////////////////////////////////////////////////////////\r
-// downsample\r
-\r
-namespace cv { namespace gpu { namespace imgproc\r
-{\r
-    template <typename T, int cn>\r
-    void downsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);\r
-}}}\r
-\r
-\r
-void cv::gpu::downsample(const GpuMat& src, GpuMat& dst, Stream& stream)\r
-{\r
-    CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
-\r
-    typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);\r
-    static const Caller callers[6][4] =\r
-        {{imgproc::downsampleCaller<uchar,1>, imgproc::downsampleCaller<uchar,2>,\r
-          imgproc::downsampleCaller<uchar,3>, imgproc::downsampleCaller<uchar,4>},\r
-         {0,0,0,0}, {0,0,0,0},\r
-         {imgproc::downsampleCaller<short,1>, imgproc::downsampleCaller<short,2>,\r
-          imgproc::downsampleCaller<short,3>, imgproc::downsampleCaller<short,4>},\r
-         {0,0,0,0},\r
-         {imgproc::downsampleCaller<float,1>, imgproc::downsampleCaller<float,2>,\r
-          imgproc::downsampleCaller<float,3>, imgproc::downsampleCaller<float,4>}};\r
-\r
-    Caller caller = callers[src.depth()][src.channels()-1];\r
-    if (!caller)\r
-        CV_Error(CV_StsUnsupportedFormat, "bad number of channels");\r
-\r
-    dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());\r
-    caller(src, dst.reshape(1), StreamAccessor::getStream(stream));\r
-}\r
-\r
-\r
-//////////////////////////////////////////////////////////////////////////////\r
-// upsample\r
-\r
-namespace cv { namespace gpu { namespace imgproc\r
-{\r
-    template <typename T, int cn>\r
-    void upsampleCaller(const DevMem2D src, DevMem2D dst, cudaStream_t stream);\r
-}}}\r
-\r
-\r
-void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream)\r
-{\r
-    CV_Assert(src.depth() < CV_64F && src.channels() <= 4);\r
-\r
-    typedef void (*Caller)(const DevMem2D, DevMem2D, cudaStream_t stream);\r
-    static const Caller callers[6][5] =\r
-        {{imgproc::upsampleCaller<uchar,1>, imgproc::upsampleCaller<uchar,2>,\r
-          imgproc::upsampleCaller<uchar,3>, imgproc::upsampleCaller<uchar,4>},\r
-         {0,0,0,0}, {0,0,0,0},\r
-         {imgproc::upsampleCaller<short,1>, imgproc::upsampleCaller<short,2>,\r
-          imgproc::upsampleCaller<short,3>, imgproc::upsampleCaller<short,4>},\r
-         {0,0,0,0},\r
-         {imgproc::upsampleCaller<float,1>, imgproc::upsampleCaller<float,2>,\r
-          imgproc::upsampleCaller<float,3>, imgproc::upsampleCaller<float,4>}};\r
-\r
-    Caller caller = callers[src.depth()][src.channels()-1];\r
-    if (!caller)\r
-        CV_Error(CV_StsUnsupportedFormat, "bad number of channels");\r
-\r
-    dst.create(src.rows*2, src.cols*2, src.type());\r
-    caller(src, dst.reshape(1), StreamAccessor::getStream(stream));\r
-}\r
-\r
-\r
  //////////////////////////////////////////////////////////////////////////////\r
  // pyrDown\r
  \r
@@ -1647,7 +1665,7 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& st
  \r
      CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
  \r
-    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);\r
+    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
      int gpuBorderType;\r
      CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
  \r
@@ -1683,7 +1701,7 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stre
  \r
      CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);\r
  \r
-    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);\r
+    CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP);\r
      int gpuBorderType;\r
      CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));\r
  \r
@@ -1780,6 +1798,7 @@ void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_th
  {\r
      using namespace cv::gpu::canny;\r
  \r
+    CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));\r
      CV_Assert(src.type() == CV_8UC1);\r
  \r
      if( low_thresh > high_thresh )\r
@@ -1818,6 +1837,7 @@ void cv::gpu::Canny(const GpuMat& dx, const GpuMat& dy, CannyBuf& buf, GpuMat& d
  {\r
      using namespace cv::gpu::canny;\r
  \r
+    CV_Assert(TargetArchs::builtWith(SHARED_ATOMICS) && DeviceInfo().supports(SHARED_ATOMICS));\r
      CV_Assert(dx.type() == CV_32SC1 && dy.type() == CV_32SC1 && dx.size() == dy.size());\r
  \r
      if( low_thresh > high_thresh )\r