used new device layer for cv::gpu::addWeighted
author Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Mon, 26 Aug 2013 06:30:04 +0000 (10:30 +0400)
committer Vladislav Vinogradov <vlad.vinogradov@itseez.com>
Tue, 1 Oct 2013 08:18:37 +0000 (12:18 +0400)
modules/cudaarithm/src/cuda/add_weighted.cu
modules/cudaarithm/src/element_operations.cpp

index aa305d9..d5c00f6 100644 (file)
 //
 //M*/
 
-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"
 
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/transform.hpp"
-#include "opencv2/core/cuda/saturate_cast.hpp"
+#ifndef HAVE_OPENCV_CUDEV
 
-#include "arithm_func_traits.hpp"
+#error "opencv_cudev is required"
 
-using namespace cv::cuda;
-using namespace cv::cuda::device;
+#else
 
-namespace arithm
-{
-    template <typename T> struct UseDouble_
-    {
-        enum {value = 0};
-    };
-    template <> struct UseDouble_<double>
-    {
-        enum {value = 1};
-    };
-    template <typename T1, typename T2, typename D> struct UseDouble
-    {
-        enum {value = (UseDouble_<T1>::value || UseDouble_<T2>::value || UseDouble_<D>::value)};
-    };
-
-    template <typename T1, typename T2, typename D, bool useDouble> struct AddWeighted_;
-    template <typename T1, typename T2, typename D> struct AddWeighted_<T1, T2, D, false> : binary_function<T1, T2, D>
-    {
-        float alpha;
-        float beta;
-        float gamma;
+#include "opencv2/cudaarithm.hpp"
+#include "opencv2/cudev.hpp"
 
-        __host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
+using namespace cv::cudev;
 
-        __device__ __forceinline__ D operator ()(T1 a, T2 b) const
-        {
-            return saturate_cast<D>(a * alpha + b * beta + gamma);
-        }
-    };
-    template <typename T1, typename T2, typename D> struct AddWeighted_<T1, T2, D, true> : binary_function<T1, T2, D>
+namespace
+{
+    template <typename T1, typename T2, typename D, typename S> struct AddWeightedOp : binary_function<T1, T2, D>
     {
-        double alpha;
-        double beta;
-        double gamma;
-
-        __host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
+        S alpha;
+        S beta;
+        S gamma;
 
         __device__ __forceinline__ D operator ()(T1 a, T2 b) const
         {
             return saturate_cast<D>(a * alpha + b * beta + gamma);
         }
     };
-    template <typename T1, typename T2, typename D> struct AddWeighted : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>
-    {
-        AddWeighted(double alpha_, double beta_, double gamma_) : AddWeighted_<T1, T2, D, UseDouble<T1, T2, D>::value>(alpha_, beta_, gamma_) {}
-    };
-}
 
-namespace cv { namespace cuda { namespace device
-{
-    template <typename T1, typename T2, typename D, size_t src1_size, size_t src2_size, size_t dst_size> struct AddWeightedTraits : DefaultTransformFunctorTraits< arithm::AddWeighted<T1, T2, D> >
+    template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
     {
     };
-    template <typename T1, typename T2, typename D, size_t src_size, size_t dst_size> struct AddWeightedTraits<T1, T2, D, src_size, src_size, dst_size> : arithm::ArithmFuncTraits<src_size, dst_size>
+    template <> struct TransformPolicy<double> : DefaultTransformPolicy
     {
+        enum {
+            shift = 1
+        };
     };
 
-    template <typename T1, typename T2, typename D> struct TransformFunctorTraits< arithm::AddWeighted<T1, T2, D> > : AddWeightedTraits<T1, T2, D, sizeof(T1), sizeof(T2), sizeof(D)>
-    {
-    };
-}}}
-
-namespace arithm
-{
     template <typename T1, typename T2, typename D>
-    void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream)
+    void addWeightedImpl(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, Stream& stream)
     {
-        AddWeighted<T1, T2, D> op(alpha, beta, gamma);
-
-        device::transform((PtrStepSz<T1>) src1, (PtrStepSz<T2>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
-    }
-
-    template void addWeighted<uchar, uchar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, uchar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<uchar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<uchar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
+        typedef typename LargerType<T1, T2>::type larger_type1;
+        typedef typename LargerType<larger_type1, D>::type larger_type2;
+        typedef typename LargerType<larger_type2, float>::type scalar_type;
 
+        AddWeightedOp<T1, T2, D, scalar_type> op;
+        op.alpha = static_cast<scalar_type>(alpha);
+        op.beta = static_cast<scalar_type>(beta);
+        op.gamma = static_cast<scalar_type>(gamma);
 
-    template void addWeighted<schar, schar, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, schar, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<schar, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<schar, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-
-
-    template void addWeighted<ushort, ushort, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, ushort, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<ushort, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<ushort, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<ushort, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<ushort, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<ushort, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-
-
-    template void addWeighted<short, short, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, short, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<short, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<short, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
-    template void addWeighted<short, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<short, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-
+        gridTransformBinary_< TransformPolicy<scalar_type> >(globPtr<T1>(src1), globPtr<T2>(src2), globPtr<D>(dst), op, stream);
+    }
+}
 
+// Host-side entry point for addWeighted: validates the operands, allocates the
+// destination and forwards to the addWeightedImpl<T1, T2, D> instantiation
+// selected by the depths of src1, src2 and dst. The per-element arithmetic is
+// performed by addWeightedImpl, not here.
+//
+// _src1, _src2 - inputs; must agree in size and channel count.
+// alpha, beta  - weights paired with _src1 and _src2 respectively.
+// gamma        - forwarded unchanged to the implementation.
+// _dst         - created here with the size and channel count of _src1 and depth ddepth.
+// ddepth       - destination depth; a negative value selects max(depth(src1), depth(src2)).
+// stream       - forwarded to the implementation.
+//
+// Raises cv::Error::StsUnsupportedFormat when the dispatch table has no entry
+// for the depth combination; CV_Assert fires on mismatched or unsupported operands.
+void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, double beta, double gamma, OutputArray _dst, int ddepth, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, Stream& stream);
+    // Dispatch table indexed as funcs[depth(src1)][depth(src2)][depth(dst)], with
+    // depths in the order uchar, schar, ushort, short, int, float, double.
+    // Only combinations with depth(src1) <= depth(src2) are instantiated; the
+    // mirrored entries are left null because the operands are swapped into that
+    // order before the lookup.
+    static const func_t funcs[7][7][7] =
+    {
+        {
+            {
+                addWeightedImpl<uchar, uchar, uchar >,
+                addWeightedImpl<uchar, uchar, schar >,
+                addWeightedImpl<uchar, uchar, ushort>,
+                addWeightedImpl<uchar, uchar, short >,
+                addWeightedImpl<uchar, uchar, int   >,
+                addWeightedImpl<uchar, uchar, float >,
+                addWeightedImpl<uchar, uchar, double>
+            },
+            {
+                addWeightedImpl<uchar, schar, uchar >,
+                addWeightedImpl<uchar, schar, schar >,
+                addWeightedImpl<uchar, schar, ushort>,
+                addWeightedImpl<uchar, schar, short >,
+                addWeightedImpl<uchar, schar, int   >,
+                addWeightedImpl<uchar, schar, float >,
+                addWeightedImpl<uchar, schar, double>
+            },
+            {
+                addWeightedImpl<uchar, ushort, uchar >,
+                addWeightedImpl<uchar, ushort, schar >,
+                addWeightedImpl<uchar, ushort, ushort>,
+                addWeightedImpl<uchar, ushort, short >,
+                addWeightedImpl<uchar, ushort, int   >,
+                addWeightedImpl<uchar, ushort, float >,
+                addWeightedImpl<uchar, ushort, double>
+            },
+            {
+                addWeightedImpl<uchar, short, uchar >,
+                addWeightedImpl<uchar, short, schar >,
+                addWeightedImpl<uchar, short, ushort>,
+                addWeightedImpl<uchar, short, short >,
+                addWeightedImpl<uchar, short, int   >,
+                addWeightedImpl<uchar, short, float >,
+                addWeightedImpl<uchar, short, double>
+            },
+            {
+                addWeightedImpl<uchar, int, uchar >,
+                addWeightedImpl<uchar, int, schar >,
+                addWeightedImpl<uchar, int, ushort>,
+                addWeightedImpl<uchar, int, short >,
+                addWeightedImpl<uchar, int, int   >,
+                addWeightedImpl<uchar, int, float >,
+                addWeightedImpl<uchar, int, double>
+            },
+            {
+                addWeightedImpl<uchar, float, uchar >,
+                addWeightedImpl<uchar, float, schar >,
+                addWeightedImpl<uchar, float, ushort>,
+                addWeightedImpl<uchar, float, short >,
+                addWeightedImpl<uchar, float, int   >,
+                addWeightedImpl<uchar, float, float >,
+                addWeightedImpl<uchar, float, double>
+            },
+            {
+                addWeightedImpl<uchar, double, uchar >,
+                addWeightedImpl<uchar, double, schar >,
+                addWeightedImpl<uchar, double, ushort>,
+                addWeightedImpl<uchar, double, short >,
+                addWeightedImpl<uchar, double, int   >,
+                addWeightedImpl<uchar, double, float >,
+                addWeightedImpl<uchar, double, double>
+            }
+        },
+        {
+            {
+                0/*addWeightedImpl<schar, uchar, uchar >*/,
+                0/*addWeightedImpl<schar, uchar, schar >*/,
+                0/*addWeightedImpl<schar, uchar, ushort>*/,
+                0/*addWeightedImpl<schar, uchar, short >*/,
+                0/*addWeightedImpl<schar, uchar, int   >*/,
+                0/*addWeightedImpl<schar, uchar, float >*/,
+                0/*addWeightedImpl<schar, uchar, double>*/
+            },
+            {
+                addWeightedImpl<schar, schar, uchar >,
+                addWeightedImpl<schar, schar, schar >,
+                addWeightedImpl<schar, schar, ushort>,
+                addWeightedImpl<schar, schar, short >,
+                addWeightedImpl<schar, schar, int   >,
+                addWeightedImpl<schar, schar, float >,
+                addWeightedImpl<schar, schar, double>
+            },
+            {
+                addWeightedImpl<schar, ushort, uchar >,
+                addWeightedImpl<schar, ushort, schar >,
+                addWeightedImpl<schar, ushort, ushort>,
+                addWeightedImpl<schar, ushort, short >,
+                addWeightedImpl<schar, ushort, int   >,
+                addWeightedImpl<schar, ushort, float >,
+                addWeightedImpl<schar, ushort, double>
+            },
+            {
+                addWeightedImpl<schar, short, uchar >,
+                addWeightedImpl<schar, short, schar >,
+                addWeightedImpl<schar, short, ushort>,
+                addWeightedImpl<schar, short, short >,
+                addWeightedImpl<schar, short, int   >,
+                addWeightedImpl<schar, short, float >,
+                addWeightedImpl<schar, short, double>
+            },
+            {
+                addWeightedImpl<schar, int, uchar >,
+                addWeightedImpl<schar, int, schar >,
+                addWeightedImpl<schar, int, ushort>,
+                addWeightedImpl<schar, int, short >,
+                addWeightedImpl<schar, int, int   >,
+                addWeightedImpl<schar, int, float >,
+                addWeightedImpl<schar, int, double>
+            },
+            {
+                addWeightedImpl<schar, float, uchar >,
+                addWeightedImpl<schar, float, schar >,
+                addWeightedImpl<schar, float, ushort>,
+                addWeightedImpl<schar, float, short >,
+                addWeightedImpl<schar, float, int   >,
+                addWeightedImpl<schar, float, float >,
+                addWeightedImpl<schar, float, double>
+            },
+            {
+                addWeightedImpl<schar, double, uchar >,
+                addWeightedImpl<schar, double, schar >,
+                addWeightedImpl<schar, double, ushort>,
+                addWeightedImpl<schar, double, short >,
+                addWeightedImpl<schar, double, int   >,
+                addWeightedImpl<schar, double, float >,
+                addWeightedImpl<schar, double, double>
+            }
+        },
+        {
+            {
+                0/*addWeightedImpl<ushort, uchar, uchar >*/,
+                0/*addWeightedImpl<ushort, uchar, schar >*/,
+                0/*addWeightedImpl<ushort, uchar, ushort>*/,
+                0/*addWeightedImpl<ushort, uchar, short >*/,
+                0/*addWeightedImpl<ushort, uchar, int   >*/,
+                0/*addWeightedImpl<ushort, uchar, float >*/,
+                0/*addWeightedImpl<ushort, uchar, double>*/
+            },
+            {
+                0/*addWeightedImpl<ushort, schar, uchar >*/,
+                0/*addWeightedImpl<ushort, schar, schar >*/,
+                0/*addWeightedImpl<ushort, schar, ushort>*/,
+                0/*addWeightedImpl<ushort, schar, short >*/,
+                0/*addWeightedImpl<ushort, schar, int   >*/,
+                0/*addWeightedImpl<ushort, schar, float >*/,
+                0/*addWeightedImpl<ushort, schar, double>*/
+            },
+            {
+                addWeightedImpl<ushort, ushort, uchar >,
+                addWeightedImpl<ushort, ushort, schar >,
+                addWeightedImpl<ushort, ushort, ushort>,
+                addWeightedImpl<ushort, ushort, short >,
+                addWeightedImpl<ushort, ushort, int   >,
+                addWeightedImpl<ushort, ushort, float >,
+                addWeightedImpl<ushort, ushort, double>
+            },
+            {
+                addWeightedImpl<ushort, short, uchar >,
+                addWeightedImpl<ushort, short, schar >,
+                addWeightedImpl<ushort, short, ushort>,
+                addWeightedImpl<ushort, short, short >,
+                addWeightedImpl<ushort, short, int   >,
+                addWeightedImpl<ushort, short, float >,
+                addWeightedImpl<ushort, short, double>
+            },
+            {
+                addWeightedImpl<ushort, int, uchar >,
+                addWeightedImpl<ushort, int, schar >,
+                addWeightedImpl<ushort, int, ushort>,
+                addWeightedImpl<ushort, int, short >,
+                addWeightedImpl<ushort, int, int   >,
+                addWeightedImpl<ushort, int, float >,
+                addWeightedImpl<ushort, int, double>
+            },
+            {
+                addWeightedImpl<ushort, float, uchar >,
+                addWeightedImpl<ushort, float, schar >,
+                addWeightedImpl<ushort, float, ushort>,
+                addWeightedImpl<ushort, float, short >,
+                addWeightedImpl<ushort, float, int   >,
+                addWeightedImpl<ushort, float, float >,
+                addWeightedImpl<ushort, float, double>
+            },
+            {
+                addWeightedImpl<ushort, double, uchar >,
+                addWeightedImpl<ushort, double, schar >,
+                addWeightedImpl<ushort, double, ushort>,
+                addWeightedImpl<ushort, double, short >,
+                addWeightedImpl<ushort, double, int   >,
+                addWeightedImpl<ushort, double, float >,
+                addWeightedImpl<ushort, double, double>
+            }
+        },
+        {
+            {
+                0/*addWeightedImpl<short, uchar, uchar >*/,
+                0/*addWeightedImpl<short, uchar, schar >*/,
+                0/*addWeightedImpl<short, uchar, ushort>*/,
+                0/*addWeightedImpl<short, uchar, short >*/,
+                0/*addWeightedImpl<short, uchar, int   >*/,
+                0/*addWeightedImpl<short, uchar, float >*/,
+                0/*addWeightedImpl<short, uchar, double>*/
+            },
+            {
+                0/*addWeightedImpl<short, schar, uchar >*/,
+                0/*addWeightedImpl<short, schar, schar >*/,
+                0/*addWeightedImpl<short, schar, ushort>*/,
+                0/*addWeightedImpl<short, schar, short >*/,
+                0/*addWeightedImpl<short, schar, int   >*/,
+                0/*addWeightedImpl<short, schar, float >*/,
+                0/*addWeightedImpl<short, schar, double>*/
+            },
+            {
+                0/*addWeightedImpl<short, ushort, uchar >*/,
+                0/*addWeightedImpl<short, ushort, schar >*/,
+                0/*addWeightedImpl<short, ushort, ushort>*/,
+                0/*addWeightedImpl<short, ushort, short >*/,
+                0/*addWeightedImpl<short, ushort, int   >*/,
+                0/*addWeightedImpl<short, ushort, float >*/,
+                0/*addWeightedImpl<short, ushort, double>*/
+            },
+            {
+                addWeightedImpl<short, short, uchar >,
+                addWeightedImpl<short, short, schar >,
+                addWeightedImpl<short, short, ushort>,
+                addWeightedImpl<short, short, short >,
+                addWeightedImpl<short, short, int   >,
+                addWeightedImpl<short, short, float >,
+                addWeightedImpl<short, short, double>
+            },
+            {
+                addWeightedImpl<short, int, uchar >,
+                addWeightedImpl<short, int, schar >,
+                addWeightedImpl<short, int, ushort>,
+                addWeightedImpl<short, int, short >,
+                addWeightedImpl<short, int, int   >,
+                addWeightedImpl<short, int, float >,
+                addWeightedImpl<short, int, double>
+            },
+            {
+                addWeightedImpl<short, float, uchar >,
+                addWeightedImpl<short, float, schar >,
+                addWeightedImpl<short, float, ushort>,
+                addWeightedImpl<short, float, short >,
+                addWeightedImpl<short, float, int   >,
+                addWeightedImpl<short, float, float >,
+                addWeightedImpl<short, float, double>
+            },
+            {
+                addWeightedImpl<short, double, uchar >,
+                addWeightedImpl<short, double, schar >,
+                addWeightedImpl<short, double, ushort>,
+                addWeightedImpl<short, double, short >,
+                addWeightedImpl<short, double, int   >,
+                addWeightedImpl<short, double, float >,
+                addWeightedImpl<short, double, double>
+            }
+        },
+        {
+            {
+                0/*addWeightedImpl<int, uchar, uchar >*/,
+                0/*addWeightedImpl<int, uchar, schar >*/,
+                0/*addWeightedImpl<int, uchar, ushort>*/,
+                0/*addWeightedImpl<int, uchar, short >*/,
+                0/*addWeightedImpl<int, uchar, int   >*/,
+                0/*addWeightedImpl<int, uchar, float >*/,
+                0/*addWeightedImpl<int, uchar, double>*/
+            },
+            {
+                0/*addWeightedImpl<int, schar, uchar >*/,
+                0/*addWeightedImpl<int, schar, schar >*/,
+                0/*addWeightedImpl<int, schar, ushort>*/,
+                0/*addWeightedImpl<int, schar, short >*/,
+                0/*addWeightedImpl<int, schar, int   >*/,
+                0/*addWeightedImpl<int, schar, float >*/,
+                0/*addWeightedImpl<int, schar, double>*/
+            },
+            {
+                0/*addWeightedImpl<int, ushort, uchar >*/,
+                0/*addWeightedImpl<int, ushort, schar >*/,
+                0/*addWeightedImpl<int, ushort, ushort>*/,
+                0/*addWeightedImpl<int, ushort, short >*/,
+                0/*addWeightedImpl<int, ushort, int   >*/,
+                0/*addWeightedImpl<int, ushort, float >*/,
+                0/*addWeightedImpl<int, ushort, double>*/
+            },
+            {
+                0/*addWeightedImpl<int, short, uchar >*/,
+                0/*addWeightedImpl<int, short, schar >*/,
+                0/*addWeightedImpl<int, short, ushort>*/,
+                0/*addWeightedImpl<int, short, short >*/,
+                0/*addWeightedImpl<int, short, int   >*/,
+                0/*addWeightedImpl<int, short, float >*/,
+                0/*addWeightedImpl<int, short, double>*/
+            },
+            {
+                addWeightedImpl<int, int, uchar >,
+                addWeightedImpl<int, int, schar >,
+                addWeightedImpl<int, int, ushort>,
+                addWeightedImpl<int, int, short >,
+                addWeightedImpl<int, int, int   >,
+                addWeightedImpl<int, int, float >,
+                addWeightedImpl<int, int, double>
+            },
+            {
+                addWeightedImpl<int, float, uchar >,
+                addWeightedImpl<int, float, schar >,
+                addWeightedImpl<int, float, ushort>,
+                addWeightedImpl<int, float, short >,
+                addWeightedImpl<int, float, int   >,
+                addWeightedImpl<int, float, float >,
+                addWeightedImpl<int, float, double>
+            },
+            {
+                addWeightedImpl<int, double, uchar >,
+                addWeightedImpl<int, double, schar >,
+                addWeightedImpl<int, double, ushort>,
+                addWeightedImpl<int, double, short >,
+                addWeightedImpl<int, double, int   >,
+                addWeightedImpl<int, double, float >,
+                addWeightedImpl<int, double, double>
+            }
+        },
+        {
+            {
+                0/*addWeightedImpl<float, uchar, uchar >*/,
+                0/*addWeightedImpl<float, uchar, schar >*/,
+                0/*addWeightedImpl<float, uchar, ushort>*/,
+                0/*addWeightedImpl<float, uchar, short >*/,
+                0/*addWeightedImpl<float, uchar, int   >*/,
+                0/*addWeightedImpl<float, uchar, float >*/,
+                0/*addWeightedImpl<float, uchar, double>*/
+            },
+            {
+                0/*addWeightedImpl<float, schar, uchar >*/,
+                0/*addWeightedImpl<float, schar, schar >*/,
+                0/*addWeightedImpl<float, schar, ushort>*/,
+                0/*addWeightedImpl<float, schar, short >*/,
+                0/*addWeightedImpl<float, schar, int   >*/,
+                0/*addWeightedImpl<float, schar, float >*/,
+                0/*addWeightedImpl<float, schar, double>*/
+            },
+            {
+                0/*addWeightedImpl<float, ushort, uchar >*/,
+                0/*addWeightedImpl<float, ushort, schar >*/,
+                0/*addWeightedImpl<float, ushort, ushort>*/,
+                0/*addWeightedImpl<float, ushort, short >*/,
+                0/*addWeightedImpl<float, ushort, int   >*/,
+                0/*addWeightedImpl<float, ushort, float >*/,
+                0/*addWeightedImpl<float, ushort, double>*/
+            },
+            {
+                0/*addWeightedImpl<float, short, uchar >*/,
+                0/*addWeightedImpl<float, short, schar >*/,
+                0/*addWeightedImpl<float, short, ushort>*/,
+                0/*addWeightedImpl<float, short, short >*/,
+                0/*addWeightedImpl<float, short, int   >*/,
+                0/*addWeightedImpl<float, short, float >*/,
+                0/*addWeightedImpl<float, short, double>*/
+            },
+            {
+                0/*addWeightedImpl<float, int, uchar >*/,
+                0/*addWeightedImpl<float, int, schar >*/,
+                0/*addWeightedImpl<float, int, ushort>*/,
+                0/*addWeightedImpl<float, int, short >*/,
+                0/*addWeightedImpl<float, int, int   >*/,
+                0/*addWeightedImpl<float, int, float >*/,
+                0/*addWeightedImpl<float, int, double>*/
+            },
+            {
+                addWeightedImpl<float, float, uchar >,
+                addWeightedImpl<float, float, schar >,
+                addWeightedImpl<float, float, ushort>,
+                addWeightedImpl<float, float, short >,
+                addWeightedImpl<float, float, int   >,
+                addWeightedImpl<float, float, float >,
+                addWeightedImpl<float, float, double>
+            },
+            {
+                addWeightedImpl<float, double, uchar >,
+                addWeightedImpl<float, double, schar >,
+                addWeightedImpl<float, double, ushort>,
+                addWeightedImpl<float, double, short >,
+                addWeightedImpl<float, double, int   >,
+                addWeightedImpl<float, double, float >,
+                addWeightedImpl<float, double, double>
+            }
+        },
+        {
+            {
+                0/*addWeightedImpl<double, uchar, uchar >*/,
+                0/*addWeightedImpl<double, uchar, schar >*/,
+                0/*addWeightedImpl<double, uchar, ushort>*/,
+                0/*addWeightedImpl<double, uchar, short >*/,
+                0/*addWeightedImpl<double, uchar, int   >*/,
+                0/*addWeightedImpl<double, uchar, float >*/,
+                0/*addWeightedImpl<double, uchar, double>*/
+            },
+            {
+                0/*addWeightedImpl<double, schar, uchar >*/,
+                0/*addWeightedImpl<double, schar, schar >*/,
+                0/*addWeightedImpl<double, schar, ushort>*/,
+                0/*addWeightedImpl<double, schar, short >*/,
+                0/*addWeightedImpl<double, schar, int   >*/,
+                0/*addWeightedImpl<double, schar, float >*/,
+                0/*addWeightedImpl<double, schar, double>*/
+            },
+            {
+                0/*addWeightedImpl<double, ushort, uchar >*/,
+                0/*addWeightedImpl<double, ushort, schar >*/,
+                0/*addWeightedImpl<double, ushort, ushort>*/,
+                0/*addWeightedImpl<double, ushort, short >*/,
+                0/*addWeightedImpl<double, ushort, int   >*/,
+                0/*addWeightedImpl<double, ushort, float >*/,
+                0/*addWeightedImpl<double, ushort, double>*/
+            },
+            {
+                0/*addWeightedImpl<double, short, uchar >*/,
+                0/*addWeightedImpl<double, short, schar >*/,
+                0/*addWeightedImpl<double, short, ushort>*/,
+                0/*addWeightedImpl<double, short, short >*/,
+                0/*addWeightedImpl<double, short, int   >*/,
+                0/*addWeightedImpl<double, short, float >*/,
+                0/*addWeightedImpl<double, short, double>*/
+            },
+            {
+                0/*addWeightedImpl<double, int, uchar >*/,
+                0/*addWeightedImpl<double, int, schar >*/,
+                0/*addWeightedImpl<double, int, ushort>*/,
+                0/*addWeightedImpl<double, int, short >*/,
+                0/*addWeightedImpl<double, int, int   >*/,
+                0/*addWeightedImpl<double, int, float >*/,
+                0/*addWeightedImpl<double, int, double>*/
+            },
+            {
+                0/*addWeightedImpl<double, float, uchar >*/,
+                0/*addWeightedImpl<double, float, schar >*/,
+                0/*addWeightedImpl<double, float, ushort>*/,
+                0/*addWeightedImpl<double, float, short >*/,
+                0/*addWeightedImpl<double, float, int   >*/,
+                0/*addWeightedImpl<double, float, float >*/,
+                0/*addWeightedImpl<double, float, double>*/
+            },
+            {
+                addWeightedImpl<double, double, uchar >,
+                addWeightedImpl<double, double, schar >,
+                addWeightedImpl<double, double, ushort>,
+                addWeightedImpl<double, double, short >,
+                addWeightedImpl<double, double, int   >,
+                addWeightedImpl<double, double, float >,
+                addWeightedImpl<double, double, double>
+            }
+        }
+    };
 
-    template void addWeighted<int, int, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, int, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    GpuMat src1 = _src1.getGpuMat();
+    GpuMat src2 = _src2.getGpuMat();
 
-    template void addWeighted<int, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    int sdepth1 = src1.depth();
+    int sdepth2 = src2.depth();
 
-    template void addWeighted<int, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<int, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    // A negative ddepth selects the larger of the two source depth codes.
+    ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2);
+    const int cn = src1.channels();
 
+    // CV_Assert rather than CV_DbgAssert: these conditions guard the operand
+    // agreement and the funcs[7][7][7] lookup below, so they must also be
+    // enforced in builds where debug-only assertions are compiled out.
+    CV_Assert( src2.size() == src1.size() && src2.channels() == cn );
+    CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
 
+    _dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn));
+    GpuMat dst = _dst.getGpuMat();
 
-    template void addWeighted<float, float, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, float, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    // View every matrix as single-channel; the implementations in the table
+    // are instantiated on scalar element types only.
+    GpuMat src1_ = src1.reshape(1);
+    GpuMat src2_ = src2.reshape(1);
+    GpuMat dst_ = dst.reshape(1);
 
-    template void addWeighted<float, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<float, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    // Order the operands so that sdepth1 <= sdepth2 (only that half of the
+    // table is populated); each weight travels with its matrix.
+    if (sdepth1 > sdepth2)
+    {
+        src1_.swap(src2_);
+        std::swap(alpha, beta);
+        std::swap(sdepth1, sdepth2);
+    }
 
+    const func_t func = funcs[sdepth1][sdepth2][ddepth];
 
+    // Defensive: reject any combination whose table slot is null.
+    if (!func)
+        CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
 
-    template void addWeighted<double, double, uchar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, schar>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, ushort>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, short>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, int>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, float>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    template void addWeighted<double, double, double>(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
+    func(src1_, alpha, src2_, beta, gamma, dst_, stream);
 }
 
-#endif /* CUDA_DISABLER */
+#endif
index f05004a..11d6b87 100644 (file)
@@ -450,517 +450,6 @@ void cv::cuda::max(InputArray src1, InputArray src2, OutputArray dst, Stream& st
 }
 
 ////////////////////////////////////////////////////////////////////////
-// addWeighted
-
-namespace arithm
-{
-    template <typename T1, typename T2, typename D>
-    void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-}
-
-void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, double beta, double gamma, OutputArray _dst, int ddepth, Stream& stream)
-{
-    typedef void (*func_t)(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[7][7][7] =
-    {
-        {
-            {
-                arithm::addWeighted<unsigned char, unsigned char, unsigned char >,
-                arithm::addWeighted<unsigned char, unsigned char, signed char >,
-                arithm::addWeighted<unsigned char, unsigned char, unsigned short>,
-                arithm::addWeighted<unsigned char, unsigned char, short >,
-                arithm::addWeighted<unsigned char, unsigned char, int   >,
-                arithm::addWeighted<unsigned char, unsigned char, float >,
-                arithm::addWeighted<unsigned char, unsigned char, double>
-            },
-            {
-                arithm::addWeighted<unsigned char, signed char, unsigned char >,
-                arithm::addWeighted<unsigned char, signed char, signed char >,
-                arithm::addWeighted<unsigned char, signed char, unsigned short>,
-                arithm::addWeighted<unsigned char, signed char, short >,
-                arithm::addWeighted<unsigned char, signed char, int   >,
-                arithm::addWeighted<unsigned char, signed char, float >,
-                arithm::addWeighted<unsigned char, signed char, double>
-            },
-            {
-                arithm::addWeighted<unsigned char, unsigned short, unsigned char >,
-                arithm::addWeighted<unsigned char, unsigned short, signed char >,
-                arithm::addWeighted<unsigned char, unsigned short, unsigned short>,
-                arithm::addWeighted<unsigned char, unsigned short, short >,
-                arithm::addWeighted<unsigned char, unsigned short, int   >,
-                arithm::addWeighted<unsigned char, unsigned short, float >,
-                arithm::addWeighted<unsigned char, unsigned short, double>
-            },
-            {
-                arithm::addWeighted<unsigned char, short, unsigned char >,
-                arithm::addWeighted<unsigned char, short, signed char >,
-                arithm::addWeighted<unsigned char, short, unsigned short>,
-                arithm::addWeighted<unsigned char, short, short >,
-                arithm::addWeighted<unsigned char, short, int   >,
-                arithm::addWeighted<unsigned char, short, float >,
-                arithm::addWeighted<unsigned char, short, double>
-            },
-            {
-                arithm::addWeighted<unsigned char, int, unsigned char >,
-                arithm::addWeighted<unsigned char, int, signed char >,
-                arithm::addWeighted<unsigned char, int, unsigned short>,
-                arithm::addWeighted<unsigned char, int, short >,
-                arithm::addWeighted<unsigned char, int, int   >,
-                arithm::addWeighted<unsigned char, int, float >,
-                arithm::addWeighted<unsigned char, int, double>
-            },
-            {
-                arithm::addWeighted<unsigned char, float, unsigned char >,
-                arithm::addWeighted<unsigned char, float, signed char >,
-                arithm::addWeighted<unsigned char, float, unsigned short>,
-                arithm::addWeighted<unsigned char, float, short >,
-                arithm::addWeighted<unsigned char, float, int   >,
-                arithm::addWeighted<unsigned char, float, float >,
-                arithm::addWeighted<unsigned char, float, double>
-            },
-            {
-                arithm::addWeighted<unsigned char, double, unsigned char >,
-                arithm::addWeighted<unsigned char, double, signed char >,
-                arithm::addWeighted<unsigned char, double, unsigned short>,
-                arithm::addWeighted<unsigned char, double, short >,
-                arithm::addWeighted<unsigned char, double, int   >,
-                arithm::addWeighted<unsigned char, double, float >,
-                arithm::addWeighted<unsigned char, double, double>
-            }
-        },
-        {
-            {
-                0/*arithm::addWeighted<signed char, unsigned char, unsigned char >*/,
-                0/*arithm::addWeighted<signed char, unsigned char, signed char >*/,
-                0/*arithm::addWeighted<signed char, unsigned char, unsigned short>*/,
-                0/*arithm::addWeighted<signed char, unsigned char, short >*/,
-                0/*arithm::addWeighted<signed char, unsigned char, int   >*/,
-                0/*arithm::addWeighted<signed char, unsigned char, float >*/,
-                0/*arithm::addWeighted<signed char, unsigned char, double>*/
-            },
-            {
-                arithm::addWeighted<signed char, signed char, unsigned char >,
-                arithm::addWeighted<signed char, signed char, signed char >,
-                arithm::addWeighted<signed char, signed char, unsigned short>,
-                arithm::addWeighted<signed char, signed char, short >,
-                arithm::addWeighted<signed char, signed char, int   >,
-                arithm::addWeighted<signed char, signed char, float >,
-                arithm::addWeighted<signed char, signed char, double>
-            },
-            {
-                arithm::addWeighted<signed char, unsigned short, unsigned char >,
-                arithm::addWeighted<signed char, unsigned short, signed char >,
-                arithm::addWeighted<signed char, unsigned short, unsigned short>,
-                arithm::addWeighted<signed char, unsigned short, short >,
-                arithm::addWeighted<signed char, unsigned short, int   >,
-                arithm::addWeighted<signed char, unsigned short, float >,
-                arithm::addWeighted<signed char, unsigned short, double>
-            },
-            {
-                arithm::addWeighted<signed char, short, unsigned char >,
-                arithm::addWeighted<signed char, short, signed char >,
-                arithm::addWeighted<signed char, short, unsigned short>,
-                arithm::addWeighted<signed char, short, short >,
-                arithm::addWeighted<signed char, short, int   >,
-                arithm::addWeighted<signed char, short, float >,
-                arithm::addWeighted<signed char, short, double>
-            },
-            {
-                arithm::addWeighted<signed char, int, unsigned char >,
-                arithm::addWeighted<signed char, int, signed char >,
-                arithm::addWeighted<signed char, int, unsigned short>,
-                arithm::addWeighted<signed char, int, short >,
-                arithm::addWeighted<signed char, int, int   >,
-                arithm::addWeighted<signed char, int, float >,
-                arithm::addWeighted<signed char, int, double>
-            },
-            {
-                arithm::addWeighted<signed char, float, unsigned char >,
-                arithm::addWeighted<signed char, float, signed char >,
-                arithm::addWeighted<signed char, float, unsigned short>,
-                arithm::addWeighted<signed char, float, short >,
-                arithm::addWeighted<signed char, float, int   >,
-                arithm::addWeighted<signed char, float, float >,
-                arithm::addWeighted<signed char, float, double>
-            },
-            {
-                arithm::addWeighted<signed char, double, unsigned char >,
-                arithm::addWeighted<signed char, double, signed char >,
-                arithm::addWeighted<signed char, double, unsigned short>,
-                arithm::addWeighted<signed char, double, short >,
-                arithm::addWeighted<signed char, double, int   >,
-                arithm::addWeighted<signed char, double, float >,
-                arithm::addWeighted<signed char, double, double>
-            }
-        },
-        {
-            {
-                0/*arithm::addWeighted<unsigned short, unsigned char, unsigned char >*/,
-                0/*arithm::addWeighted<unsigned short, unsigned char, signed char >*/,
-                0/*arithm::addWeighted<unsigned short, unsigned char, unsigned short>*/,
-                0/*arithm::addWeighted<unsigned short, unsigned char, short >*/,
-                0/*arithm::addWeighted<unsigned short, unsigned char, int   >*/,
-                0/*arithm::addWeighted<unsigned short, unsigned char, float >*/,
-                0/*arithm::addWeighted<unsigned short, unsigned char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<unsigned short, signed char, unsigned char >*/,
-                0/*arithm::addWeighted<unsigned short, signed char, signed char >*/,
-                0/*arithm::addWeighted<unsigned short, signed char, unsigned short>*/,
-                0/*arithm::addWeighted<unsigned short, signed char, short >*/,
-                0/*arithm::addWeighted<unsigned short, signed char, int   >*/,
-                0/*arithm::addWeighted<unsigned short, signed char, float >*/,
-                0/*arithm::addWeighted<unsigned short, signed char, double>*/
-            },
-            {
-                arithm::addWeighted<unsigned short, unsigned short, unsigned char >,
-                arithm::addWeighted<unsigned short, unsigned short, signed char >,
-                arithm::addWeighted<unsigned short, unsigned short, unsigned short>,
-                arithm::addWeighted<unsigned short, unsigned short, short >,
-                arithm::addWeighted<unsigned short, unsigned short, int   >,
-                arithm::addWeighted<unsigned short, unsigned short, float >,
-                arithm::addWeighted<unsigned short, unsigned short, double>
-            },
-            {
-                arithm::addWeighted<unsigned short, short, unsigned char >,
-                arithm::addWeighted<unsigned short, short, signed char >,
-                arithm::addWeighted<unsigned short, short, unsigned short>,
-                arithm::addWeighted<unsigned short, short, short >,
-                arithm::addWeighted<unsigned short, short, int   >,
-                arithm::addWeighted<unsigned short, short, float >,
-                arithm::addWeighted<unsigned short, short, double>
-            },
-            {
-                arithm::addWeighted<unsigned short, int, unsigned char >,
-                arithm::addWeighted<unsigned short, int, signed char >,
-                arithm::addWeighted<unsigned short, int, unsigned short>,
-                arithm::addWeighted<unsigned short, int, short >,
-                arithm::addWeighted<unsigned short, int, int   >,
-                arithm::addWeighted<unsigned short, int, float >,
-                arithm::addWeighted<unsigned short, int, double>
-            },
-            {
-                arithm::addWeighted<unsigned short, float, unsigned char >,
-                arithm::addWeighted<unsigned short, float, signed char >,
-                arithm::addWeighted<unsigned short, float, unsigned short>,
-                arithm::addWeighted<unsigned short, float, short >,
-                arithm::addWeighted<unsigned short, float, int   >,
-                arithm::addWeighted<unsigned short, float, float >,
-                arithm::addWeighted<unsigned short, float, double>
-            },
-            {
-                arithm::addWeighted<unsigned short, double, unsigned char >,
-                arithm::addWeighted<unsigned short, double, signed char >,
-                arithm::addWeighted<unsigned short, double, unsigned short>,
-                arithm::addWeighted<unsigned short, double, short >,
-                arithm::addWeighted<unsigned short, double, int   >,
-                arithm::addWeighted<unsigned short, double, float >,
-                arithm::addWeighted<unsigned short, double, double>
-            }
-        },
-        {
-            {
-                0/*arithm::addWeighted<short, unsigned char, unsigned char >*/,
-                0/*arithm::addWeighted<short, unsigned char, signed char >*/,
-                0/*arithm::addWeighted<short, unsigned char, unsigned short>*/,
-                0/*arithm::addWeighted<short, unsigned char, short >*/,
-                0/*arithm::addWeighted<short, unsigned char, int   >*/,
-                0/*arithm::addWeighted<short, unsigned char, float >*/,
-                0/*arithm::addWeighted<short, unsigned char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<short, signed char, unsigned char >*/,
-                0/*arithm::addWeighted<short, signed char, signed char >*/,
-                0/*arithm::addWeighted<short, signed char, unsigned short>*/,
-                0/*arithm::addWeighted<short, signed char, short >*/,
-                0/*arithm::addWeighted<short, signed char, int   >*/,
-                0/*arithm::addWeighted<short, signed char, float >*/,
-                0/*arithm::addWeighted<short, signed char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<short, unsigned short, unsigned char >*/,
-                0/*arithm::addWeighted<short, unsigned short, signed char >*/,
-                0/*arithm::addWeighted<short, unsigned short, unsigned short>*/,
-                0/*arithm::addWeighted<short, unsigned short, short >*/,
-                0/*arithm::addWeighted<short, unsigned short, int   >*/,
-                0/*arithm::addWeighted<short, unsigned short, float >*/,
-                0/*arithm::addWeighted<short, unsigned short, double>*/
-            },
-            {
-                arithm::addWeighted<short, short, unsigned char >,
-                arithm::addWeighted<short, short, signed char >,
-                arithm::addWeighted<short, short, unsigned short>,
-                arithm::addWeighted<short, short, short >,
-                arithm::addWeighted<short, short, int   >,
-                arithm::addWeighted<short, short, float >,
-                arithm::addWeighted<short, short, double>
-            },
-            {
-                arithm::addWeighted<short, int, unsigned char >,
-                arithm::addWeighted<short, int, signed char >,
-                arithm::addWeighted<short, int, unsigned short>,
-                arithm::addWeighted<short, int, short >,
-                arithm::addWeighted<short, int, int   >,
-                arithm::addWeighted<short, int, float >,
-                arithm::addWeighted<short, int, double>
-            },
-            {
-                arithm::addWeighted<short, float, unsigned char >,
-                arithm::addWeighted<short, float, signed char >,
-                arithm::addWeighted<short, float, unsigned short>,
-                arithm::addWeighted<short, float, short >,
-                arithm::addWeighted<short, float, int   >,
-                arithm::addWeighted<short, float, float >,
-                arithm::addWeighted<short, float, double>
-            },
-            {
-                arithm::addWeighted<short, double, unsigned char >,
-                arithm::addWeighted<short, double, signed char >,
-                arithm::addWeighted<short, double, unsigned short>,
-                arithm::addWeighted<short, double, short >,
-                arithm::addWeighted<short, double, int   >,
-                arithm::addWeighted<short, double, float >,
-                arithm::addWeighted<short, double, double>
-            }
-        },
-        {
-            {
-                0/*arithm::addWeighted<int, unsigned char, unsigned char >*/,
-                0/*arithm::addWeighted<int, unsigned char, signed char >*/,
-                0/*arithm::addWeighted<int, unsigned char, unsigned short>*/,
-                0/*arithm::addWeighted<int, unsigned char, short >*/,
-                0/*arithm::addWeighted<int, unsigned char, int   >*/,
-                0/*arithm::addWeighted<int, unsigned char, float >*/,
-                0/*arithm::addWeighted<int, unsigned char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<int, signed char, unsigned char >*/,
-                0/*arithm::addWeighted<int, signed char, signed char >*/,
-                0/*arithm::addWeighted<int, signed char, unsigned short>*/,
-                0/*arithm::addWeighted<int, signed char, short >*/,
-                0/*arithm::addWeighted<int, signed char, int   >*/,
-                0/*arithm::addWeighted<int, signed char, float >*/,
-                0/*arithm::addWeighted<int, signed char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<int, unsigned short, unsigned char >*/,
-                0/*arithm::addWeighted<int, unsigned short, signed char >*/,
-                0/*arithm::addWeighted<int, unsigned short, unsigned short>*/,
-                0/*arithm::addWeighted<int, unsigned short, short >*/,
-                0/*arithm::addWeighted<int, unsigned short, int   >*/,
-                0/*arithm::addWeighted<int, unsigned short, float >*/,
-                0/*arithm::addWeighted<int, unsigned short, double>*/
-            },
-            {
-                0/*arithm::addWeighted<int, short, unsigned char >*/,
-                0/*arithm::addWeighted<int, short, signed char >*/,
-                0/*arithm::addWeighted<int, short, unsigned short>*/,
-                0/*arithm::addWeighted<int, short, short >*/,
-                0/*arithm::addWeighted<int, short, int   >*/,
-                0/*arithm::addWeighted<int, short, float >*/,
-                0/*arithm::addWeighted<int, short, double>*/
-            },
-            {
-                arithm::addWeighted<int, int, unsigned char >,
-                arithm::addWeighted<int, int, signed char >,
-                arithm::addWeighted<int, int, unsigned short>,
-                arithm::addWeighted<int, int, short >,
-                arithm::addWeighted<int, int, int   >,
-                arithm::addWeighted<int, int, float >,
-                arithm::addWeighted<int, int, double>
-            },
-            {
-                arithm::addWeighted<int, float, unsigned char >,
-                arithm::addWeighted<int, float, signed char >,
-                arithm::addWeighted<int, float, unsigned short>,
-                arithm::addWeighted<int, float, short >,
-                arithm::addWeighted<int, float, int   >,
-                arithm::addWeighted<int, float, float >,
-                arithm::addWeighted<int, float, double>
-            },
-            {
-                arithm::addWeighted<int, double, unsigned char >,
-                arithm::addWeighted<int, double, signed char >,
-                arithm::addWeighted<int, double, unsigned short>,
-                arithm::addWeighted<int, double, short >,
-                arithm::addWeighted<int, double, int   >,
-                arithm::addWeighted<int, double, float >,
-                arithm::addWeighted<int, double, double>
-            }
-        },
-        {
-            {
-                0/*arithm::addWeighted<float, unsigned char, unsigned char >*/,
-                0/*arithm::addWeighted<float, unsigned char, signed char >*/,
-                0/*arithm::addWeighted<float, unsigned char, unsigned short>*/,
-                0/*arithm::addWeighted<float, unsigned char, short >*/,
-                0/*arithm::addWeighted<float, unsigned char, int   >*/,
-                0/*arithm::addWeighted<float, unsigned char, float >*/,
-                0/*arithm::addWeighted<float, unsigned char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<float, signed char, unsigned char >*/,
-                0/*arithm::addWeighted<float, signed char, signed char >*/,
-                0/*arithm::addWeighted<float, signed char, unsigned short>*/,
-                0/*arithm::addWeighted<float, signed char, short >*/,
-                0/*arithm::addWeighted<float, signed char, int   >*/,
-                0/*arithm::addWeighted<float, signed char, float >*/,
-                0/*arithm::addWeighted<float, signed char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<float, unsigned short, unsigned char >*/,
-                0/*arithm::addWeighted<float, unsigned short, signed char >*/,
-                0/*arithm::addWeighted<float, unsigned short, unsigned short>*/,
-                0/*arithm::addWeighted<float, unsigned short, short >*/,
-                0/*arithm::addWeighted<float, unsigned short, int   >*/,
-                0/*arithm::addWeighted<float, unsigned short, float >*/,
-                0/*arithm::addWeighted<float, unsigned short, double>*/
-            },
-            {
-                0/*arithm::addWeighted<float, short, unsigned char >*/,
-                0/*arithm::addWeighted<float, short, signed char >*/,
-                0/*arithm::addWeighted<float, short, unsigned short>*/,
-                0/*arithm::addWeighted<float, short, short >*/,
-                0/*arithm::addWeighted<float, short, int   >*/,
-                0/*arithm::addWeighted<float, short, float >*/,
-                0/*arithm::addWeighted<float, short, double>*/
-            },
-            {
-                0/*arithm::addWeighted<float, int, unsigned char >*/,
-                0/*arithm::addWeighted<float, int, signed char >*/,
-                0/*arithm::addWeighted<float, int, unsigned short>*/,
-                0/*arithm::addWeighted<float, int, short >*/,
-                0/*arithm::addWeighted<float, int, int   >*/,
-                0/*arithm::addWeighted<float, int, float >*/,
-                0/*arithm::addWeighted<float, int, double>*/
-            },
-            {
-                arithm::addWeighted<float, float, unsigned char >,
-                arithm::addWeighted<float, float, signed char >,
-                arithm::addWeighted<float, float, unsigned short>,
-                arithm::addWeighted<float, float, short >,
-                arithm::addWeighted<float, float, int   >,
-                arithm::addWeighted<float, float, float >,
-                arithm::addWeighted<float, float, double>
-            },
-            {
-                arithm::addWeighted<float, double, unsigned char >,
-                arithm::addWeighted<float, double, signed char >,
-                arithm::addWeighted<float, double, unsigned short>,
-                arithm::addWeighted<float, double, short >,
-                arithm::addWeighted<float, double, int   >,
-                arithm::addWeighted<float, double, float >,
-                arithm::addWeighted<float, double, double>
-            }
-        },
-        {
-            {
-                0/*arithm::addWeighted<double, unsigned char, unsigned char >*/,
-                0/*arithm::addWeighted<double, unsigned char, signed char >*/,
-                0/*arithm::addWeighted<double, unsigned char, unsigned short>*/,
-                0/*arithm::addWeighted<double, unsigned char, short >*/,
-                0/*arithm::addWeighted<double, unsigned char, int   >*/,
-                0/*arithm::addWeighted<double, unsigned char, float >*/,
-                0/*arithm::addWeighted<double, unsigned char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<double, signed char, unsigned char >*/,
-                0/*arithm::addWeighted<double, signed char, signed char >*/,
-                0/*arithm::addWeighted<double, signed char, unsigned short>*/,
-                0/*arithm::addWeighted<double, signed char, short >*/,
-                0/*arithm::addWeighted<double, signed char, int   >*/,
-                0/*arithm::addWeighted<double, signed char, float >*/,
-                0/*arithm::addWeighted<double, signed char, double>*/
-            },
-            {
-                0/*arithm::addWeighted<double, unsigned short, unsigned char >*/,
-                0/*arithm::addWeighted<double, unsigned short, signed char >*/,
-                0/*arithm::addWeighted<double, unsigned short, unsigned short>*/,
-                0/*arithm::addWeighted<double, unsigned short, short >*/,
-                0/*arithm::addWeighted<double, unsigned short, int   >*/,
-                0/*arithm::addWeighted<double, unsigned short, float >*/,
-                0/*arithm::addWeighted<double, unsigned short, double>*/
-            },
-            {
-                0/*arithm::addWeighted<double, short, unsigned char >*/,
-                0/*arithm::addWeighted<double, short, signed char >*/,
-                0/*arithm::addWeighted<double, short, unsigned short>*/,
-                0/*arithm::addWeighted<double, short, short >*/,
-                0/*arithm::addWeighted<double, short, int   >*/,
-                0/*arithm::addWeighted<double, short, float >*/,
-                0/*arithm::addWeighted<double, short, double>*/
-            },
-            {
-                0/*arithm::addWeighted<double, int, unsigned char >*/,
-                0/*arithm::addWeighted<double, int, signed char >*/,
-                0/*arithm::addWeighted<double, int, unsigned short>*/,
-                0/*arithm::addWeighted<double, int, short >*/,
-                0/*arithm::addWeighted<double, int, int   >*/,
-                0/*arithm::addWeighted<double, int, float >*/,
-                0/*arithm::addWeighted<double, int, double>*/
-            },
-            {
-                0/*arithm::addWeighted<double, float, unsigned char >*/,
-                0/*arithm::addWeighted<double, float, signed char >*/,
-                0/*arithm::addWeighted<double, float, unsigned short>*/,
-                0/*arithm::addWeighted<double, float, short >*/,
-                0/*arithm::addWeighted<double, float, int   >*/,
-                0/*arithm::addWeighted<double, float, float >*/,
-                0/*arithm::addWeighted<double, float, double>*/
-            },
-            {
-                arithm::addWeighted<double, double, unsigned char >,
-                arithm::addWeighted<double, double, signed char >,
-                arithm::addWeighted<double, double, unsigned short>,
-                arithm::addWeighted<double, double, short >,
-                arithm::addWeighted<double, double, int   >,
-                arithm::addWeighted<double, double, float >,
-                arithm::addWeighted<double, double, double>
-            }
-        }
-    };
-
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
-
-    int sdepth1 = src1.depth();
-    int sdepth2 = src2.depth();
-    ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2);
-    const int cn = src1.channels();
-
-    CV_Assert( src2.size() == src1.size() && src2.channels() == cn );
-    CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
-
-    if (sdepth1 == CV_64F || sdepth2 == CV_64F || ddepth == CV_64F)
-    {
-        if (!deviceSupports(NATIVE_DOUBLE))
-            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
-    }
-
-    _dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn));
-    GpuMat dst = _dst.getGpuMat();
-
-    PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
-    PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
-    PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
-
-    if (sdepth1 > sdepth2)
-    {
-        std::swap(src1_.data, src2_.data);
-        std::swap(src1_.step, src2_.step);
-        std::swap(alpha, beta);
-        std::swap(sdepth1, sdepth2);
-    }
-
-    const func_t func = funcs[sdepth1][sdepth2][ddepth];
-
-    if (!func)
-        CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
-
-    func(src1_, alpha, src2_, beta, gamma, dst_, StreamAccessor::getStream(stream));
-}
-
-////////////////////////////////////////////////////////////////////////
 // threshold
 
 namespace arithm