//
//M*/
-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/transform.hpp"
-#include "opencv2/core/cuda/saturate_cast.hpp"
-#include "opencv2/core/cuda/simd_functions.hpp"
+#ifndef HAVE_OPENCV_CUDEV
-#include "arithm_func_traits.hpp"
+#error "opencv_cudev is required"
-using namespace cv::cuda;
-using namespace cv::cuda::device;
+#else
-namespace arithm
+#include "opencv2/cudev.hpp"
+
+using namespace cv::cudev;
+
+void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);
+
+namespace
{
- struct VSub4 : binary_function<uint, uint, uint>
+ template <typename T, typename D> struct SubOp1 : binary_function<T, T, D>
{
- __device__ __forceinline__ uint operator ()(uint a, uint b) const
+ __device__ __forceinline__ D operator ()(T a, T b) const
{
- return vsub4(a, b);
+ return saturate_cast<D>(a - b);
}
-
- __host__ __device__ __forceinline__ VSub4() {}
- __host__ __device__ __forceinline__ VSub4(const VSub4&) {}
};
- struct VSub2 : binary_function<uint, uint, uint>
+ template <typename T, typename D>
+ void subMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
+ {
+ if (mask.data)
+ gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), SubOp1<T, D>(), globPtr<uchar>(mask), stream);
+ else
+ gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), SubOp1<T, D>(), stream);
+ }
+
+ struct SubOp2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vsub2(a, b);
}
-
- __host__ __device__ __forceinline__ VSub2() {}
- __host__ __device__ __forceinline__ VSub2(const VSub2&) {}
};
- template <typename T, typename D> struct SubMat : binary_function<T, T, D>
+ void subMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
- __device__ __forceinline__ D operator ()(T a, T b) const
+ const int vcols = src1.cols >> 1;
+
+ GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
+ GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
+ GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
+
+ gridTransformBinary(src1_, src2_, dst_, SubOp2(), stream);
+ }
+
+ struct SubOp4 : binary_function<uint, uint, uint>
+ {
+ __device__ __forceinline__ uint operator ()(uint a, uint b) const
{
- return saturate_cast<D>(a - b);
+ return vsub4(a, b);
}
-
- __host__ __device__ __forceinline__ SubMat() {}
- __host__ __device__ __forceinline__ SubMat(const SubMat&) {}
};
+
+ void subMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
+ {
+ const int vcols = src1.cols >> 2;
+
+ GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
+ GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
+ GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
+
+ gridTransformBinary(src1_, src2_, dst_, SubOp4(), stream);
+ }
}
-namespace cv { namespace cuda { namespace device
+void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
{
- template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
+ typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream);
+ static const func_t funcs[7][7] =
{
+ {
+ subMat_v1<uchar, uchar>,
+ subMat_v1<uchar, schar>,
+ subMat_v1<uchar, ushort>,
+ subMat_v1<uchar, short>,
+ subMat_v1<uchar, int>,
+ subMat_v1<uchar, float>,
+ subMat_v1<uchar, double>
+ },
+ {
+ subMat_v1<schar, uchar>,
+ subMat_v1<schar, schar>,
+ subMat_v1<schar, ushort>,
+ subMat_v1<schar, short>,
+ subMat_v1<schar, int>,
+ subMat_v1<schar, float>,
+ subMat_v1<schar, double>
+ },
+ {
+ 0 /*subMat_v1<ushort, uchar>*/,
+ 0 /*subMat_v1<ushort, schar>*/,
+ subMat_v1<ushort, ushort>,
+ subMat_v1<ushort, short>,
+ subMat_v1<ushort, int>,
+ subMat_v1<ushort, float>,
+ subMat_v1<ushort, double>
+ },
+ {
+ 0 /*subMat_v1<short, uchar>*/,
+ 0 /*subMat_v1<short, schar>*/,
+ subMat_v1<short, ushort>,
+ subMat_v1<short, short>,
+ subMat_v1<short, int>,
+ subMat_v1<short, float>,
+ subMat_v1<short, double>
+ },
+ {
+ 0 /*subMat_v1<int, uchar>*/,
+ 0 /*subMat_v1<int, schar>*/,
+ 0 /*subMat_v1<int, ushort>*/,
+ 0 /*subMat_v1<int, short>*/,
+ subMat_v1<int, int>,
+ subMat_v1<int, float>,
+ subMat_v1<int, double>
+ },
+ {
+ 0 /*subMat_v1<float, uchar>*/,
+ 0 /*subMat_v1<float, schar>*/,
+ 0 /*subMat_v1<float, ushort>*/,
+ 0 /*subMat_v1<float, short>*/,
+ 0 /*subMat_v1<float, int>*/,
+ subMat_v1<float, float>,
+ subMat_v1<float, double>
+ },
+ {
+ 0 /*subMat_v1<double, uchar>*/,
+ 0 /*subMat_v1<double, schar>*/,
+ 0 /*subMat_v1<double, ushort>*/,
+ 0 /*subMat_v1<double, short>*/,
+ 0 /*subMat_v1<double, int>*/,
+ 0 /*subMat_v1<double, float>*/,
+ subMat_v1<double, double>
+ }
};
- template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
- {
- };
+ const int sdepth = src1.depth();
+ const int ddepth = dst.depth();
- template <typename T, typename D> struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
- {
- };
-}}}
+ CV_DbgAssert( sdepth < 7 && ddepth < 7 );
-namespace arithm
-{
- void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
- {
- device::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
- }
+ GpuMat src1_ = src1.reshape(1);
+ GpuMat src2_ = src2.reshape(1);
+ GpuMat dst_ = dst.reshape(1);
- void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
+ if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth)
{
- device::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
- }
+ const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
+ const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
+ const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
- template <typename T, typename D>
- void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
- {
- if (mask.data)
- device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
- else
- device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
+ const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
+
+ if (isAllAligned)
+ {
+ if (sdepth == CV_8U && (src1_.cols & 3) == 0)
+ {
+ subMat_v4(src1_, src2_, dst_, stream);
+ return;
+ }
+ else if (sdepth == CV_16U && (src1_.cols & 1) == 0)
+ {
+ subMat_v2(src1_, src2_, dst_, stream);
+ return;
+ }
+ }
}
- template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- template void subMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+ const func_t func = funcs[sdepth][ddepth];
+
+ if (!func)
+ CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
+
+ func(src1_, src2_, dst_, mask, stream);
}
-#endif // CUDA_DISABLER
+#endif
//
//M*/
-#if !defined CUDA_DISABLER
+#include "opencv2/opencv_modules.hpp"
-#include "opencv2/core/cuda/common.hpp"
-#include "opencv2/core/cuda/functional.hpp"
-#include "opencv2/core/cuda/transform.hpp"
-#include "opencv2/core/cuda/saturate_cast.hpp"
-#include "opencv2/core/cuda/simd_functions.hpp"
+#ifndef HAVE_OPENCV_CUDEV
-#include "arithm_func_traits.hpp"
+#error "opencv_cudev is required"
-using namespace cv::cuda;
-using namespace cv::cuda::device;
+#else
-namespace arithm
+#include "opencv2/cudev.hpp"
+
+using namespace cv::cudev;
+
+void subScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);
+
+namespace
{
- template <typename T, typename S, typename D> struct SubScalar : unary_function<T, D>
+ template <typename SrcType, typename ScalarType, typename DstType> struct SubScalarOp : unary_function<SrcType, DstType>
{
- S val;
- int scale;
+ ScalarType val;
+
+ __device__ __forceinline__ DstType operator ()(SrcType a) const
+ {
+ return saturate_cast<DstType>(saturate_cast<ScalarType>(a) - val);
+ }
+ };
- __host__ SubScalar(S val_, int scale_) : val(val_), scale(scale_) {}
+ template <typename SrcType, typename ScalarType, typename DstType> struct SubScalarOpInv : unary_function<SrcType, DstType>
+ {
+ ScalarType val;
- __device__ __forceinline__ D operator ()(T a) const
+ __device__ __forceinline__ DstType operator ()(SrcType a) const
{
- return saturate_cast<D>(scale * (a - val));
+ return saturate_cast<DstType>(val - saturate_cast<ScalarType>(a));
}
};
-}
-namespace cv { namespace cuda { namespace device
-{
- template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::SubScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
+ template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
-}}}
+ template <> struct TransformPolicy<double> : DefaultTransformPolicy
+ {
+ enum {
+ shift = 1
+ };
+ };
-namespace arithm
-{
- template <typename T, typename S, typename D>
- void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
+ template <typename SrcType, typename ScalarDepth, typename DstType>
+ void subScalarImpl(const GpuMat& src, cv::Scalar value, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
- SubScalar<T, S, D> op(static_cast<S>(val), inv ? -1 : 1);
+ typedef typename MakeVec<ScalarDepth, VecTraits<SrcType>::cn>::type ScalarType;
- if (mask.data)
- device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
+ cv::Scalar_<ScalarDepth> value_ = value;
+
+ if (inv)
+ {
+ SubScalarOpInv<SrcType, ScalarType, DstType> op;
+ op.val = VecTraits<ScalarType>::make(value_.val);
+
+ if (mask.data)
+ gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, globPtr<uchar>(mask), stream);
+ else
+ gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
+ }
else
- device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
+ {
+ SubScalarOp<SrcType, ScalarType, DstType> op;
+ op.val = VecTraits<ScalarType>::make(value_.val);
+
+ if (mask.data)
+ gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, globPtr<uchar>(mask), stream);
+ else
+ gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
+ }
}
+}
+
+void subScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
+{
+ typedef void (*func_t)(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream);
+ static const func_t funcs[7][7][4] =
+ {
+ {
+ {subScalarImpl<uchar, float, uchar>, subScalarImpl<uchar2, float, uchar2>, subScalarImpl<uchar3, float, uchar3>, subScalarImpl<uchar4, float, uchar4>},
+ {subScalarImpl<uchar, float, schar>, subScalarImpl<uchar2, float, char2>, subScalarImpl<uchar3, float, char3>, subScalarImpl<uchar4, float, char4>},
+ {subScalarImpl<uchar, float, ushort>, subScalarImpl<uchar2, float, ushort2>, subScalarImpl<uchar3, float, ushort3>, subScalarImpl<uchar4, float, ushort4>},
+ {subScalarImpl<uchar, float, short>, subScalarImpl<uchar2, float, short2>, subScalarImpl<uchar3, float, short3>, subScalarImpl<uchar4, float, short4>},
+ {subScalarImpl<uchar, float, int>, subScalarImpl<uchar2, float, int2>, subScalarImpl<uchar3, float, int3>, subScalarImpl<uchar4, float, int4>},
+ {subScalarImpl<uchar, float, float>, subScalarImpl<uchar2, float, float2>, subScalarImpl<uchar3, float, float3>, subScalarImpl<uchar4, float, float4>},
+ {subScalarImpl<uchar, double, double>, subScalarImpl<uchar2, double, double2>, subScalarImpl<uchar3, double, double3>, subScalarImpl<uchar4, double, double4>}
+ },
+ {
+ {subScalarImpl<schar, float, uchar>, subScalarImpl<char2, float, uchar2>, subScalarImpl<char3, float, uchar3>, subScalarImpl<char4, float, uchar4>},
+ {subScalarImpl<schar, float, schar>, subScalarImpl<char2, float, char2>, subScalarImpl<char3, float, char3>, subScalarImpl<char4, float, char4>},
+ {subScalarImpl<schar, float, ushort>, subScalarImpl<char2, float, ushort2>, subScalarImpl<char3, float, ushort3>, subScalarImpl<char4, float, ushort4>},
+ {subScalarImpl<schar, float, short>, subScalarImpl<char2, float, short2>, subScalarImpl<char3, float, short3>, subScalarImpl<char4, float, short4>},
+ {subScalarImpl<schar, float, int>, subScalarImpl<char2, float, int2>, subScalarImpl<char3, float, int3>, subScalarImpl<char4, float, int4>},
+ {subScalarImpl<schar, float, float>, subScalarImpl<char2, float, float2>, subScalarImpl<char3, float, float3>, subScalarImpl<char4, float, float4>},
+ {subScalarImpl<schar, double, double>, subScalarImpl<char2, double, double2>, subScalarImpl<char3, double, double3>, subScalarImpl<char4, double, double4>}
+ },
+ {
+ {0 /*subScalarImpl<ushort, float, uchar>*/, 0 /*subScalarImpl<ushort2, float, uchar2>*/, 0 /*subScalarImpl<ushort3, float, uchar3>*/, 0 /*subScalarImpl<ushort4, float, uchar4>*/},
+ {0 /*subScalarImpl<ushort, float, schar>*/, 0 /*subScalarImpl<ushort2, float, char2>*/, 0 /*subScalarImpl<ushort3, float, char3>*/, 0 /*subScalarImpl<ushort4, float, char4>*/},
+ {subScalarImpl<ushort, float, ushort>, subScalarImpl<ushort2, float, ushort2>, subScalarImpl<ushort3, float, ushort3>, subScalarImpl<ushort4, float, ushort4>},
+ {subScalarImpl<ushort, float, short>, subScalarImpl<ushort2, float, short2>, subScalarImpl<ushort3, float, short3>, subScalarImpl<ushort4, float, short4>},
+ {subScalarImpl<ushort, float, int>, subScalarImpl<ushort2, float, int2>, subScalarImpl<ushort3, float, int3>, subScalarImpl<ushort4, float, int4>},
+ {subScalarImpl<ushort, float, float>, subScalarImpl<ushort2, float, float2>, subScalarImpl<ushort3, float, float3>, subScalarImpl<ushort4, float, float4>},
+ {subScalarImpl<ushort, double, double>, subScalarImpl<ushort2, double, double2>, subScalarImpl<ushort3, double, double3>, subScalarImpl<ushort4, double, double4>}
+ },
+ {
+ {0 /*subScalarImpl<short, float, uchar>*/, 0 /*subScalarImpl<short2, float, uchar2>*/, 0 /*subScalarImpl<short3, float, uchar3>*/, 0 /*subScalarImpl<short4, float, uchar4>*/},
+ {0 /*subScalarImpl<short, float, schar>*/, 0 /*subScalarImpl<short2, float, char2>*/, 0 /*subScalarImpl<short3, float, char3>*/, 0 /*subScalarImpl<short4, float, char4>*/},
+ {subScalarImpl<short, float, ushort>, subScalarImpl<short2, float, ushort2>, subScalarImpl<short3, float, ushort3>, subScalarImpl<short4, float, ushort4>},
+ {subScalarImpl<short, float, short>, subScalarImpl<short2, float, short2>, subScalarImpl<short3, float, short3>, subScalarImpl<short4, float, short4>},
+ {subScalarImpl<short, float, int>, subScalarImpl<short2, float, int2>, subScalarImpl<short3, float, int3>, subScalarImpl<short4, float, int4>},
+ {subScalarImpl<short, float, float>, subScalarImpl<short2, float, float2>, subScalarImpl<short3, float, float3>, subScalarImpl<short4, float, float4>},
+ {subScalarImpl<short, double, double>, subScalarImpl<short2, double, double2>, subScalarImpl<short3, double, double3>, subScalarImpl<short4, double, double4>}
+ },
+ {
+ {0 /*subScalarImpl<int, float, uchar>*/, 0 /*subScalarImpl<int2, float, uchar2>*/, 0 /*subScalarImpl<int3, float, uchar3>*/, 0 /*subScalarImpl<int4, float, uchar4>*/},
+ {0 /*subScalarImpl<int, float, schar>*/, 0 /*subScalarImpl<int2, float, char2>*/, 0 /*subScalarImpl<int3, float, char3>*/, 0 /*subScalarImpl<int4, float, char4>*/},
+ {0 /*subScalarImpl<int, float, ushort>*/, 0 /*subScalarImpl<int2, float, ushort2>*/, 0 /*subScalarImpl<int3, float, ushort3>*/, 0 /*subScalarImpl<int4, float, ushort4>*/},
+ {0 /*subScalarImpl<int, float, short>*/, 0 /*subScalarImpl<int2, float, short2>*/, 0 /*subScalarImpl<int3, float, short3>*/, 0 /*subScalarImpl<int4, float, short4>*/},
+ {subScalarImpl<int, float, int>, subScalarImpl<int2, float, int2>, subScalarImpl<int3, float, int3>, subScalarImpl<int4, float, int4>},
+ {subScalarImpl<int, float, float>, subScalarImpl<int2, float, float2>, subScalarImpl<int3, float, float3>, subScalarImpl<int4, float, float4>},
+ {subScalarImpl<int, double, double>, subScalarImpl<int2, double, double2>, subScalarImpl<int3, double, double3>, subScalarImpl<int4, double, double4>}
+ },
+ {
+ {0 /*subScalarImpl<float, float, uchar>*/, 0 /*subScalarImpl<float2, float, uchar2>*/, 0 /*subScalarImpl<float3, float, uchar3>*/, 0 /*subScalarImpl<float4, float, uchar4>*/},
+ {0 /*subScalarImpl<float, float, schar>*/, 0 /*subScalarImpl<float2, float, char2>*/, 0 /*subScalarImpl<float3, float, char3>*/, 0 /*subScalarImpl<float4, float, char4>*/},
+ {0 /*subScalarImpl<float, float, ushort>*/, 0 /*subScalarImpl<float2, float, ushort2>*/, 0 /*subScalarImpl<float3, float, ushort3>*/, 0 /*subScalarImpl<float4, float, ushort4>*/},
+ {0 /*subScalarImpl<float, float, short>*/, 0 /*subScalarImpl<float2, float, short2>*/, 0 /*subScalarImpl<float3, float, short3>*/, 0 /*subScalarImpl<float4, float, short4>*/},
+ {0 /*subScalarImpl<float, float, int>*/, 0 /*subScalarImpl<float2, float, int2>*/, 0 /*subScalarImpl<float3, float, int3>*/, 0 /*subScalarImpl<float4, float, int4>*/},
+ {subScalarImpl<float, float, float>, subScalarImpl<float2, float, float2>, subScalarImpl<float3, float, float3>, subScalarImpl<float4, float, float4>},
+ {subScalarImpl<float, double, double>, subScalarImpl<float2, double, double2>, subScalarImpl<float3, double, double3>, subScalarImpl<float4, double, double4>}
+ },
+ {
+ {0 /*subScalarImpl<double, double, uchar>*/, 0 /*subScalarImpl<double2, double, uchar2>*/, 0 /*subScalarImpl<double3, double, uchar3>*/, 0 /*subScalarImpl<double4, double, uchar4>*/},
+ {0 /*subScalarImpl<double, double, schar>*/, 0 /*subScalarImpl<double2, double, char2>*/, 0 /*subScalarImpl<double3, double, char3>*/, 0 /*subScalarImpl<double4, double, char4>*/},
+ {0 /*subScalarImpl<double, double, ushort>*/, 0 /*subScalarImpl<double2, double, ushort2>*/, 0 /*subScalarImpl<double3, double, ushort3>*/, 0 /*subScalarImpl<double4, double, ushort4>*/},
+ {0 /*subScalarImpl<double, double, short>*/, 0 /*subScalarImpl<double2, double, short2>*/, 0 /*subScalarImpl<double3, double, short3>*/, 0 /*subScalarImpl<double4, double, short4>*/},
+ {0 /*subScalarImpl<double, double, int>*/, 0 /*subScalarImpl<double2, double, int2>*/, 0 /*subScalarImpl<double3, double, int3>*/, 0 /*subScalarImpl<double4, double, int4>*/},
+ {0 /*subScalarImpl<double, double, float>*/, 0 /*subScalarImpl<double2, double, float2>*/, 0 /*subScalarImpl<double3, double, float3>*/, 0 /*subScalarImpl<double4, double, float4>*/},
+ {subScalarImpl<double, double, double>, subScalarImpl<double2, double, double2>, subScalarImpl<double3, double, double3>, subScalarImpl<double4, double, double4>}
+ }
+ };
+
+ const int sdepth = src.depth();
+ const int ddepth = dst.depth();
+ const int cn = src.channels();
+
+ CV_DbgAssert( sdepth < 7 && ddepth < 7 && cn <= 4 );
+
+ const func_t func = funcs[sdepth][ddepth][cn - 1];
+
+ if (!func)
+ CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
- template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<uchar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<uchar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<uchar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<uchar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<uchar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<uchar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- template void subScalar<schar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<schar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<schar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<schar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<schar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<schar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<schar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subScalar<ushort, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<ushort, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<ushort, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<ushort, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<ushort, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<ushort, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<ushort, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subScalar<short, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<short, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<short, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<short, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<short, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<short, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<short, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subScalar<int, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<int, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<int, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<int, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<int, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<int, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<int, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subScalar<float, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<float, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<float, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<float, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<float, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<float, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<float, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-
- //template void subScalar<double, double, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<double, double, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<double, double, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<double, double, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<double, double, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- //template void subScalar<double, double, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- template void subScalar<double, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
+ func(src, val, inv, dst, mask, stream);
}
-#endif // CUDA_DISABLER
+#endif
////////////////////////////////////////////////////////////////////////
// subtract
-namespace arithm
-{
- void subMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
- void subMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-
- template <typename T, typename D>
- void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-}
-
-static void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int)
-{
- typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- static const func_t funcs[7][7] =
- {
- {
- arithm::subMat<unsigned char, unsigned char>,
- arithm::subMat<unsigned char, signed char>,
- arithm::subMat<unsigned char, unsigned short>,
- arithm::subMat<unsigned char, short>,
- arithm::subMat<unsigned char, int>,
- arithm::subMat<unsigned char, float>,
- arithm::subMat<unsigned char, double>
- },
- {
- arithm::subMat<signed char, unsigned char>,
- arithm::subMat<signed char, signed char>,
- arithm::subMat<signed char, unsigned short>,
- arithm::subMat<signed char, short>,
- arithm::subMat<signed char, int>,
- arithm::subMat<signed char, float>,
- arithm::subMat<signed char, double>
- },
- {
- 0 /*arithm::subMat<unsigned short, unsigned char>*/,
- 0 /*arithm::subMat<unsigned short, signed char>*/,
- arithm::subMat<unsigned short, unsigned short>,
- arithm::subMat<unsigned short, short>,
- arithm::subMat<unsigned short, int>,
- arithm::subMat<unsigned short, float>,
- arithm::subMat<unsigned short, double>
- },
- {
- 0 /*arithm::subMat<short, unsigned char>*/,
- 0 /*arithm::subMat<short, signed char>*/,
- arithm::subMat<short, unsigned short>,
- arithm::subMat<short, short>,
- arithm::subMat<short, int>,
- arithm::subMat<short, float>,
- arithm::subMat<short, double>
- },
- {
- 0 /*arithm::subMat<int, unsigned char>*/,
- 0 /*arithm::subMat<int, signed char>*/,
- 0 /*arithm::subMat<int, unsigned short>*/,
- 0 /*arithm::subMat<int, short>*/,
- arithm::subMat<int, int>,
- arithm::subMat<int, float>,
- arithm::subMat<int, double>
- },
- {
- 0 /*arithm::subMat<float, unsigned char>*/,
- 0 /*arithm::subMat<float, signed char>*/,
- 0 /*arithm::subMat<float, unsigned short>*/,
- 0 /*arithm::subMat<float, short>*/,
- 0 /*arithm::subMat<float, int>*/,
- arithm::subMat<float, float>,
- arithm::subMat<float, double>
- },
- {
- 0 /*arithm::subMat<double, unsigned char>*/,
- 0 /*arithm::subMat<double, signed char>*/,
- 0 /*arithm::subMat<double, unsigned short>*/,
- 0 /*arithm::subMat<double, short>*/,
- 0 /*arithm::subMat<double, int>*/,
- 0 /*arithm::subMat<double, float>*/,
- arithm::subMat<double, double>
- }
- };
-
- const int sdepth = src1.depth();
- const int ddepth = dst.depth();
- const int cn = src1.channels();
-
- cudaStream_t stream = StreamAccessor::getStream(_stream);
-
- PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
- PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
- PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
-
- if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth)
- {
- const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
- const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
- const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
-
- const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
-
- if (isAllAligned)
- {
- if (sdepth == CV_8U && (src1_.cols & 3) == 0)
- {
- const int vcols = src1_.cols >> 2;
-
- arithm::subMat_v4(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
- PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
- PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
- stream);
-
- return;
- }
- else if (sdepth == CV_16U && (src1_.cols & 1) == 0)
- {
- const int vcols = src1_.cols >> 1;
-
- arithm::subMat_v2(PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
- PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
- PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
- stream);
-
- return;
- }
- }
- }
+void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);
- const func_t func = funcs[sdepth][ddepth];
-
- if (!func)
- CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
-
- func(src1_, src2_, dst_, mask, stream);
-}
-
-namespace arithm
-{
- template <typename T, typename S, typename D>
- void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
-}
-
-static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int)
-{
- typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
- static const func_t funcs[7][7] =
- {
- {
- arithm::subScalar<unsigned char, float, unsigned char>,
- arithm::subScalar<unsigned char, float, signed char>,
- arithm::subScalar<unsigned char, float, unsigned short>,
- arithm::subScalar<unsigned char, float, short>,
- arithm::subScalar<unsigned char, float, int>,
- arithm::subScalar<unsigned char, float, float>,
- arithm::subScalar<unsigned char, double, double>
- },
- {
- arithm::subScalar<signed char, float, unsigned char>,
- arithm::subScalar<signed char, float, signed char>,
- arithm::subScalar<signed char, float, unsigned short>,
- arithm::subScalar<signed char, float, short>,
- arithm::subScalar<signed char, float, int>,
- arithm::subScalar<signed char, float, float>,
- arithm::subScalar<signed char, double, double>
- },
- {
- 0 /*arithm::subScalar<unsigned short, float, unsigned char>*/,
- 0 /*arithm::subScalar<unsigned short, float, signed char>*/,
- arithm::subScalar<unsigned short, float, unsigned short>,
- arithm::subScalar<unsigned short, float, short>,
- arithm::subScalar<unsigned short, float, int>,
- arithm::subScalar<unsigned short, float, float>,
- arithm::subScalar<unsigned short, double, double>
- },
- {
- 0 /*arithm::subScalar<short, float, unsigned char>*/,
- 0 /*arithm::subScalar<short, float, signed char>*/,
- arithm::subScalar<short, float, unsigned short>,
- arithm::subScalar<short, float, short>,
- arithm::subScalar<short, float, int>,
- arithm::subScalar<short, float, float>,
- arithm::subScalar<short, double, double>
- },
- {
- 0 /*arithm::subScalar<int, float, unsigned char>*/,
- 0 /*arithm::subScalar<int, float, signed char>*/,
- 0 /*arithm::subScalar<int, float, unsigned short>*/,
- 0 /*arithm::subScalar<int, float, short>*/,
- arithm::subScalar<int, float, int>,
- arithm::subScalar<int, float, float>,
- arithm::subScalar<int, double, double>
- },
- {
- 0 /*arithm::subScalar<float, float, unsigned char>*/,
- 0 /*arithm::subScalar<float, float, signed char>*/,
- 0 /*arithm::subScalar<float, float, unsigned short>*/,
- 0 /*arithm::subScalar<float, float, short>*/,
- 0 /*arithm::subScalar<float, float, int>*/,
- arithm::subScalar<float, float, float>,
- arithm::subScalar<float, double, double>
- },
- {
- 0 /*arithm::subScalar<double, double, unsigned char>*/,
- 0 /*arithm::subScalar<double, double, signed char>*/,
- 0 /*arithm::subScalar<double, double, unsigned short>*/,
- 0 /*arithm::subScalar<double, double, short>*/,
- 0 /*arithm::subScalar<double, double, int>*/,
- 0 /*arithm::subScalar<double, double, float>*/,
- arithm::subScalar<double, double, double>
- }
- };
-
- typedef void (*npp_func_t)(const PtrStepSzb src, Scalar sc, PtrStepb dst, cudaStream_t stream);
- static const npp_func_t npp_funcs[7][4] =
- {
- {NppArithmScalar<CV_8U , 1, nppiSubC_8u_C1RSfs >::call, 0 , NppArithmScalar<CV_8U , 3, nppiSubC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiSubC_8u_C4RSfs >::call},
- {0 , 0 , 0 , 0 },
- {NppArithmScalar<CV_16U, 1, nppiSubC_16u_C1RSfs>::call, 0 , NppArithmScalar<CV_16U, 3, nppiSubC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiSubC_16u_C4RSfs>::call},
- {NppArithmScalar<CV_16S, 1, nppiSubC_16s_C1RSfs>::call, NppArithmScalar<CV_16S, 2, nppiSubC_16sc_C1RSfs>::call, NppArithmScalar<CV_16S, 3, nppiSubC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiSubC_16s_C4RSfs>::call},
- {NppArithmScalar<CV_32S, 1, nppiSubC_32s_C1RSfs>::call, NppArithmScalar<CV_32S, 2, nppiSubC_32sc_C1RSfs>::call, NppArithmScalar<CV_32S, 3, nppiSubC_32s_C3RSfs>::call, 0 },
- {NppArithmScalar<CV_32F, 1, nppiSubC_32f_C1R >::call, NppArithmScalar<CV_32F, 2, nppiSubC_32fc_C1R >::call, NppArithmScalar<CV_32F, 3, nppiSubC_32f_C3R >::call, NppArithmScalar<CV_32F, 4, nppiSubC_32f_C4R >::call},
- {0 , 0 , 0 , 0 }
- };
-
- const int sdepth = src.depth();
- const int ddepth = dst.depth();
- const int cn = src.channels();
-
- cudaStream_t stream = StreamAccessor::getStream(_stream);
-
- const npp_func_t npp_func = npp_funcs[sdepth][cn - 1];
- if (ddepth == sdepth && cn > 1 && npp_func != 0 && !inv)
- {
- npp_func(src, val, dst, stream);
- return;
- }
-
- CV_Assert( cn == 1 );
-
- const func_t func = funcs[sdepth][ddepth];
-
- if (!func)
- CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
-
- func(src, val[0], inv, dst, mask, stream);
-}
+void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);
void cv::cuda::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream)
{