From 6cf4371eb4adbe9df164f724ffad46713e2dbdf5 Mon Sep 17 00:00:00 2001 From: chacha21 Date: Fri, 3 Mar 2017 18:24:12 +0100 Subject: [PATCH] make cuda::absdiff support multi-channel scalars I took the subScalar.cu code and changed the inner operation --- modules/cudaarithm/src/cuda/absdiff_scalar.cu | 109 +++++++++++++++++++++----- 1 file changed, 88 insertions(+), 21 deletions(-) diff --git a/modules/cudaarithm/src/cuda/absdiff_scalar.cu b/modules/cudaarithm/src/cuda/absdiff_scalar.cu index 3ffd066..225298b 100644 --- a/modules/cudaarithm/src/cuda/absdiff_scalar.cu +++ b/modules/cudaarithm/src/cuda/absdiff_scalar.cu @@ -56,14 +56,14 @@ void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const G namespace { - template struct AbsDiffScalarOp : unary_function + template struct AbsDiffScalarOp : unary_function { - S val; + ScalarType val; - __device__ __forceinline__ T operator ()(T a) const + __device__ __forceinline__ DstType operator ()(SrcType a) const { - abs_func f; - return saturate_cast(f(a - val)); + abs_func f; + return saturate_cast(f(saturate_cast(a) - val)); } }; @@ -77,34 +77,101 @@ namespace }; }; - template - void absDiffScalarImpl(const GpuMat& src, double value, GpuMat& dst, Stream& stream) + template + void absDiffScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream) { - AbsDiffScalarOp op; - op.val = static_cast(value); + typedef typename MakeVec::cn>::type ScalarType; + + cv::Scalar_ value_ = value; + + AbsDiffScalarOp op; + op.val = VecTraits::make(value_.val); gridTransformUnary_< TransformPolicy >(globPtr(src), globPtr(dst), op, stream); } } void absDiffScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int) { - typedef void (*func_t)(const GpuMat& src, double val, GpuMat& dst, Stream& stream); - static const func_t funcs[] = + typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, Stream& stream); + static const func_t funcs[7][7][4] = { - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl, - absDiffScalarImpl + { + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + }, + { + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/, 0 /*absDiffScalarImpl*/}, + {absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl, absDiffScalarImpl} + } }; - const int depth = src.depth(); + const int sdepth = src.depth(); + const int ddepth = dst.depth(); + const int cn = src.channels(); + + CV_DbgAssert( sdepth <= CV_64F && ddepth <= CV_64F && cn <= 4 ); + + const func_t func = funcs[sdepth][ddepth][cn - 1]; - CV_DbgAssert( depth <= CV_64F ); + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - funcs[depth](src, val[0], dst, stream); + func(src, val, dst, stream); } #endif -- 2.7.4