"${OPENCV_HAL_LINKER_LIBS}"
)
+if(OPENCV_CORE_EXCLUDE_C_API)
+ ocv_target_compile_definitions(${the_module} PRIVATE "OPENCV_EXCLUDE_C_API=1")
+endif()
+
ocv_add_accuracy_tests()
ocv_add_perf_tests()
* logical operations *
\****************************************************************************************/
-void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
-{
- int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
- size_t esz = CV_ELEM_SIZE(buftype);
- getConvertFunc(sc.depth(), buftype)(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);
- // unroll the scalar
- if( scn < cn )
- {
- CV_Assert( scn == 1 );
- size_t esz1 = CV_ELEM_SIZE1(buftype);
- for( size_t i = esz1; i < esz; i++ )
- scbuf[i] = scbuf[i - esz1];
- }
- for( size_t i = esz; i < blocksize*esz; i++ )
- scbuf[i] = scbuf[i - esz];
-}
-
-
enum { OCL_OP_ADD=0, OCL_OP_SUB=1, OCL_OP_RSUB=2, OCL_OP_ABSDIFF=3, OCL_OP_MUL=4,
OCL_OP_MUL_SCALE=5, OCL_OP_DIV_SCALE=6, OCL_OP_RECIP_SCALE=7, OCL_OP_ADDW=8,
OCL_OP_AND=9, OCL_OP_OR=10, OCL_OP_XOR=11, OCL_OP_NOT=12, OCL_OP_MIN=13, OCL_OP_MAX=14,
return recipTab;
}
-}
-
-void cv::multiply(InputArray src1, InputArray src2,
+void multiply(InputArray src1, InputArray src2,
OutputArray dst, double scale, int dtype)
{
CV_INSTRUMENT_REGION();
true, &scale, std::abs(scale - 1.0) < DBL_EPSILON ? OCL_OP_MUL : OCL_OP_MUL_SCALE);
}
-void cv::divide(InputArray src1, InputArray src2,
+void divide(InputArray src1, InputArray src2,
OutputArray dst, double scale, int dtype)
{
CV_INSTRUMENT_REGION();
arithm_op(src1, src2, dst, noArray(), dtype, getDivTab(), true, &scale, OCL_OP_DIV_SCALE);
}
-void cv::divide(double scale, InputArray src2,
+void divide(double scale, InputArray src2,
OutputArray dst, int dtype)
{
CV_INSTRUMENT_REGION();
arithm_op(src2, src2, dst, noArray(), dtype, getRecipTab(), true, &scale, OCL_OP_RECIP_SCALE);
}
+// UMat counterpart of Mat::mul: per-element multiplication with an optional
+// scale factor. Delegates to cv::multiply, so dst(i) = (*this)(i) * m(i) * scale.
+UMat UMat::mul(InputArray m, double scale) const
+{
+ UMat dst;
+ multiply(*this, m, dst, scale);
+ return dst;
+}
+
/****************************************************************************************\
* addWeighted *
\****************************************************************************************/
-namespace cv
-{
-
static BinaryFuncC* getAddWeightedTab()
{
static BinaryFuncC addWeightedTab[] =
}
}
+
+#ifndef OPENCV_EXCLUDE_C_API
+
/****************************************************************************************\
* Earlier API: cvAdd etc. *
\****************************************************************************************/
cv::max( src1, value, dst );
}
+#endif // OPENCV_EXCLUDE_C_API
/* End of file. */
#include "precomp.hpp"
+#ifndef OPENCV_EXCLUDE_C_API
+
#define CV_ORIGIN_TL 0
#define CV_ORIGIN_BL 1
template<> void DefaultDeleter<CvFileStorage>::operator ()(CvFileStorage* obj) const
{ cvReleaseFileStorage(&obj); }
-template <typename T> static inline
-void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to)
-{
- int i = 0;
- for(; i < cn; i++)
- buf[i] = saturate_cast<T>(s.val[i]);
- for(; i < unroll_to; i++)
- buf[i] = buf[i-cn];
-}
+} // cv::
-void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
+
+/* universal functions */
+CV_IMPL void
+cvRelease( void** struct_ptr )
{
- CV_INSTRUMENT_REGION();
+ CvTypeInfo* info;
- const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
- CV_Assert(cn <= 4);
- switch(depth)
+ if( !struct_ptr )
+ CV_Error( CV_StsNullPtr, "NULL double pointer" );
+
+ if( *struct_ptr )
{
- case CV_8U:
- scalarToRawData_<uchar>(s, (uchar*)_buf, cn, unroll_to);
- break;
- case CV_8S:
- scalarToRawData_<schar>(s, (schar*)_buf, cn, unroll_to);
- break;
- case CV_16U:
- scalarToRawData_<ushort>(s, (ushort*)_buf, cn, unroll_to);
- break;
- case CV_16S:
- scalarToRawData_<short>(s, (short*)_buf, cn, unroll_to);
- break;
- case CV_32S:
- scalarToRawData_<int>(s, (int*)_buf, cn, unroll_to);
- break;
- case CV_32F:
- scalarToRawData_<float>(s, (float*)_buf, cn, unroll_to);
- break;
- case CV_64F:
- scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
- break;
- default:
- CV_Error(CV_StsUnsupportedFormat,"");
+ info = cvTypeOf( *struct_ptr );
+ if( !info )
+ CV_Error( CV_StsError, "Unknown object type" );
+ if( !info->release )
+ CV_Error( CV_StsError, "release function pointer is NULL" );
+
+ info->release( struct_ptr );
+ *struct_ptr = 0;
}
}
-} // cv::
+
+// Deep-copies a legacy C-API object through its registered type descriptor:
+// looks up the CvTypeInfo for struct_ptr via cvTypeOf() and invokes its
+// clone handler. Errors out (CV_StsNullPtr / CV_StsError) when the pointer
+// is NULL, the type is unregistered, or no clone function is registered.
+void* cvClone( const void* struct_ptr )
+{
+ void* struct_copy = 0;
+ CvTypeInfo* info;
+
+ if( !struct_ptr )
+ CV_Error( CV_StsNullPtr, "NULL structure pointer" );
+
+ info = cvTypeOf( struct_ptr );
+ if( !info )
+ CV_Error( CV_StsError, "Unknown object type" );
+ if( !info->clone )
+ CV_Error( CV_StsError, "clone function pointer is NULL" );
+
+ struct_copy = info->clone( struct_ptr );
+ return struct_copy;
+}
+#endif // OPENCV_EXCLUDE_C_API
/* End of file. */
#include "precomp.hpp"
+#ifndef OPENCV_EXCLUDE_C_API
CV_IMPL void
cvSplit( const void* srcarr, void* dstarr0, void* dstarr1, void* dstarr2, void* dstarr3 )
CV_Assert( dst.size() == src.size() && src.channels() == dst.channels() );
cv::normalize( src, dst, a, b, norm_type, dst.type(), mask );
}
+
+#endif // OPENCV_EXCLUDE_C_API
#include "convert_scale.simd.hpp"
#include "convert_scale.simd_declarations.hpp" // defines CV_CPU_DISPATCH_MODES_ALL=AVX2,...,BASELINE based on CMakeLists.txt content
-
namespace cv
{
}
}
-//==================================================================================================
-
-#ifdef HAVE_OPENCL
-
-static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
- double scale, double delta )
-{
- UMat src = _src.getUMat();
-
- if( _mask.empty() )
- src.convertTo( _dst, dtype, scale, delta );
- else if (src.channels() <= 4)
- {
- const ocl::Device & dev = ocl::Device::getDefault();
-
- int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
- ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)),
- rowsPerWI = dev.isIntel() ? 4 : 1;
-
- float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
- bool haveScale = std::fabs(scale - 1) > DBL_EPSILON,
- haveZeroScale = !(std::fabs(scale) > DBL_EPSILON),
- haveDelta = std::fabs(delta) > DBL_EPSILON,
- doubleSupport = dev.doubleFPConfig() > 0;
-
- if (!haveScale && !haveDelta && stype == dtype)
- {
- _src.copyTo(_dst, _mask);
- return true;
- }
- if (haveZeroScale)
- {
- _dst.setTo(Scalar(delta), _mask);
- return true;
- }
-
- if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
- return false;
-
- char cvt[2][40];
- String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
- " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
- ocl::typeToStr(stype), ocl::typeToStr(dtype),
- ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn,
- rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
- ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
- doubleSupport ? " -D DOUBLE_SUPPORT" : "",
- haveScale ? " -D HAVE_SCALE" : "",
- haveDelta ? " -D HAVE_DELTA" : "",
- ocl::typeToStr(sdepth), ocl::typeToStr(ddepth));
-
- ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
- if (k.empty())
- return false;
-
- UMat mask = _mask.getUMat(), dst = _dst.getUMat();
-
- ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
- maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
- dstarg = ocl::KernelArg::ReadWrite(dst);
-
- if (haveScale)
- {
- if (haveDelta)
- k.args(srcarg, maskarg, dstarg, fscale, fdelta);
- else
- k.args(srcarg, maskarg, dstarg, fscale);
- }
- else
- {
- if (haveDelta)
- k.args(srcarg, maskarg, dstarg, fdelta);
- else
- k.args(srcarg, maskarg, dstarg);
- }
-
- size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
- return k.run(2, globalsize, NULL, false);
- }
- else
- {
- UMat temp;
- src.convertTo( temp, dtype, scale, delta );
- temp.copyTo( _dst, _mask );
- }
-
- return true;
-}
-
-#endif
-
-void normalize(InputArray _src, InputOutputArray _dst, double a, double b,
- int norm_type, int rtype, InputArray _mask)
-{
- CV_INSTRUMENT_REGION();
-
- double scale = 1, shift = 0;
- int type = _src.type(), depth = CV_MAT_DEPTH(type);
-
- if( rtype < 0 )
- rtype = _dst.fixedType() ? _dst.depth() : depth;
-
- if( norm_type == CV_MINMAX )
- {
- double smin = 0, smax = 0;
- double dmin = MIN( a, b ), dmax = MAX( a, b );
- minMaxIdx( _src, &smin, &smax, 0, 0, _mask );
- scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0);
- if( rtype == CV_32F )
- {
- scale = (float)scale;
- shift = (float)dmin - (float)(smin*scale);
- }
- else
- shift = dmin - smin*scale;
- }
- else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C )
- {
- scale = norm( _src, norm_type, _mask );
- scale = scale > DBL_EPSILON ? a/scale : 0.;
- shift = 0;
- }
- else
- CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" );
-
- CV_OCL_RUN(_dst.isUMat(),
- ocl_normalize(_src, _dst, _mask, rtype, scale, shift))
-
- Mat src = _src.getMat();
- if( _mask.empty() )
- src.convertTo( _dst, rtype, scale, shift );
- else
- {
- Mat temp;
- src.convertTo( temp, rtype, scale, shift );
- temp.copyTo( _dst, _mask );
- }
-}
-
} // namespace
namespace cv
{
+// Writes the first `cn` channels of scalar `s` into `buf` as element type T
+// (with saturation), then replicates that cn-element pattern until `unroll_to`
+// elements have been filled. Callers pass unroll_to <= cn to skip unrolling.
+template <typename T> static inline
+void scalarToRawData_(const Scalar& s, T * const buf, const int cn, const int unroll_to)
+{
+ int i = 0;
+ for(; i < cn; i++)
+ buf[i] = saturate_cast<T>(s.val[i]);
+ for(; i < unroll_to; i++)
+ buf[i] = buf[i-cn];
+}
+
+// Converts Scalar `s` to a raw, densely packed buffer of the given CV type
+// (depth + channel count), optionally unrolling the channel pattern to
+// `unroll_to` elements. Dispatches on depth to the typed helper above.
+// Requires cn <= 4; unsupported depths raise CV_StsUnsupportedFormat.
+void scalarToRawData(const Scalar& s, void* _buf, int type, int unroll_to)
+{
+ CV_INSTRUMENT_REGION();
+
+ const int depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
+ CV_Assert(cn <= 4);
+ switch(depth)
+ {
+ case CV_8U:
+ scalarToRawData_<uchar>(s, (uchar*)_buf, cn, unroll_to);
+ break;
+ case CV_8S:
+ scalarToRawData_<schar>(s, (schar*)_buf, cn, unroll_to);
+ break;
+ case CV_16U:
+ scalarToRawData_<ushort>(s, (ushort*)_buf, cn, unroll_to);
+ break;
+ case CV_16S:
+ scalarToRawData_<short>(s, (short*)_buf, cn, unroll_to);
+ break;
+ case CV_32S:
+ scalarToRawData_<int>(s, (int*)_buf, cn, unroll_to);
+ break;
+ case CV_32F:
+ scalarToRawData_<float>(s, (float*)_buf, cn, unroll_to);
+ break;
+ case CV_64F:
+ scalarToRawData_<double>(s, (double*)_buf, cn, unroll_to);
+ break;
+#if CV_VERSION_MAJOR >= 4
+ // CV_16F (half-precision float) exists only in the 4.x type system
+ case CV_16F:
+ scalarToRawData_<float16_t>(s, (float16_t*)_buf, cn, unroll_to);
+ break;
+#endif
+ default:
+ CV_Error(CV_StsUnsupportedFormat,"");
+ }
+}
+
+// Converts the scalar matrix `sc` to `buftype` elements in `scbuf`, then
+// tiles the converted element across `blocksize` slots so block-wise
+// arithmetic loops can read a constant operand linearly.
+// Asserts that a conversion function exists for the depth pair, and that a
+// channel-count mismatch is only the single-channel broadcast case (scn == 1).
+void convertAndUnrollScalar( const Mat& sc, int buftype, uchar* scbuf, size_t blocksize )
+{
+ int scn = (int)sc.total(), cn = CV_MAT_CN(buftype);
+ size_t esz = CV_ELEM_SIZE(buftype);
+ BinaryFunc cvtFn = getConvertFunc(sc.depth(), buftype);
+ CV_Assert(cvtFn);
+ cvtFn(sc.ptr(), 1, 0, 1, scbuf, 1, Size(std::min(cn, scn), 1), 0);
+ // unroll the scalar
+ if( scn < cn )
+ {
+ CV_Assert( scn == 1 );
+ size_t esz1 = CV_ELEM_SIZE1(buftype);
+ // replicate the single converted channel across all cn channels
+ for( size_t i = esz1; i < esz; i++ )
+ scbuf[i] = scbuf[i - esz1];
+ }
+ // replicate the full element across the remaining blocksize-1 slots
+ for( size_t i = esz; i < blocksize*esz; i++ )
+ scbuf[i] = scbuf[i - esz];
+}
+
+
template<typename T> static void
copyMask_(const uchar* _src, size_t sstep, const uchar* mask, size_t mstep, uchar* _dst, size_t dstep, Size size)
{
return *this;
}
-#if CV_SIMD128
-template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
-{
- typedef typename V::lane_type T;
- int end = (int)(size.width*esz);
- int width = (end + 1)/2;
- int width_1 = width & -v_uint8x16::nlanes;
- int i, j;
-
-#if CV_STRONG_ALIGNMENT
- CV_Assert(isAligned<sizeof(T)>(src, dst));
-#endif
-
- for( ; size.height--; src += sstep, dst += dstep )
- {
- for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
- {
- V t0, t1;
-
- t0 = v_load((T*)((uchar*)src + i));
- t1 = v_load((T*)((uchar*)src + j - v_uint8x16::nlanes));
- t0 = v_reverse(t0);
- t1 = v_reverse(t1);
- v_store((T*)(dst + j - v_uint8x16::nlanes), t0);
- v_store((T*)(dst + i), t1);
- }
- if (isAligned<sizeof(T)>(src, dst))
- {
- for ( ; i < width; i += sizeof(T), j -= sizeof(T) )
- {
- T t0, t1;
-
- t0 = *((T*)((uchar*)src + i));
- t1 = *((T*)((uchar*)src + j - sizeof(T)));
- *((T*)(dst + j - sizeof(T))) = t0;
- *((T*)(dst + i)) = t1;
- }
- }
- else
- {
- for ( ; i < width; i += sizeof(T), j -= sizeof(T) )
- {
- for (int k = 0; k < (int)sizeof(T); k++)
- {
- uchar t0, t1;
-
- t0 = *((uchar*)src + i + k);
- t1 = *((uchar*)src + j + k - sizeof(T));
- *(dst + j + k - sizeof(T)) = t0;
- *(dst + i + k) = t1;
- }
- }
- }
- }
-}
-
-template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
-{
- int end = (int)(size.width*esz);
- int width = (end + 1)/2;
-
-#if CV_STRONG_ALIGNMENT
- CV_Assert(isAligned<sizeof(T1)>(src, dst));
- CV_Assert(isAligned<sizeof(T2)>(src, dst));
-#endif
-
- for( ; size.height--; src += sstep, dst += dstep )
- {
- for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) )
- {
- T1 t0, t1;
- T2 t2, t3;
-
- t0 = *((T1*)((uchar*)src + i));
- t2 = *((T2*)((uchar*)src + i + sizeof(T1)));
- t1 = *((T1*)((uchar*)src + j - sizeof(T1) - sizeof(T2)));
- t3 = *((T2*)((uchar*)src + j - sizeof(T2)));
- *((T1*)(dst + j - sizeof(T1) - sizeof(T2))) = t0;
- *((T2*)(dst + j - sizeof(T2))) = t2;
- *((T1*)(dst + i)) = t1;
- *((T2*)(dst + i + sizeof(T1))) = t3;
- }
- }
-}
-#endif
-
-static void
-flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
-{
-#if CV_SIMD
-#if CV_STRONG_ALIGNMENT
- size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep;
-#endif
- if (esz == 2 * v_uint8x16::nlanes)
- {
- int end = (int)(size.width*esz);
- int width = end/2;
-
- for( ; size.height--; src += sstep, dst += dstep )
- {
- for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes )
- {
-#if CV_SIMD256
- v_uint8x32 t0, t1;
-
- t0 = v256_load((uchar*)src + i);
- t1 = v256_load((uchar*)src + j);
- v_store(dst + j, t0);
- v_store(dst + i, t1);
-#else
- v_uint8x16 t0, t1, t2, t3;
-
- t0 = v_load((uchar*)src + i);
- t1 = v_load((uchar*)src + i + v_uint8x16::nlanes);
- t2 = v_load((uchar*)src + j);
- t3 = v_load((uchar*)src + j + v_uint8x16::nlanes);
- v_store(dst + j, t0);
- v_store(dst + j + v_uint8x16::nlanes, t1);
- v_store(dst + i, t2);
- v_store(dst + i + v_uint8x16::nlanes, t3);
-#endif
- }
- }
- }
- else if (esz == v_uint8x16::nlanes)
- {
- int end = (int)(size.width*esz);
- int width = end/2;
-
- for( ; size.height--; src += sstep, dst += dstep )
- {
- for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
- {
- v_uint8x16 t0, t1;
-
- t0 = v_load((uchar*)src + i);
- t1 = v_load((uchar*)src + j);
- v_store(dst + j, t0);
- v_store(dst + i, t1);
- }
- }
- }
- else if (esz == 8
-#if CV_STRONG_ALIGNMENT
- && isAligned<sizeof(uint64)>(alignmentMark)
-#endif
- )
- {
- flipHoriz_single<v_uint64x2>(src, sstep, dst, dstep, size, esz);
- }
- else if (esz == 4
-#if CV_STRONG_ALIGNMENT
- && isAligned<sizeof(unsigned)>(alignmentMark)
-#endif
- )
- {
- flipHoriz_single<v_uint32x4>(src, sstep, dst, dstep, size, esz);
- }
- else if (esz == 2
-#if CV_STRONG_ALIGNMENT
- && isAligned<sizeof(ushort)>(alignmentMark)
-#endif
- )
- {
- flipHoriz_single<v_uint16x8>(src, sstep, dst, dstep, size, esz);
- }
- else if (esz == 1)
- {
- flipHoriz_single<v_uint8x16>(src, sstep, dst, dstep, size, esz);
- }
- else if (esz == 24
-#if CV_STRONG_ALIGNMENT
- && isAligned<sizeof(uint64_t)>(alignmentMark)
-#endif
- )
- {
- int end = (int)(size.width*esz);
- int width = (end + 1)/2;
-
- for( ; size.height--; src += sstep, dst += dstep )
- {
- for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) )
- {
- v_uint8x16 t0, t1;
- uint64_t t2, t3;
-
- t0 = v_load((uchar*)src + i);
- t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes));
- t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t));
- t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t)));
- v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0);
- *((uint64_t*)(dst + j - sizeof(uint64_t))) = t2;
- v_store(dst + i, t1);
- *((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3;
- }
- }
- }
-#if !CV_STRONG_ALIGNMENT
- else if (esz == 12)
- {
- flipHoriz_double<uint64_t,uint>(src, sstep, dst, dstep, size, esz);
- }
- else if (esz == 6)
- {
- flipHoriz_double<uint,ushort>(src, sstep, dst, dstep, size, esz);
- }
- else if (esz == 3)
- {
- flipHoriz_double<ushort,uchar>(src, sstep, dst, dstep, size, esz);
- }
-#endif
- else
-#endif // CV_SIMD
- {
- int i, j, limit = (int)(((size.width + 1)/2)*esz);
- AutoBuffer<int> _tab(size.width*esz);
- int* tab = _tab.data();
-
- for( i = 0; i < size.width; i++ )
- for( size_t k = 0; k < esz; k++ )
- tab[i*esz + k] = (int)((size.width - i - 1)*esz + k);
-
- for( ; size.height--; src += sstep, dst += dstep )
- {
- for( i = 0; i < limit; i++ )
- {
- j = tab[i];
- uchar t0 = src[i], t1 = src[j];
- dst[i] = t1; dst[j] = t0;
- }
- }
- }
-}
-
-static void
-flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz )
-{
- const uchar* src1 = src0 + (size.height - 1)*sstep;
- uchar* dst1 = dst0 + (size.height - 1)*dstep;
- size.width *= (int)esz;
-
- for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep,
- dst0 += dstep, dst1 -= dstep )
- {
- int i = 0;
-#if CV_SIMD
-#if CV_STRONG_ALIGNMENT
- if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
-#endif
- {
- for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
- {
- v_int32 t0 = vx_load((int*)(src0 + i));
- v_int32 t1 = vx_load((int*)(src1 + i));
- vx_store((int*)(dst0 + i), t1);
- vx_store((int*)(dst1 + i), t0);
- }
- }
-#if CV_STRONG_ALIGNMENT
- else
- {
- for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
- {
- v_uint8 t0 = vx_load(src0 + i);
- v_uint8 t1 = vx_load(src1 + i);
- vx_store(dst0 + i, t1);
- vx_store(dst1 + i, t0);
- }
- }
-#endif
-#endif
-
- if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
- {
- for( ; i <= size.width - 16; i += 16 )
- {
- int t0 = ((int*)(src0 + i))[0];
- int t1 = ((int*)(src1 + i))[0];
-
- ((int*)(dst0 + i))[0] = t1;
- ((int*)(dst1 + i))[0] = t0;
-
- t0 = ((int*)(src0 + i))[1];
- t1 = ((int*)(src1 + i))[1];
-
- ((int*)(dst0 + i))[1] = t1;
- ((int*)(dst1 + i))[1] = t0;
-
- t0 = ((int*)(src0 + i))[2];
- t1 = ((int*)(src1 + i))[2];
-
- ((int*)(dst0 + i))[2] = t1;
- ((int*)(dst1 + i))[2] = t0;
-
- t0 = ((int*)(src0 + i))[3];
- t1 = ((int*)(src1 + i))[3];
-
- ((int*)(dst0 + i))[3] = t1;
- ((int*)(dst1 + i))[3] = t0;
- }
-
- for( ; i <= size.width - 4; i += 4 )
- {
- int t0 = ((int*)(src0 + i))[0];
- int t1 = ((int*)(src1 + i))[0];
-
- ((int*)(dst0 + i))[0] = t1;
- ((int*)(dst1 + i))[0] = t0;
- }
- }
-
- for( ; i < size.width; i++ )
- {
- uchar t0 = src0[i];
- uchar t1 = src1[i];
-
- dst0[i] = t1;
- dst1[i] = t0;
- }
- }
-}
-
-#ifdef HAVE_OPENCL
-
-enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS };
-
-static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
-{
- CV_Assert(flipCode >= -1 && flipCode <= 1);
-
- const ocl::Device & dev = ocl::Device::getDefault();
- int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
- flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4);
-
- bool doubleSupport = dev.doubleFPConfig() > 0;
- if (!doubleSupport && depth == CV_64F)
- kercn = cn;
-
- if (cn > 4)
- return false;
-
- const char * kernelName;
- if (flipCode == 0)
- kernelName = "arithm_flip_rows", flipType = FLIP_ROWS;
- else if (flipCode > 0)
- kernelName = "arithm_flip_cols", flipType = FLIP_COLS;
- else
- kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;
-
- int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
- kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;
-
- ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
- format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
- kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
- kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn));
- if (k.empty())
- return false;
-
- Size size = _src.size();
- _dst.create(size, type);
- UMat src = _src.getUMat(), dst = _dst.getUMat();
-
- int cols = size.width * cn / kercn, rows = size.height;
- cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols;
- rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows;
-
- k.args(ocl::KernelArg::ReadOnlyNoSize(src),
- ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols);
-
- size_t maxWorkGroupSize = dev.maxWorkGroupSize();
- CV_Assert(maxWorkGroupSize % 4 == 0);
-
- size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy },
- localsize[2] = { maxWorkGroupSize / 4, 4 };
- return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false);
-}
-
-#endif
-
-#if defined HAVE_IPP
-static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
-{
-#ifdef HAVE_IPP_IW
- CV_INSTRUMENT_REGION_IPP();
-
- // Details: https://github.com/opencv/opencv/issues/12943
- if (flip_mode <= 0 /* swap rows */
- && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42
- && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/
- )
- return false;
-
- IppiAxis ippMode;
- if(flip_mode < 0)
- ippMode = ippAxsBoth;
- else if(flip_mode == 0)
- ippMode = ippAxsHorizontal;
- else
- ippMode = ippAxsVertical;
-
- try
- {
- ::ipp::IwiImage iwSrc = ippiGetImage(src);
- ::ipp::IwiImage iwDst = ippiGetImage(dst);
-
- CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode);
- }
- catch(const ::ipp::IwException &)
- {
- return false;
- }
-
- return true;
-#else
- CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode);
- return false;
-#endif
-}
-#endif
-
-
-void flip( InputArray _src, OutputArray _dst, int flip_mode )
-{
- CV_INSTRUMENT_REGION();
-
- CV_Assert( _src.dims() <= 2 );
- Size size = _src.size();
-
- if (flip_mode < 0)
- {
- if (size.width == 1)
- flip_mode = 0;
- if (size.height == 1)
- flip_mode = 1;
- }
-
- if ((size.width == 1 && flip_mode > 0) ||
- (size.height == 1 && flip_mode == 0))
- {
- return _src.copyTo(_dst);
- }
-
- CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode))
-
- Mat src = _src.getMat();
- int type = src.type();
- _dst.create( size, type );
- Mat dst = _dst.getMat();
-
- CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode));
-
- size_t esz = CV_ELEM_SIZE(type);
-
- if( flip_mode <= 0 )
- flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
- else
- flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
-
- if( flip_mode < 0 )
- flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz );
-}
-
-void rotate(InputArray _src, OutputArray _dst, int rotateMode)
-{
- CV_Assert(_src.dims() <= 2);
-
- switch (rotateMode)
- {
- case ROTATE_90_CLOCKWISE:
- transpose(_src, _dst);
- flip(_dst, _dst, 1);
- break;
- case ROTATE_180:
- flip(_src, _dst, -1);
- break;
- case ROTATE_90_COUNTERCLOCKWISE:
- transpose(_src, _dst);
- flip(_dst, _dst, 0);
- break;
- default:
- break;
- }
-}
#if defined HAVE_OPENCL && !defined __APPLE__
}
}
+
+#ifndef OPENCV_EXCLUDE_C_API
+
/* dst = src */
CV_IMPL void
cvCopy( const void* srcarr, void* dstarr, const void* maskarr )
cv::repeat(src, dst.rows/src.rows, dst.cols/src.cols, dst);
}
+#endif // OPENCV_EXCLUDE_C_API
/* End of file. */
//M*/
#include "precomp.hpp"
+#ifndef OPENCV_EXCLUDE_C_API
+
/* default alignment for dynamic data strucutures, resided in storages. */
#define CV_STRUCT_ALIGN ((int)sizeof(double))
}
+#endif // OPENCV_EXCLUDE_C_API
/* End of file. */
return optimalDFTSizeTab[b];
}
+
+#ifndef OPENCV_EXCLUDE_C_API
+
CV_IMPL void
cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
{
return cv::getOptimalDFTSize(size0);
}
+#endif // OPENCV_EXCLUDE_C_API
/* End of file. */
(double*)alignPtr(buffer, sizeof(double)), DBL_EPSILON*2 );
}
-}
-
/****************************************************************************************\
* Determinant of the matrix *
\****************************************************************************************/
m(0,1)*((double)m(1,0)*m(2,2) - (double)m(1,2)*m(2,0)) + \
m(0,2)*((double)m(1,0)*m(2,1) - (double)m(1,1)*m(2,0)))
-double cv::determinant( InputArray _mat )
+double determinant( InputArray _mat )
{
CV_INSTRUMENT_REGION();
#define Df( y, x ) ((float*)(dstdata + y*dststep))[x]
#define Dd( y, x ) ((double*)(dstdata + y*dststep))[x]
-double cv::invert( InputArray _src, OutputArray _dst, int method )
+double invert( InputArray _src, OutputArray _dst, int method )
{
CV_INSTRUMENT_REGION();
return result;
}
+// UMat counterpart of Mat::inv: returns the inverse (or pseudo-inverse,
+// depending on `method`) computed by cv::invert.
+UMat UMat::inv(int method) const
+{
+ UMat m;
+ invert(*this, m, method);
+ return m;
+}
/****************************************************************************************\
* Solving a linear system *
\****************************************************************************************/
-bool cv::solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
+bool solve( InputArray _src, InputArray _src2arg, OutputArray _dst, int method )
{
CV_INSTRUMENT_REGION();
/////////////////// finding eigenvalues and eigenvectors of a symmetric matrix ///////////////
-bool cv::eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
+bool eigen( InputArray _src, OutputArray _evals, OutputArray _evects )
{
CV_INSTRUMENT_REGION();
const bool evecNeeded = _evects.needed();
const int esOptions = evecNeeded ? Eigen::ComputeEigenvectors : Eigen::EigenvaluesOnly;
_evals.create(n, 1, type);
- cv::Mat evals = _evals.getMat();
+ Mat evals = _evals.getMat();
if ( type == CV_64F )
{
Eigen::MatrixXd src_eig, zeros_eig;
#endif
}
-namespace cv
-{
-
static void _SVDcompute( InputArray _aarr, OutputArray _w,
OutputArray _u, OutputArray _vt, int flags )
{
}
+
+#ifndef OPENCV_EXCLUDE_C_API
+
CV_IMPL double
cvDet( const CvArr* arr )
{
cv::SVD::backSubst(w, u, v, rhs, dst);
CV_Assert( dst.data == dst0.data );
}
+#endif // OPENCV_EXCLUDE_C_API
}
+
+#ifndef OPENCV_EXCLUDE_C_API
+
CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); }
CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); }
return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal );
}
+#endif // OPENCV_EXCLUDE_C_API
/*
Finds real roots of cubic, quadratic or linear equation.
}
+#ifndef OPENCV_EXCLUDE_C_API
+
CV_IMPL int
cvSolveCubic( const CvMat* coeffs, CvMat* roots )
{
CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated
}
+#endif // OPENCV_EXCLUDE_C_API
// Common constants for dispatched code
return r;
}
+
+#ifdef HAVE_OPENCL
+
+// OpenCL implementation of the dot product: both inputs are reshaped to a
+// single channel, reduced on-device by the "reduce" kernel (OP_DOT) into one
+// partial sum per compute unit, then the partial sums are added on the host.
+// Returns false (caller falls back to CPU) when the kernel cannot be built
+// or the device lacks double support for CV_64F inputs.
+static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
+{
+ UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);
+
+ int type = src1.type(), depth = CV_MAT_DEPTH(type),
+ kercn = ocl::predictOptimalVectorWidth(src1, src2);
+ bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
+
+ if ( !doubleSupport && depth == CV_64F )
+ return false;
+
+ // one partial result per compute unit; accumulate in at least CV_32F
+ int dbsize = ocl::Device::getDefault().maxComputeUnits();
+ size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
+ int ddepth = std::max(CV_32F, depth);
+
+ // largest power of two not exceeding the max work-group size
+ int wgs2_aligned = 1;
+ while (wgs2_aligned < (int)wgs)
+ wgs2_aligned <<= 1;
+ wgs2_aligned >>= 1;
+
+ char cvt[40];
+ ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
+ format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
+ "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
+ ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
+ ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
+ ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
+ (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
+ _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
+ _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
+ if (k.empty())
+ return false;
+
+ UMat db(1, dbsize, ddepth);
+
+ ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
+ src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
+ dbarg = ocl::KernelArg::PtrWriteOnly(db);
+
+ k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);
+
+ size_t globalsize = dbsize * wgs;
+ if (k.run(1, &globalsize, &wgs, false))
+ {
+ // final reduction of the per-workgroup partial sums happens on the host
+ res = sum(db.getMat(ACCESS_READ))[0];
+ return true;
+ }
+ return false;
+}
+
+#endif
+
+// UMat counterpart of Mat::dot. Validates that m has the same size and type,
+// tries the OpenCL reduction for <=2-D inputs (CV_OCL_RUN_ returns r on
+// success), and otherwise falls back to the CPU Mat::dot on a read-mapped Mat.
+double UMat::dot(InputArray m) const
+{
+ CV_INSTRUMENT_REGION();
+
+ CV_Assert(m.sameSize(*this) && m.type() == type());
+
+#ifdef HAVE_OPENCL
+ double r = 0;
+ CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
+#endif
+
+ return getMat(ACCESS_READ).dot(m);
+}
+
} // namespace cv::
+
+#ifndef OPENCV_EXCLUDE_C_API
/****************************************************************************************\
* Earlier API *
\****************************************************************************************/
CV_Assert(dst0.data == dst.data);
}
+#endif // OPENCV_EXCLUDE_C_API
+
/* End of file. */
#include "opencv2/core/mat.hpp"
#include "opencv2/core/types_c.h"
+#ifndef OPENCV_EXCLUDE_C_API
// glue
CvMatND cvMatND(const cv::Mat& m)
}
}
-
CV_IMPL int
cvKMeans2( const CvArr* _samples, int cluster_count, CvArr* _labels,
CvTermCriteria termcrit, int attempts, CvRNG*,
*_compactness = compactness;
return 1;
}
+
+#endif // OPENCV_EXCLUDE_C_API
}
}
+
+namespace cv {
+
+// rows/cols convenience overload; forwards to the Size-based UMat::eye.
+UMat UMat::eye(int rows, int cols, int type)
+{
+ return UMat::eye(Size(cols, rows), type);
+}
+
+// Returns an identity matrix of the given size/type, built by allocating a
+// UMat and filling it with cv::setIdentity.
+UMat UMat::eye(Size size, int type)
+{
+ UMat m(size, type);
+ setIdentity(m);
+ return m;
+}
+
+} // namespace
+
//////////////////////////////////////////// trace ///////////////////////////////////////////
cv::Scalar cv::trace( InputArray _m )
return cv::sum(m.diag());
}
-////////////////////////////////////// transpose /////////////////////////////////////////
-
-namespace cv
-{
-
-template<typename T> static void
-transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz )
-{
- int i=0, j, m = sz.width, n = sz.height;
-
- #if CV_ENABLE_UNROLLED
- for(; i <= m - 4; i += 4 )
- {
- T* d0 = (T*)(dst + dstep*i);
- T* d1 = (T*)(dst + dstep*(i+1));
- T* d2 = (T*)(dst + dstep*(i+2));
- T* d3 = (T*)(dst + dstep*(i+3));
-
- for( j = 0; j <= n - 4; j += 4 )
- {
- const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
- const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
- const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
- const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
-
- d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
- d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1];
- d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2];
- d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3];
- }
-
- for( ; j < n; j++ )
- {
- const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
- d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3];
- }
- }
- #endif
- for( ; i < m; i++ )
- {
- T* d0 = (T*)(dst + dstep*i);
- j = 0;
- #if CV_ENABLE_UNROLLED
- for(; j <= n - 4; j += 4 )
- {
- const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
- const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
- const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
- const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
-
- d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
- }
- #endif
- for( ; j < n; j++ )
- {
- const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
- d0[j] = s0[0];
- }
- }
-}
-
-template<typename T> static void
-transposeI_( uchar* data, size_t step, int n )
-{
- for( int i = 0; i < n; i++ )
- {
- T* row = (T*)(data + step*i);
- uchar* data1 = data + i*sizeof(T);
- for( int j = i+1; j < n; j++ )
- std::swap( row[j], *(T*)(data1 + step*j) );
- }
-}
-
-typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz );
-typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n );
-
-#define DEF_TRANSPOSE_FUNC(suffix, type) \
-static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \
-{ transpose_<type>(src, sstep, dst, dstep, sz); } \
-\
-static void transposeI_##suffix( uchar* data, size_t step, int n ) \
-{ transposeI_<type>(data, step, n); }
-
-DEF_TRANSPOSE_FUNC(8u, uchar)
-DEF_TRANSPOSE_FUNC(16u, ushort)
-DEF_TRANSPOSE_FUNC(8uC3, Vec3b)
-DEF_TRANSPOSE_FUNC(32s, int)
-DEF_TRANSPOSE_FUNC(16uC3, Vec3s)
-DEF_TRANSPOSE_FUNC(32sC2, Vec2i)
-DEF_TRANSPOSE_FUNC(32sC3, Vec3i)
-DEF_TRANSPOSE_FUNC(32sC4, Vec4i)
-DEF_TRANSPOSE_FUNC(32sC6, Vec6i)
-DEF_TRANSPOSE_FUNC(32sC8, Vec8i)
-
-static TransposeFunc transposeTab[] =
-{
- 0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0,
- transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4,
- 0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8
-};
-
-static TransposeInplaceFunc transposeInplaceTab[] =
-{
- 0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0,
- transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4,
- 0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8
-};
-
-#ifdef HAVE_OPENCL
-
-static bool ocl_transpose( InputArray _src, OutputArray _dst )
-{
- const ocl::Device & dev = ocl::Device::getDefault();
- const int TILE_DIM = 32, BLOCK_ROWS = 8;
- int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type),
- rowsPerWI = dev.isIntel() ? 4 : 1;
-
- UMat src = _src.getUMat();
- _dst.create(src.cols, src.rows, type);
- UMat dst = _dst.getUMat();
-
- String kernelName("transpose");
- bool inplace = dst.u == src.u;
-
- if (inplace)
- {
- CV_Assert(dst.cols == dst.rows);
- kernelName += "_inplace";
- }
- else
- {
- // check required local memory size
- size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type);
- if (required_local_memory > ocl::Device::getDefault().localMemSize())
- return false;
- }
-
- ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc,
- format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s",
- ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth),
- cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : ""));
- if (k.empty())
- return false;
-
- if (inplace)
- k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows);
- else
- k.args(ocl::KernelArg::ReadOnly(src),
- ocl::KernelArg::WriteOnlyNoSize(dst));
-
- size_t localsize[2] = { TILE_DIM, BLOCK_ROWS };
- size_t globalsize[2] = { (size_t)src.cols, inplace ? ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) };
-
- if (inplace && dev.isIntel())
- {
- localsize[0] = 16;
- localsize[1] = dev.maxWorkGroupSize() / localsize[0];
- }
-
- return k.run(2, globalsize, localsize, false);
-}
-
-#endif
-
-#ifdef HAVE_IPP
-static bool ipp_transpose( Mat &src, Mat &dst )
-{
- CV_INSTRUMENT_REGION_IPP();
-
- int type = src.type();
- typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize);
- typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize);
- IppiTranspose ippiTranspose = 0;
- IppiTransposeI ippiTranspose_I = 0;
-
- if (dst.data == src.data && dst.cols == dst.rows)
- {
- CV_SUPPRESS_DEPRECATED_START
- ippiTranspose_I =
- type == CV_8UC1 ? (IppiTransposeI)ippiTranspose_8u_C1IR :
- type == CV_8UC3 ? (IppiTransposeI)ippiTranspose_8u_C3IR :
- type == CV_8UC4 ? (IppiTransposeI)ippiTranspose_8u_C4IR :
- type == CV_16UC1 ? (IppiTransposeI)ippiTranspose_16u_C1IR :
- type == CV_16UC3 ? (IppiTransposeI)ippiTranspose_16u_C3IR :
- type == CV_16UC4 ? (IppiTransposeI)ippiTranspose_16u_C4IR :
- type == CV_16SC1 ? (IppiTransposeI)ippiTranspose_16s_C1IR :
- type == CV_16SC3 ? (IppiTransposeI)ippiTranspose_16s_C3IR :
- type == CV_16SC4 ? (IppiTransposeI)ippiTranspose_16s_C4IR :
- type == CV_32SC1 ? (IppiTransposeI)ippiTranspose_32s_C1IR :
- type == CV_32SC3 ? (IppiTransposeI)ippiTranspose_32s_C3IR :
- type == CV_32SC4 ? (IppiTransposeI)ippiTranspose_32s_C4IR :
- type == CV_32FC1 ? (IppiTransposeI)ippiTranspose_32f_C1IR :
- type == CV_32FC3 ? (IppiTransposeI)ippiTranspose_32f_C3IR :
- type == CV_32FC4 ? (IppiTransposeI)ippiTranspose_32f_C4IR : 0;
- CV_SUPPRESS_DEPRECATED_END
- }
- else
- {
- ippiTranspose =
- type == CV_8UC1 ? (IppiTranspose)ippiTranspose_8u_C1R :
- type == CV_8UC3 ? (IppiTranspose)ippiTranspose_8u_C3R :
- type == CV_8UC4 ? (IppiTranspose)ippiTranspose_8u_C4R :
- type == CV_16UC1 ? (IppiTranspose)ippiTranspose_16u_C1R :
- type == CV_16UC3 ? (IppiTranspose)ippiTranspose_16u_C3R :
- type == CV_16UC4 ? (IppiTranspose)ippiTranspose_16u_C4R :
- type == CV_16SC1 ? (IppiTranspose)ippiTranspose_16s_C1R :
- type == CV_16SC3 ? (IppiTranspose)ippiTranspose_16s_C3R :
- type == CV_16SC4 ? (IppiTranspose)ippiTranspose_16s_C4R :
- type == CV_32SC1 ? (IppiTranspose)ippiTranspose_32s_C1R :
- type == CV_32SC3 ? (IppiTranspose)ippiTranspose_32s_C3R :
- type == CV_32SC4 ? (IppiTranspose)ippiTranspose_32s_C4R :
- type == CV_32FC1 ? (IppiTranspose)ippiTranspose_32f_C1R :
- type == CV_32FC3 ? (IppiTranspose)ippiTranspose_32f_C3R :
- type == CV_32FC4 ? (IppiTranspose)ippiTranspose_32f_C4R : 0;
- }
-
- IppiSize roiSize = { src.cols, src.rows };
- if (ippiTranspose != 0)
- {
- if (CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0)
- return true;
- }
- else if (ippiTranspose_I != 0)
- {
- if (CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0)
- return true;
- }
- return false;
-}
-#endif
-
-}
-
-
-void cv::transpose( InputArray _src, OutputArray _dst )
-{
- CV_INSTRUMENT_REGION();
-
- int type = _src.type(), esz = CV_ELEM_SIZE(type);
- CV_Assert( _src.dims() <= 2 && esz <= 32 );
-
- CV_OCL_RUN(_dst.isUMat(),
- ocl_transpose(_src, _dst))
-
- Mat src = _src.getMat();
- if( src.empty() )
- {
- _dst.release();
- return;
- }
-
- _dst.create(src.cols, src.rows, src.type());
- Mat dst = _dst.getMat();
-
- // handle the case of single-column/single-row matrices, stored in STL vectors.
- if( src.rows != dst.cols || src.cols != dst.rows )
- {
- CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) );
- src.copyTo(dst);
- return;
- }
-
- CV_IPP_RUN_FAST(ipp_transpose(src, dst))
-
- if( dst.data == src.data )
- {
- TransposeInplaceFunc func = transposeInplaceTab[esz];
- CV_Assert( func != 0 );
- CV_Assert( dst.cols == dst.rows );
- func( dst.ptr(), dst.step, dst.rows );
- }
- else
- {
- TransposeFunc func = transposeTab[esz];
- CV_Assert( func != 0 );
- func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() );
- }
-}
-
////////////////////////////////////// completeSymm /////////////////////////////////////////
--- /dev/null
+// This file is part of OpenCV project.
+// It is subject to the license terms in the LICENSE file found in the top-level directory
+// of this distribution and at http://opencv.org/license.html
+
+#include "precomp.hpp"
+#include "opencl_kernels_core.hpp"
+
+namespace cv {
+
+////////////////////////////////////// transpose /////////////////////////////////////////
+
+// Generic out-of-place transpose for element type T.
+// src/dst are raw byte pointers with byte strides sstep/dstep; sz.width (m)
+// is the number of destination rows, sz.height (n) the number of source rows.
+template<typename T> static void
+transpose_( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz )
+{
+    int i=0, j, m = sz.width, n = sz.height;
+
+    #if CV_ENABLE_UNROLLED
+    // Process 4 destination rows at a time (4x4 tiles) for better locality.
+    for(; i <= m - 4; i += 4 )
+    {
+        T* d0 = (T*)(dst + dstep*i);
+        T* d1 = (T*)(dst + dstep*(i+1));
+        T* d2 = (T*)(dst + dstep*(i+2));
+        T* d3 = (T*)(dst + dstep*(i+3));
+
+        for( j = 0; j <= n - 4; j += 4 )
+        {
+            const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
+            const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
+            const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
+            const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
+
+            d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
+            d1[j] = s0[1]; d1[j+1] = s1[1]; d1[j+2] = s2[1]; d1[j+3] = s3[1];
+            d2[j] = s0[2]; d2[j+1] = s1[2]; d2[j+2] = s2[2]; d2[j+3] = s3[2];
+            d3[j] = s0[3]; d3[j+1] = s1[3]; d3[j+2] = s2[3]; d3[j+3] = s3[3];
+        }
+
+        // Tail columns that did not fill a 4-wide tile.
+        for( ; j < n; j++ )
+        {
+            const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
+            d0[j] = s0[0]; d1[j] = s0[1]; d2[j] = s0[2]; d3[j] = s0[3];
+        }
+    }
+    #endif
+    // Remaining destination rows, one at a time.
+    for( ; i < m; i++ )
+    {
+        T* d0 = (T*)(dst + dstep*i);
+        j = 0;
+        #if CV_ENABLE_UNROLLED
+        for(; j <= n - 4; j += 4 )
+        {
+            const T* s0 = (const T*)(src + i*sizeof(T) + sstep*j);
+            const T* s1 = (const T*)(src + i*sizeof(T) + sstep*(j+1));
+            const T* s2 = (const T*)(src + i*sizeof(T) + sstep*(j+2));
+            const T* s3 = (const T*)(src + i*sizeof(T) + sstep*(j+3));
+
+            d0[j] = s0[0]; d0[j+1] = s1[0]; d0[j+2] = s2[0]; d0[j+3] = s3[0];
+        }
+        #endif
+        for( ; j < n; j++ )
+        {
+            const T* s0 = (const T*)(src + i*sizeof(T) + j*sstep);
+            d0[j] = s0[0];
+        }
+    }
+}
+
+// In-place transpose of a square n x n grid of elements of type T:
+// swaps each element above the diagonal with its mirror below it.
+template<typename T> static void
+transposeI_( uchar* data, size_t step, int n )
+{
+    for( int i = 0; i < n; i++ )
+    {
+        T* row = (T*)(data + step*i);
+        uchar* data1 = data + i*sizeof(T);    // column i, walked down by 'step'
+        for( int j = i+1; j < n; j++ )
+            std::swap( row[j], *(T*)(data1 + step*j) );
+    }
+}
+
+typedef void (*TransposeFunc)( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz );
+typedef void (*TransposeInplaceFunc)( uchar* data, size_t step, int n );
+
+// Instantiates non-template wrappers around transpose_/transposeI_ for one
+// element type, so they can be stored in the function tables below.
+#define DEF_TRANSPOSE_FUNC(suffix, type) \
+static void transpose_##suffix( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size sz ) \
+{ transpose_<type>(src, sstep, dst, dstep, sz); } \
+\
+static void transposeI_##suffix( uchar* data, size_t step, int n ) \
+{ transposeI_<type>(data, step, n); }
+
+DEF_TRANSPOSE_FUNC(8u, uchar)
+DEF_TRANSPOSE_FUNC(16u, ushort)
+DEF_TRANSPOSE_FUNC(8uC3, Vec3b)
+DEF_TRANSPOSE_FUNC(32s, int)
+DEF_TRANSPOSE_FUNC(16uC3, Vec3s)
+DEF_TRANSPOSE_FUNC(32sC2, Vec2i)
+DEF_TRANSPOSE_FUNC(32sC3, Vec3i)
+DEF_TRANSPOSE_FUNC(32sC4, Vec4i)
+DEF_TRANSPOSE_FUNC(32sC6, Vec6i)
+DEF_TRANSPOSE_FUNC(32sC8, Vec8i)
+
+// Tables indexed by element size in bytes (CV_ELEM_SIZE); zero entries mark
+// unsupported sizes and are guarded by CV_Assert at the call site.
+static TransposeFunc transposeTab[] =
+{
+    0, transpose_8u, transpose_16u, transpose_8uC3, transpose_32s, 0, transpose_16uC3, 0,
+    transpose_32sC2, 0, 0, 0, transpose_32sC3, 0, 0, 0, transpose_32sC4,
+    0, 0, 0, 0, 0, 0, 0, transpose_32sC6, 0, 0, 0, 0, 0, 0, 0, transpose_32sC8
+};
+
+static TransposeInplaceFunc transposeInplaceTab[] =
+{
+    0, transposeI_8u, transposeI_16u, transposeI_8uC3, transposeI_32s, 0, transposeI_16uC3, 0,
+    transposeI_32sC2, 0, 0, 0, transposeI_32sC3, 0, 0, 0, transposeI_32sC4,
+    0, 0, 0, 0, 0, 0, 0, transposeI_32sC6, 0, 0, 0, 0, 0, 0, 0, transposeI_32sC8
+};
+
+#ifdef HAVE_OPENCL
+
+// OpenCL transpose. Uses a tiled kernel for the out-of-place case and a
+// dedicated "_inplace" kernel variant when dst shares the buffer with src
+// (square matrices only). Returns false so the caller can fall back to CPU.
+static bool ocl_transpose( InputArray _src, OutputArray _dst )
+{
+    const ocl::Device & dev = ocl::Device::getDefault();
+    const int TILE_DIM = 32, BLOCK_ROWS = 8;
+    int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type),
+        rowsPerWI = dev.isIntel() ? 4 : 1;
+
+    UMat src = _src.getUMat();
+    _dst.create(src.cols, src.rows, type);
+    UMat dst = _dst.getUMat();
+
+    String kernelName("transpose");
+    bool inplace = dst.u == src.u;
+
+    if (inplace)
+    {
+        CV_Assert(dst.cols == dst.rows);
+        kernelName += "_inplace";
+    }
+    else
+    {
+        // check required local memory size
+        size_t required_local_memory = (size_t) TILE_DIM*(TILE_DIM+1)*CV_ELEM_SIZE(type);
+        if (required_local_memory > ocl::Device::getDefault().localMemSize())
+            return false;
+    }
+
+    ocl::Kernel k(kernelName.c_str(), ocl::core::transpose_oclsrc,
+                  format("-D T=%s -D T1=%s -D cn=%d -D TILE_DIM=%d -D BLOCK_ROWS=%d -D rowsPerWI=%d%s",
+                         ocl::memopTypeToStr(type), ocl::memopTypeToStr(depth),
+                         cn, TILE_DIM, BLOCK_ROWS, rowsPerWI, inplace ? " -D INPLACE" : ""));
+    if (k.empty())
+        return false;
+
+    if (inplace)
+        k.args(ocl::KernelArg::ReadWriteNoSize(dst), dst.rows);
+    else
+        k.args(ocl::KernelArg::ReadOnly(src),
+               ocl::KernelArg::WriteOnlyNoSize(dst));
+
+    size_t localsize[2]  = { TILE_DIM, BLOCK_ROWS };
+    size_t globalsize[2] = { (size_t)src.cols, inplace ? ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI : (divUp((size_t)src.rows, TILE_DIM) * BLOCK_ROWS) };
+
+    // Larger work-groups benefit the inplace kernel on Intel devices.
+    if (inplace && dev.isIntel())
+    {
+        localsize[0] = 16;
+        localsize[1] = dev.maxWorkGroupSize() / localsize[0];
+    }
+
+    return k.run(2, globalsize, localsize, false);
+}
+
+#endif
+
+#ifdef HAVE_IPP
+// IPP-accelerated transpose. Selects an in-place IPP primitive when src and
+// dst alias (square only), otherwise an out-of-place one; returns false when
+// the type is unsupported or the IPP call fails, so the caller falls back.
+static bool ipp_transpose( Mat &src, Mat &dst )
+{
+    CV_INSTRUMENT_REGION_IPP();
+
+    int type = src.type();
+    typedef IppStatus (CV_STDCALL * IppiTranspose)(const void * pSrc, int srcStep, void * pDst, int dstStep, IppiSize roiSize);
+    typedef IppStatus (CV_STDCALL * IppiTransposeI)(const void * pSrcDst, int srcDstStep, IppiSize roiSize);
+    IppiTranspose ippiTranspose = 0;
+    IppiTransposeI ippiTranspose_I = 0;
+
+    if (dst.data == src.data && dst.cols == dst.rows)
+    {
+        CV_SUPPRESS_DEPRECATED_START
+        ippiTranspose_I =
+            type == CV_8UC1 ? (IppiTransposeI)ippiTranspose_8u_C1IR :
+            type == CV_8UC3 ? (IppiTransposeI)ippiTranspose_8u_C3IR :
+            type == CV_8UC4 ? (IppiTransposeI)ippiTranspose_8u_C4IR :
+            type == CV_16UC1 ? (IppiTransposeI)ippiTranspose_16u_C1IR :
+            type == CV_16UC3 ? (IppiTransposeI)ippiTranspose_16u_C3IR :
+            type == CV_16UC4 ? (IppiTransposeI)ippiTranspose_16u_C4IR :
+            type == CV_16SC1 ? (IppiTransposeI)ippiTranspose_16s_C1IR :
+            type == CV_16SC3 ? (IppiTransposeI)ippiTranspose_16s_C3IR :
+            type == CV_16SC4 ? (IppiTransposeI)ippiTranspose_16s_C4IR :
+            type == CV_32SC1 ? (IppiTransposeI)ippiTranspose_32s_C1IR :
+            type == CV_32SC3 ? (IppiTransposeI)ippiTranspose_32s_C3IR :
+            type == CV_32SC4 ? (IppiTransposeI)ippiTranspose_32s_C4IR :
+            type == CV_32FC1 ? (IppiTransposeI)ippiTranspose_32f_C1IR :
+            type == CV_32FC3 ? (IppiTransposeI)ippiTranspose_32f_C3IR :
+            type == CV_32FC4 ? (IppiTransposeI)ippiTranspose_32f_C4IR : 0;
+        CV_SUPPRESS_DEPRECATED_END
+    }
+    else
+    {
+        ippiTranspose =
+            type == CV_8UC1 ? (IppiTranspose)ippiTranspose_8u_C1R :
+            type == CV_8UC3 ? (IppiTranspose)ippiTranspose_8u_C3R :
+            type == CV_8UC4 ? (IppiTranspose)ippiTranspose_8u_C4R :
+            type == CV_16UC1 ? (IppiTranspose)ippiTranspose_16u_C1R :
+            type == CV_16UC3 ? (IppiTranspose)ippiTranspose_16u_C3R :
+            type == CV_16UC4 ? (IppiTranspose)ippiTranspose_16u_C4R :
+            type == CV_16SC1 ? (IppiTranspose)ippiTranspose_16s_C1R :
+            type == CV_16SC3 ? (IppiTranspose)ippiTranspose_16s_C3R :
+            type == CV_16SC4 ? (IppiTranspose)ippiTranspose_16s_C4R :
+            type == CV_32SC1 ? (IppiTranspose)ippiTranspose_32s_C1R :
+            type == CV_32SC3 ? (IppiTranspose)ippiTranspose_32s_C3R :
+            type == CV_32SC4 ? (IppiTranspose)ippiTranspose_32s_C4R :
+            type == CV_32FC1 ? (IppiTranspose)ippiTranspose_32f_C1R :
+            type == CV_32FC3 ? (IppiTranspose)ippiTranspose_32f_C3R :
+            type == CV_32FC4 ? (IppiTranspose)ippiTranspose_32f_C4R : 0;
+    }
+
+    IppiSize roiSize = { src.cols, src.rows };
+    if (ippiTranspose != 0)
+    {
+        if (CV_INSTRUMENT_FUN_IPP(ippiTranspose, src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, roiSize) >= 0)
+            return true;
+    }
+    else if (ippiTranspose_I != 0)
+    {
+        if (CV_INSTRUMENT_FUN_IPP(ippiTranspose_I, dst.ptr(), (int)dst.step, roiSize) >= 0)
+            return true;
+    }
+    return false;
+}
+#endif
+
+
+// Public transpose entry point: dispatches to OpenCL, IPP, or the scalar
+// function tables (indexed by element size). Supports 2D arrays with
+// element size up to 32 bytes; in-place operation requires a square matrix.
+void transpose( InputArray _src, OutputArray _dst )
+{
+    CV_INSTRUMENT_REGION();
+
+    int type = _src.type(), esz = CV_ELEM_SIZE(type);
+    CV_Assert( _src.dims() <= 2 && esz <= 32 );
+
+    CV_OCL_RUN(_dst.isUMat(),
+               ocl_transpose(_src, _dst))
+
+    Mat src = _src.getMat();
+    if( src.empty() )
+    {
+        _dst.release();
+        return;
+    }
+
+    _dst.create(src.cols, src.rows, src.type());
+    Mat dst = _dst.getMat();
+
+    // handle the case of single-column/single-row matrices, stored in STL vectors.
+    if( src.rows != dst.cols || src.cols != dst.rows )
+    {
+        CV_Assert( src.size() == dst.size() && (src.cols == 1 || src.rows == 1) );
+        src.copyTo(dst);
+        return;
+    }
+
+    CV_IPP_RUN_FAST(ipp_transpose(src, dst))
+
+    if( dst.data == src.data )
+    {
+        // In-place path: square matrices only.
+        TransposeInplaceFunc func = transposeInplaceTab[esz];
+        CV_Assert( func != 0 );
+        CV_Assert( dst.cols == dst.rows );
+        func( dst.ptr(), dst.step, dst.rows );
+    }
+    else
+    {
+        TransposeFunc func = transposeTab[esz];
+        CV_Assert( func != 0 );
+        func( src.ptr(), src.step, dst.ptr(), dst.step, src.size() );
+    }
+}
+
+
+#if CV_SIMD128
+// Horizontal flip of one row-span where the element size equals the lane
+// size of vector type V: swaps mirrored vector chunks from both row ends,
+// reversing lanes within each chunk. Scalar tail handles the middle; the
+// unaligned fallback copies byte-by-byte.
+template<typename V> CV_ALWAYS_INLINE void flipHoriz_single( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
+{
+    typedef typename V::lane_type T;
+    int end = (int)(size.width*esz);
+    int width = (end + 1)/2;
+    int width_1 = width & -v_uint8x16::nlanes;    // widest prefix handled by the vector loop
+    int i, j;
+
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(T)>(src, dst));
+#endif
+
+    for( ; size.height--; src += sstep, dst += dstep )
+    {
+        for( i = 0, j = end; i < width_1; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
+        {
+            V t0, t1;
+
+            t0 = v_load((T*)((uchar*)src + i));
+            t1 = v_load((T*)((uchar*)src + j - v_uint8x16::nlanes));
+            t0 = v_reverse(t0);
+            t1 = v_reverse(t1);
+            v_store((T*)(dst + j - v_uint8x16::nlanes), t0);
+            v_store((T*)(dst + i), t1);
+        }
+        if (isAligned<sizeof(T)>(src, dst))
+        {
+            for ( ; i < width; i += sizeof(T), j -= sizeof(T) )
+            {
+                T t0, t1;
+
+                t0 = *((T*)((uchar*)src + i));
+                t1 = *((T*)((uchar*)src + j - sizeof(T)));
+                *((T*)(dst + j - sizeof(T))) = t0;
+                *((T*)(dst + i)) = t1;
+            }
+        }
+        else
+        {
+            // Unaligned scalar tail: move each element one byte at a time.
+            for ( ; i < width; i += sizeof(T), j -= sizeof(T) )
+            {
+                for (int k = 0; k < (int)sizeof(T); k++)
+                {
+                    uchar t0, t1;
+
+                    t0 = *((uchar*)src + i + k);
+                    t1 = *((uchar*)src + j + k - sizeof(T));
+                    *(dst + j + k - sizeof(T)) = t0;
+                    *(dst + i + k) = t1;
+                }
+            }
+        }
+    }
+}
+
+// Horizontal flip for element sizes equal to sizeof(T1)+sizeof(T2)
+// (e.g. 12 = 8+4): each pixel is moved as a T1 part followed by a T2 part.
+template<typename T1, typename T2> CV_ALWAYS_INLINE void flipHoriz_double( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
+{
+    int end = (int)(size.width*esz);
+    int width = (end + 1)/2;
+
+#if CV_STRONG_ALIGNMENT
+    CV_Assert(isAligned<sizeof(T1)>(src, dst));
+    CV_Assert(isAligned<sizeof(T2)>(src, dst));
+#endif
+
+    for( ; size.height--; src += sstep, dst += dstep )
+    {
+        for ( int i = 0, j = end; i < width; i += sizeof(T1) + sizeof(T2), j -= sizeof(T1) + sizeof(T2) )
+        {
+            T1 t0, t1;
+            T2 t2, t3;
+
+            t0 = *((T1*)((uchar*)src + i));
+            t2 = *((T2*)((uchar*)src + i + sizeof(T1)));
+            t1 = *((T1*)((uchar*)src + j - sizeof(T1) - sizeof(T2)));
+            t3 = *((T2*)((uchar*)src + j - sizeof(T2)));
+            *((T1*)(dst + j - sizeof(T1) - sizeof(T2))) = t0;
+            *((T2*)(dst + j - sizeof(T2))) = t2;
+            *((T1*)(dst + i)) = t1;
+            *((T2*)(dst + i + sizeof(T1))) = t3;
+        }
+    }
+}
+#endif
+
+// Horizontal (left-right) flip of a 2D byte image with element size esz.
+// Dispatches on esz to vectorized swap loops (whole-element moves, so no
+// lane reversal is needed for esz >= vector width) and falls back to a
+// table-driven scalar byte swap for unsupported sizes/alignments.
+static void
+flipHoriz( const uchar* src, size_t sstep, uchar* dst, size_t dstep, Size size, size_t esz )
+{
+#if CV_SIMD
+#if CV_STRONG_ALIGNMENT
+    // OR of all pointers/strides: alignment of the combination bounds them all.
+    size_t alignmentMark = ((size_t)src)|((size_t)dst)|sstep|dstep;
+#endif
+    if (esz == 2 * v_uint8x16::nlanes)
+    {
+        int end = (int)(size.width*esz);
+        int width = end/2;
+
+        for( ; size.height--; src += sstep, dst += dstep )
+        {
+            for( int i = 0, j = end - 2 * v_uint8x16::nlanes; i < width; i += 2 * v_uint8x16::nlanes, j -= 2 * v_uint8x16::nlanes )
+            {
+#if CV_SIMD256
+                v_uint8x32 t0, t1;
+
+                t0 = v256_load((uchar*)src + i);
+                t1 = v256_load((uchar*)src + j);
+                v_store(dst + j, t0);
+                v_store(dst + i, t1);
+#else
+                v_uint8x16 t0, t1, t2, t3;
+
+                t0 = v_load((uchar*)src + i);
+                t1 = v_load((uchar*)src + i + v_uint8x16::nlanes);
+                t2 = v_load((uchar*)src + j);
+                t3 = v_load((uchar*)src + j + v_uint8x16::nlanes);
+                v_store(dst + j, t0);
+                v_store(dst + j + v_uint8x16::nlanes, t1);
+                v_store(dst + i, t2);
+                v_store(dst + i + v_uint8x16::nlanes, t3);
+#endif
+            }
+        }
+    }
+    else if (esz == v_uint8x16::nlanes)
+    {
+        int end = (int)(size.width*esz);
+        int width = end/2;
+
+        for( ; size.height--; src += sstep, dst += dstep )
+        {
+            for( int i = 0, j = end - v_uint8x16::nlanes; i < width; i += v_uint8x16::nlanes, j -= v_uint8x16::nlanes )
+            {
+                v_uint8x16 t0, t1;
+
+                t0 = v_load((uchar*)src + i);
+                t1 = v_load((uchar*)src + j);
+                v_store(dst + j, t0);
+                v_store(dst + i, t1);
+            }
+        }
+    }
+    else if (esz == 8
+#if CV_STRONG_ALIGNMENT
+            && isAligned<sizeof(uint64)>(alignmentMark)
+#endif
+            )
+    {
+        flipHoriz_single<v_uint64x2>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 4
+#if CV_STRONG_ALIGNMENT
+            && isAligned<sizeof(unsigned)>(alignmentMark)
+#endif
+            )
+    {
+        flipHoriz_single<v_uint32x4>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 2
+#if CV_STRONG_ALIGNMENT
+            && isAligned<sizeof(ushort)>(alignmentMark)
+#endif
+            )
+    {
+        flipHoriz_single<v_uint16x8>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 1)
+    {
+        flipHoriz_single<v_uint8x16>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 24
+#if CV_STRONG_ALIGNMENT
+            && isAligned<sizeof(uint64_t)>(alignmentMark)
+#endif
+            )
+    {
+        // 24-byte elements: moved as one 16-byte vector plus one uint64.
+        int end = (int)(size.width*esz);
+        int width = (end + 1)/2;
+
+        for( ; size.height--; src += sstep, dst += dstep )
+        {
+            for ( int i = 0, j = end; i < width; i += v_uint8x16::nlanes + sizeof(uint64_t), j -= v_uint8x16::nlanes + sizeof(uint64_t) )
+            {
+                v_uint8x16 t0, t1;
+                uint64_t t2, t3;
+
+                t0 = v_load((uchar*)src + i);
+                t2 = *((uint64_t*)((uchar*)src + i + v_uint8x16::nlanes));
+                t1 = v_load((uchar*)src + j - v_uint8x16::nlanes - sizeof(uint64_t));
+                t3 = *((uint64_t*)((uchar*)src + j - sizeof(uint64_t)));
+                v_store(dst + j - v_uint8x16::nlanes - sizeof(uint64_t), t0);
+                *((uint64_t*)(dst + j - sizeof(uint64_t))) = t2;
+                v_store(dst + i, t1);
+                *((uint64_t*)(dst + i + v_uint8x16::nlanes)) = t3;
+            }
+        }
+    }
+#if !CV_STRONG_ALIGNMENT
+    else if (esz == 12)
+    {
+        flipHoriz_double<uint64_t,uint>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 6)
+    {
+        flipHoriz_double<uint,ushort>(src, sstep, dst, dstep, size, esz);
+    }
+    else if (esz == 3)
+    {
+        flipHoriz_double<ushort,uchar>(src, sstep, dst, dstep, size, esz);
+    }
+#endif
+    else
+#endif // CV_SIMD
+    {
+        // Generic fallback: precompute a byte-index mirror table for one row,
+        // then swap mirrored bytes for every row.
+        int i, j, limit = (int)(((size.width + 1)/2)*esz);
+        AutoBuffer<int> _tab(size.width*esz);
+        int* tab = _tab.data();
+
+        for( i = 0; i < size.width; i++ )
+            for( size_t k = 0; k < esz; k++ )
+                tab[i*esz + k] = (int)((size.width - i - 1)*esz + k);
+
+        for( ; size.height--; src += sstep, dst += dstep )
+        {
+            for( i = 0; i < limit; i++ )
+            {
+                j = tab[i];
+                uchar t0 = src[i], t1 = src[j];
+                dst[i] = t1; dst[j] = t0;
+            }
+        }
+    }
+}
+
+// Vertical (top-bottom) flip: walks rows from both ends towards the middle,
+// swapping them. Inner loops use SIMD where possible, then 16/4-byte int
+// chunks when pointers are int-aligned, then single bytes.
+static void
+flipVert( const uchar* src0, size_t sstep, uchar* dst0, size_t dstep, Size size, size_t esz )
+{
+    const uchar* src1 = src0 + (size.height - 1)*sstep;
+    uchar* dst1 = dst0 + (size.height - 1)*dstep;
+    size.width *= (int)esz;    // from here on, width is counted in bytes
+
+    for( int y = 0; y < (size.height + 1)/2; y++, src0 += sstep, src1 -= sstep,
+                                                  dst0 += dstep, dst1 -= dstep )
+    {
+        int i = 0;
+#if CV_SIMD
+#if CV_STRONG_ALIGNMENT
+        if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
+#endif
+        {
+            for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
+            {
+                v_int32 t0 = vx_load((int*)(src0 + i));
+                v_int32 t1 = vx_load((int*)(src1 + i));
+                vx_store((int*)(dst0 + i), t1);
+                vx_store((int*)(dst1 + i), t0);
+            }
+        }
+#if CV_STRONG_ALIGNMENT
+        else
+        {
+            // Byte-lane vectors avoid the int alignment requirement.
+            for (; i <= size.width - CV_SIMD_WIDTH; i += CV_SIMD_WIDTH)
+            {
+                v_uint8 t0 = vx_load(src0 + i);
+                v_uint8 t1 = vx_load(src1 + i);
+                vx_store(dst0 + i, t1);
+                vx_store(dst1 + i, t0);
+            }
+        }
+#endif
+#endif
+
+        if (isAligned<sizeof(int)>(src0, src1, dst0, dst1))
+        {
+            for( ; i <= size.width - 16; i += 16 )
+            {
+                int t0 = ((int*)(src0 + i))[0];
+                int t1 = ((int*)(src1 + i))[0];
+
+                ((int*)(dst0 + i))[0] = t1;
+                ((int*)(dst1 + i))[0] = t0;
+
+                t0 = ((int*)(src0 + i))[1];
+                t1 = ((int*)(src1 + i))[1];
+
+                ((int*)(dst0 + i))[1] = t1;
+                ((int*)(dst1 + i))[1] = t0;
+
+                t0 = ((int*)(src0 + i))[2];
+                t1 = ((int*)(src1 + i))[2];
+
+                ((int*)(dst0 + i))[2] = t1;
+                ((int*)(dst1 + i))[2] = t0;
+
+                t0 = ((int*)(src0 + i))[3];
+                t1 = ((int*)(src1 + i))[3];
+
+                ((int*)(dst0 + i))[3] = t1;
+                ((int*)(dst1 + i))[3] = t0;
+            }
+
+            for( ; i <= size.width - 4; i += 4 )
+            {
+                int t0 = ((int*)(src0 + i))[0];
+                int t1 = ((int*)(src1 + i))[0];
+
+                ((int*)(dst0 + i))[0] = t1;
+                ((int*)(dst1 + i))[0] = t0;
+            }
+        }
+
+        // Remaining bytes (or the whole row when unaligned).
+        for( ; i < size.width; i++ )
+        {
+            uchar t0 = src0[i];
+            uchar t1 = src1[i];
+
+            dst0[i] = t1;
+            dst1[i] = t0;
+        }
+    }
+}
+
+#ifdef HAVE_OPENCL
+
+// Flip axis selector for the OpenCL kernel build options.
+enum { FLIP_COLS = 1 << 0, FLIP_ROWS = 1 << 1, FLIP_BOTH = FLIP_ROWS | FLIP_COLS };
+
+// OpenCL flip: picks one of three kernels by flipCode (0 = rows, >0 = cols,
+// <0 = both). Only cn <= 4 is supported; returns false to fall back to CPU.
+static bool ocl_flip(InputArray _src, OutputArray _dst, int flipCode )
+{
+    CV_Assert(flipCode >= -1 && flipCode <= 1);
+
+    const ocl::Device & dev = ocl::Device::getDefault();
+    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
+            flipType, kercn = std::min(ocl::predictOptimalVectorWidth(_src, _dst), 4);
+
+    bool doubleSupport = dev.doubleFPConfig() > 0;
+    if (!doubleSupport && depth == CV_64F)
+        kercn = cn;
+
+    if (cn > 4)
+        return false;
+
+    const char * kernelName;
+    if (flipCode == 0)
+        kernelName = "arithm_flip_rows", flipType = FLIP_ROWS;
+    else if (flipCode > 0)
+        kernelName = "arithm_flip_cols", flipType = FLIP_COLS;
+    else
+        kernelName = "arithm_flip_rows_cols", flipType = FLIP_BOTH;
+
+    int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
+    kercn = (cn!=3 || flipType == FLIP_ROWS) ? std::max(kercn, cn) : cn;
+
+    ocl::Kernel k(kernelName, ocl::core::flip_oclsrc,
+        format( "-D T=%s -D T1=%s -D DEPTH=%d -D cn=%d -D PIX_PER_WI_Y=%d -D kercn=%d",
+                kercn != cn ? ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)) : ocl::vecopTypeToStr(CV_MAKE_TYPE(depth, kercn)),
+                kercn != cn ? ocl::typeToStr(depth) : ocl::vecopTypeToStr(depth), depth, cn, pxPerWIy, kercn));
+    if (k.empty())
+        return false;
+
+    Size size = _src.size();
+    _dst.create(size, type);
+    UMat src = _src.getUMat(), dst = _dst.getUMat();
+
+    // Each work-item swaps a mirrored pair, so only half the rows/cols
+    // along the flipped axis need to be covered.
+    int cols = size.width * cn / kercn, rows = size.height;
+    cols = flipType == FLIP_COLS ? (cols + 1) >> 1 : cols;
+    rows = flipType & FLIP_ROWS ? (rows + 1) >> 1 : rows;
+
+    k.args(ocl::KernelArg::ReadOnlyNoSize(src),
+           ocl::KernelArg::WriteOnly(dst, cn, kercn), rows, cols);
+
+    size_t maxWorkGroupSize = dev.maxWorkGroupSize();
+    CV_Assert(maxWorkGroupSize % 4 == 0);
+
+    size_t globalsize[2] = { (size_t)cols, ((size_t)rows + pxPerWIy - 1) / pxPerWIy },
+            localsize[2] = { maxWorkGroupSize / 4, 4 };
+    return k.run(2, globalsize, (flipType == FLIP_COLS) && !dev.isIntel() ? localsize : NULL, false);
+}
+
+#endif
+
+#if defined HAVE_IPP
+// IPP flip via iwiMirror. Returns false (caller falls back) when IPP IW is
+// unavailable, when the IPP call throws, or for the known-bad large-buffer
+// row-swap case on non-SSE4.2 targets.
+static bool ipp_flip(Mat &src, Mat &dst, int flip_mode)
+{
+#ifdef HAVE_IPP_IW
+    CV_INSTRUMENT_REGION_IPP();
+
+    // Details: https://github.com/opencv/opencv/issues/12943
+    if (flip_mode <= 0 /* swap rows */
+        && cv::ipp::getIppTopFeatures() != ippCPUID_SSE42
+        && (int64_t)(src.total()) * src.elemSize() >= CV_BIG_INT(0x80000000)/*2Gb*/
+    )
+        return false;
+
+    IppiAxis ippMode;
+    if(flip_mode < 0)
+        ippMode = ippAxsBoth;
+    else if(flip_mode == 0)
+        ippMode = ippAxsHorizontal;
+    else
+        ippMode = ippAxsVertical;
+
+    try
+    {
+        ::ipp::IwiImage iwSrc = ippiGetImage(src);
+        ::ipp::IwiImage iwDst = ippiGetImage(dst);
+
+        CV_INSTRUMENT_FUN_IPP(::ipp::iwiMirror, iwSrc, iwDst, ippMode);
+    }
+    catch(const ::ipp::IwException &)
+    {
+        return false;
+    }
+
+    return true;
+#else
+    CV_UNUSED(src); CV_UNUSED(dst); CV_UNUSED(flip_mode);
+    return false;
+#endif
+}
+#endif
+
+
+// Public flip entry point: flip_mode 0 flips around the x-axis (vertical),
+// >0 around the y-axis (horizontal), <0 around both. Dispatches to OpenCL
+// and IPP, otherwise uses flipVert/flipHoriz; a both-axes flip is done as a
+// vertical flip followed by an in-place horizontal flip of dst.
+void flip( InputArray _src, OutputArray _dst, int flip_mode )
+{
+    CV_INSTRUMENT_REGION();
+
+    CV_Assert( _src.dims() <= 2 );
+    Size size = _src.size();
+
+    // Degenerate dimensions: reduce a both-axes flip to the only axis
+    // that has more than one element.
+    if (flip_mode < 0)
+    {
+        if (size.width == 1)
+            flip_mode = 0;
+        if (size.height == 1)
+            flip_mode = 1;
+    }
+
+    // Flipping a single row/column along its degenerate axis is a copy.
+    if ((size.width == 1 && flip_mode > 0) ||
+        (size.height == 1 && flip_mode == 0))
+    {
+        return _src.copyTo(_dst);
+    }
+
+    CV_OCL_RUN( _dst.isUMat(), ocl_flip(_src, _dst, flip_mode))
+
+    Mat src = _src.getMat();
+    int type = src.type();
+    _dst.create( size, type );
+    Mat dst = _dst.getMat();
+
+    CV_IPP_RUN_FAST(ipp_flip(src, dst, flip_mode));
+
+    size_t esz = CV_ELEM_SIZE(type);
+
+    if( flip_mode <= 0 )
+        flipVert( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
+    else
+        flipHoriz( src.ptr(), src.step, dst.ptr(), dst.step, src.size(), esz );
+
+    if( flip_mode < 0 )
+        flipHoriz( dst.ptr(), dst.step, dst.ptr(), dst.step, dst.size(), esz );
+}
+
+// Rotates a 2D array by 90/180/270 degrees, composed from transpose and
+// flip: 90 CW = transpose + horizontal flip, 180 = flip both axes,
+// 90 CCW = transpose + vertical flip. Unknown modes are silently ignored.
+void rotate(InputArray _src, OutputArray _dst, int rotateMode)
+{
+    CV_Assert(_src.dims() <= 2);
+
+    switch (rotateMode)
+    {
+    case ROTATE_90_CLOCKWISE:
+        transpose(_src, _dst);
+        flip(_dst, _dst, 1);
+        break;
+    case ROTATE_180:
+        flip(_src, _dst, -1);
+        break;
+    case ROTATE_90_COUNTERCLOCKWISE:
+        transpose(_src, _dst);
+        flip(_dst, _dst, 0);
+        break;
+    default:
+        break;
+    }
+}
+
+} // namespace
cuda::GpuMat _InputArray::getGpuMat() const
{
+#ifdef HAVE_CUDA
int k = kind();
if (k == CUDA_GPU_MAT)
return cuda::GpuMat();
CV_Error(cv::Error::StsNotImplemented, "getGpuMat is available only for cuda::GpuMat and cuda::HostMem");
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
void _InputArray::getGpuMatVector(std::vector<cuda::GpuMat>& gpumv) const
{
+#ifdef HAVE_CUDA
int k = kind();
if (k == STD_VECTOR_CUDA_GPU_MAT)
{
gpumv = *(std::vector<cuda::GpuMat>*)obj;
}
+#else
+ CV_UNUSED(gpumv);
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
ogl::Buffer _InputArray::getOGlBuffer() const
{
if (k == STD_VECTOR_CUDA_GPU_MAT)
{
+#ifdef HAVE_CUDA
const std::vector<cuda::GpuMat>& vv = *(const std::vector<cuda::GpuMat>*)obj;
if (i < 0)
return vv.empty() ? Size() : Size((int)vv.size(), 1);
CV_Assert(i < (int)vv.size());
return vv[i].size();
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
if( k == STD_VECTOR_UMAT )
if (k == STD_VECTOR_CUDA_GPU_MAT)
{
+#ifdef HAVE_CUDA
const std::vector<cuda::GpuMat>& vv = *(const std::vector<cuda::GpuMat>*)obj;
if (vv.empty())
{
}
CV_Assert(i < (int)vv.size());
return vv[i >= 0 ? i : 0].type();
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
if( k == OPENGL_BUFFER )
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
+#ifdef HAVE_CUDA
((cuda::GpuMat*)obj)->create(_sz, mtype);
return;
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype);
+#ifdef HAVE_OPENGL
((ogl::Buffer*)obj)->create(_sz, mtype);
return;
+#else
+ CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)");
+#endif
}
if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == _sz);
CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype);
+#ifdef HAVE_CUDA
((cuda::HostMem*)obj)->create(_sz, mtype);
return;
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
int sizes[] = {_sz.height, _sz.width};
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
{
CV_Assert(!fixedSize() || ((cuda::GpuMat*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::GpuMat*)obj)->type() == mtype);
+#ifdef HAVE_CUDA
((cuda::GpuMat*)obj)->create(_rows, _cols, mtype);
return;
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
if( k == OPENGL_BUFFER && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((ogl::Buffer*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((ogl::Buffer*)obj)->type() == mtype);
+#ifdef HAVE_OPENGL
((ogl::Buffer*)obj)->create(_rows, _cols, mtype);
return;
+#else
+ CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)");
+#endif
}
if( k == CUDA_HOST_MEM && i < 0 && !allowTransposed && fixedDepthMask == 0 )
{
CV_Assert(!fixedSize() || ((cuda::HostMem*)obj)->size() == Size(_cols, _rows));
CV_Assert(!fixedType() || ((cuda::HostMem*)obj)->type() == mtype);
+#ifdef HAVE_CUDA
((cuda::HostMem*)obj)->create(_rows, _cols, mtype);
return;
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
int sizes[] = {_rows, _cols};
create(2, sizes, mtype, i, allowTransposed, fixedDepthMask);
if( k == CUDA_GPU_MAT )
{
+#ifdef HAVE_CUDA
((cuda::GpuMat*)obj)->release();
return;
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
if( k == CUDA_HOST_MEM )
{
+#ifdef HAVE_CUDA
((cuda::HostMem*)obj)->release();
return;
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
if( k == OPENGL_BUFFER )
{
+#ifdef HAVE_OPENGL
((ogl::Buffer*)obj)->release();
return;
+#else
+ CV_Error(Error::StsNotImplemented, "OpenGL support is not enabled in this OpenCV build (missing HAVE_OPENGL)");
+#endif
}
if( k == NONE )
}
if (k == STD_VECTOR_CUDA_GPU_MAT)
{
+#ifdef HAVE_CUDA
((std::vector<cuda::GpuMat>*)obj)->clear();
return;
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
CV_Error(Error::StsNotImplemented, "Unknown/unsupported array type");
}
((UMat*)obj)->setTo(arr, mask);
else if( k == CUDA_GPU_MAT )
{
+#ifdef HAVE_CUDA
Mat value = arr.getMat();
CV_Assert( checkScalar(value, type(), arr.kind(), _InputArray::CUDA_GPU_MAT) );
((cuda::GpuMat*)obj)->setTo(Scalar(Vec<double, 4>(value.ptr<double>())), mask);
+#else
+ CV_Error(Error::StsNotImplemented, "CUDA support is not enabled in this OpenCV build (missing HAVE_CUDA)");
+#endif
}
else
CV_Error(Error::StsNotImplemented, "");
return d;
}
-}} //cv::hal
+} //cv::hal
//==================================================================================================
-namespace cv
-{
-
template<typename T, typename ST> int
normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn)
{
CV_UNUSED(src); CV_UNUSED(normType); CV_UNUSED(mask); CV_UNUSED(result);
#endif
return false;
-}
-#endif
-
-} // cv::
+} // ipp_norm()
+#endif // HAVE_IPP
-double cv::norm( InputArray _src, int normType, InputArray _mask )
+double norm( InputArray _src, int normType, InputArray _mask )
{
CV_INSTRUMENT_REGION();
//==================================================================================================
#ifdef HAVE_OPENCL
-
-namespace cv {
-
static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result )
{
#ifdef __ANDROID__
result /= (s2 + DBL_EPSILON);
return true;
-}
-
-}
-
-#endif
+} // ocl_norm()
+#endif // HAVE_OPENCL
#ifdef HAVE_IPP
-namespace cv
-{
static bool ipp_norm(InputArray _src1, InputArray _src2, int normType, InputArray _mask, double &result)
{
CV_INSTRUMENT_REGION_IPP();
CV_UNUSED(_src1); CV_UNUSED(_src2); CV_UNUSED(normType); CV_UNUSED(_mask); CV_UNUSED(result);
#endif
return false;
-}
-}
-#endif
+} // ipp_norm
+#endif // HAVE_IPP
-double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
+double norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask )
{
CV_INSTRUMENT_REGION();
return result.d;
}
-cv::Hamming::ResultType cv::Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
+cv::Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const
{
return cv::hal::normHamming(a, b, size);
}
-double cv::PSNR(InputArray _src1, InputArray _src2)
+double PSNR(InputArray _src1, InputArray _src2)
{
CV_INSTRUMENT_REGION();
double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
return 20*log10(255./(diff+DBL_EPSILON));
}
+
+
+#ifdef HAVE_OPENCL
+// OpenCL path for normalize(): computes dst = src*scale + delta on a UMat,
+// optionally restricted to a mask. Returns false when this path cannot
+// handle the request (no double support, kernel build failure), letting the
+// caller fall back to the CPU implementation.
+static bool ocl_normalize( InputArray _src, InputOutputArray _dst, InputArray _mask, int dtype,
+                           double scale, double delta )
+{
+    UMat src = _src.getUMat();
+
+    // No mask: the whole operation is exactly a scale/shift conversion.
+    if( _mask.empty() )
+        src.convertTo( _dst, dtype, scale, delta );
+    else if (src.channels() <= 4)
+    {
+        const ocl::Device & dev = ocl::Device::getDefault();
+
+        // Work depth is at least CV_32F so the scale/shift math has headroom.
+        int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype),
+                ddepth = CV_MAT_DEPTH(dtype), wdepth = std::max(CV_32F, std::max(sdepth, ddepth)),
+                rowsPerWI = dev.isIntel() ? 4 : 1;
+
+        float fscale = static_cast<float>(scale), fdelta = static_cast<float>(delta);
+        bool haveScale = std::fabs(scale - 1) > DBL_EPSILON,
+                haveZeroScale = !(std::fabs(scale) > DBL_EPSILON),
+                haveDelta = std::fabs(delta) > DBL_EPSILON,
+                doubleSupport = dev.doubleFPConfig() > 0;
+
+        // Identity transform (scale==1, delta==0, same type): masked copy.
+        if (!haveScale && !haveDelta && stype == dtype)
+        {
+            _src.copyTo(_dst, _mask);
+            return true;
+        }
+        // scale==0 collapses every masked pixel to the constant delta.
+        if (haveZeroScale)
+        {
+            _dst.setTo(Scalar(delta), _mask);
+            return true;
+        }
+
+        // 64F work requires device double support; otherwise defer to CPU.
+        if ((sdepth == CV_64F || ddepth == CV_64F) && !doubleSupport)
+            return false;
+
+        // Build options: source/dest types, work type, and optional
+        // HAVE_SCALE/HAVE_DELTA switches so the kernel skips dead math.
+        char cvt[2][40];
+        String opts = format("-D srcT=%s -D dstT=%s -D convertToWT=%s -D cn=%d -D rowsPerWI=%d"
+                             " -D convertToDT=%s -D workT=%s%s%s%s -D srcT1=%s -D dstT1=%s",
+                             ocl::typeToStr(stype), ocl::typeToStr(dtype),
+                             ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), cn,
+                             rowsPerWI, ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]),
+                             ocl::typeToStr(CV_MAKE_TYPE(wdepth, cn)),
+                             doubleSupport ? " -D DOUBLE_SUPPORT" : "",
+                             haveScale ? " -D HAVE_SCALE" : "",
+                             haveDelta ? " -D HAVE_DELTA" : "",
+                             ocl::typeToStr(sdepth), ocl::typeToStr(ddepth));
+
+        ocl::Kernel k("normalizek", ocl::core::normalize_oclsrc, opts);
+        if (k.empty())
+            return false;
+
+        UMat mask = _mask.getUMat(), dst = _dst.getUMat();
+
+        ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
+                maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
+                dstarg = ocl::KernelArg::ReadWrite(dst);
+
+        // Argument list must mirror the HAVE_SCALE/HAVE_DELTA build options.
+        if (haveScale)
+        {
+            if (haveDelta)
+                k.args(srcarg, maskarg, dstarg, fscale, fdelta);
+            else
+                k.args(srcarg, maskarg, dstarg, fscale);
+        }
+        else
+        {
+            if (haveDelta)
+                k.args(srcarg, maskarg, dstarg, fdelta);
+            else
+                k.args(srcarg, maskarg, dstarg);
+        }
+
+        // Each work item covers rowsPerWI rows; round rows up accordingly.
+        size_t globalsize[2] = { (size_t)src.cols, ((size_t)src.rows + rowsPerWI - 1) / rowsPerWI };
+        return k.run(2, globalsize, NULL, false);
+    }
+    else
+    {
+        // >4 channels: no dedicated kernel; convert to a temp, then masked copy.
+        UMat temp;
+        src.convertTo( temp, dtype, scale, delta );
+        temp.copyTo( _dst, _mask );
+    }
+
+    return true;
+} // ocl_normalize
+#endif // HAVE_OPENCL
+
+// Normalizes src into dst as dst = src*scale + shift, where scale/shift are
+// derived from the requested norm:
+//   CV_MINMAX        -> affine map of [min(src), max(src)] onto [min(a,b), max(a,b)]
+//   CV_L1/CV_L2/CV_C -> scale so that the chosen norm of dst equals a
+// rtype < 0 keeps the destination depth (or the fixed dst depth). The mask
+// limits both the statistics (minMaxIdx) and the written pixels.
+void normalize(InputArray _src, InputOutputArray _dst, double a, double b,
+               int norm_type, int rtype, InputArray _mask)
+{
+    CV_INSTRUMENT_REGION();
+
+    double scale = 1, shift = 0;
+    int type = _src.type(), depth = CV_MAT_DEPTH(type);
+
+    if( rtype < 0 )
+        rtype = _dst.fixedType() ? _dst.depth() : depth;
+
+    if( norm_type == CV_MINMAX )
+    {
+        double smin = 0, smax = 0;
+        double dmin = MIN( a, b ), dmax = MAX( a, b );
+        minMaxIdx( _src, &smin, &smax, 0, 0, _mask );
+        // Degenerate range (smax == smin) maps everything via scale 0.
+        scale = (dmax - dmin)*(smax - smin > DBL_EPSILON ? 1./(smax - smin) : 0);
+        if( rtype == CV_32F )
+        {
+            // Round through float so the CPU and OCL paths agree for CV_32F.
+            scale = (float)scale;
+            shift = (float)dmin - (float)(smin*scale);
+        }
+        else
+            shift = dmin - smin*scale;
+    }
+    else if( norm_type == CV_L2 || norm_type == CV_L1 || norm_type == CV_C )
+    {
+        // Zero-norm input yields scale 0 (all-zero output) instead of a
+        // division by zero.
+        scale = norm( _src, norm_type, _mask );
+        scale = scale > DBL_EPSILON ? a/scale : 0.;
+        shift = 0;
+    }
+    else
+        CV_Error( CV_StsBadArg, "Unknown/unsupported norm type" );
+
+    // Try the OpenCL path first when dst is a UMat; fall through on failure.
+    CV_OCL_RUN(_dst.isUMat(),
+               ocl_normalize(_src, _dst, _mask, rtype, scale, shift))
+
+    Mat src = _src.getMat();
+    if( _mask.empty() )
+        src.convertTo( _dst, rtype, scale, shift );
+    else
+    {
+        // Masked output: convert into a temp, then copy only masked pixels.
+        Mat temp;
+        src.convertTo( temp, rtype, scale, shift );
+        temp.copyTo( _dst, _mask );
+    }
+}
+
+} // namespace
}
-/* universal functions */
-CV_IMPL void
-cvRelease( void** struct_ptr )
-{
- CvTypeInfo* info;
-
- if( !struct_ptr )
- CV_Error( CV_StsNullPtr, "NULL double pointer" );
-
- if( *struct_ptr )
- {
- info = cvTypeOf( *struct_ptr );
- if( !info )
- CV_Error( CV_StsError, "Unknown object type" );
- if( !info->release )
- CV_Error( CV_StsError, "release function pointer is NULL" );
-
- info->release( struct_ptr );
- *struct_ptr = 0;
- }
-}
-
-
-void* cvClone( const void* struct_ptr )
-{
- void* struct_copy = 0;
- CvTypeInfo* info;
-
- if( !struct_ptr )
- CV_Error( CV_StsNullPtr, "NULL structure pointer" );
-
- info = cvTypeOf( struct_ptr );
- if( !info )
- CV_Error( CV_StsError, "Unknown object type" );
- if( !info->clone )
- CV_Error( CV_StsError, "clone function pointer is NULL" );
-
- struct_copy = info->clone( struct_ptr );
- return struct_copy;
-}
-
-
/* reads matrix, image, sequence, graph etc. */
CV_IMPL void*
cvRead( CvFileStorage* fs, CvFileNode* node, CvAttrList* list )
func( dst, rng, iterFactor );
}
+
+#ifndef OPENCV_EXCLUDE_C_API
+
CV_IMPL void
cvRandArr( CvRNG* _rng, CvArr* arr, int disttype, CvScalar param1, CvScalar param2 )
{
cv::randShuffle( dst, iter_factor, &rng );
}
+#endif // OPENCV_EXCLUDE_C_API
+
+
// Mersenne Twister random number generator.
// Inspired by http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/mt19937ar.c
#include "precomp.hpp"
+#ifndef OPENCV_EXCLUDE_C_API
+
CV_IMPL CvScalar cvSum( const CvArr* srcarr )
{
cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);
}
+
+#endif // OPENCV_EXCLUDE_C_API
return m;
}
-UMat UMat::inv(int method) const
-{
- UMat m;
- invert(*this, m, method);
- return m;
-}
-
-UMat UMat::mul(InputArray m, double scale) const
-{
- UMat dst;
- multiply(*this, m, dst, scale);
- return dst;
-}
-
-#ifdef HAVE_OPENCL
-
-static bool ocl_dot( InputArray _src1, InputArray _src2, double & res )
-{
- UMat src1 = _src1.getUMat().reshape(1), src2 = _src2.getUMat().reshape(1);
-
- int type = src1.type(), depth = CV_MAT_DEPTH(type),
- kercn = ocl::predictOptimalVectorWidth(src1, src2);
- bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
-
- if ( !doubleSupport && depth == CV_64F )
- return false;
-
- int dbsize = ocl::Device::getDefault().maxComputeUnits();
- size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
- int ddepth = std::max(CV_32F, depth);
-
- int wgs2_aligned = 1;
- while (wgs2_aligned < (int)wgs)
- wgs2_aligned <<= 1;
- wgs2_aligned >>= 1;
-
- char cvt[40];
- ocl::Kernel k("reduce", ocl::core::reduce_oclsrc,
- format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D ddepth=%d -D convertToDT=%s -D OP_DOT "
- "-D WGS=%d -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d",
- ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), ocl::typeToStr(depth),
- ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)),
- ddepth, ocl::convertTypeStr(depth, ddepth, kercn, cvt),
- (int)wgs, wgs2_aligned, doubleSupport ? " -D DOUBLE_SUPPORT" : "",
- _src1.isContinuous() ? " -D HAVE_SRC_CONT" : "",
- _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", kercn));
- if (k.empty())
- return false;
-
- UMat db(1, dbsize, ddepth);
-
- ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
- src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
- dbarg = ocl::KernelArg::PtrWriteOnly(db);
-
- k.args(src1arg, src1.cols, (int)src1.total(), dbsize, dbarg, src2arg);
-
- size_t globalsize = dbsize * wgs;
- if (k.run(1, &globalsize, &wgs, false))
- {
- res = sum(db.getMat(ACCESS_READ))[0];
- return true;
- }
- return false;
-}
-
-#endif
-
-double UMat::dot(InputArray m) const
-{
- CV_INSTRUMENT_REGION();
-
- CV_Assert(m.sameSize(*this) && m.type() == type());
-
-#ifdef HAVE_OPENCL
- double r = 0;
- CV_OCL_RUN_(dims <= 2, ocl_dot(*this, m, r), r)
-#endif
-
- return getMat(ACCESS_READ).dot(m);
-}
-
UMat UMat::zeros(int rows, int cols, int type)
{
return UMat(rows, cols, type, Scalar::all(0));
return UMat(ndims, sz, type, Scalar(1));
}
-UMat UMat::eye(int rows, int cols, int type)
-{
- return UMat::eye(Size(cols, rows), type);
-}
-
-UMat UMat::eye(Size size, int type)
-{
- UMat m(size, type);
- setIdentity(m);
- return m;
-}
-
}
/* End of file. */