# pragma warning(disable: 4748)
#endif
-#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7
+#if defined HAVE_IPP && IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701
#define USE_IPP_DFT 1
#else
#undef USE_IPP_DFT
int depth = src.depth(), cn = src.channels();
normType &= 7;
- CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
+ CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
+ normType == NORM_L2 || normType == NORM_L2SQR ||
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) );
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
size_t total_size = src.total();
int rows = src.size[0], cols = (int)(total_size/rows);
- if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size)
+ if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous()))
+ && cols > 0 && (size_t)rows*cols == total_size
&& (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) )
{
IppiSize sz = { cols, rows };
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
size_t total_size = src1.total();
int rows = src1.size[0], cols = (int)(total_size/rows);
- if( src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size)
- && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) )
+ if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
+ && cols > 0 && (size_t)rows*cols == total_size
+ && (normType == NORM_INF || normType == NORM_L1 ||
+ normType == NORM_L2 || normType == NORM_L2SQR) )
{
IppiSize sz = { cols, rows };
int type = src1.type();
CV_Assert( src1.size == src2.size && src1.type() == src2.type() );
normType &= 7;
- CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR ||
+ CV_Assert( normType == NORM_INF || normType == NORM_L1 ||
+ normType == NORM_L2 || normType == NORM_L2SQR ||
((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) );
#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
size_t total_size = src1.total();
int rows = src1.size[0], cols = (int)(total_size/rows);
- if( src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size)
+ if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous()))
+ && cols > 0 && (size_t)rows*cols == total_size
&& (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) )
{
IppiSize sz = { cols, rows };
Base Image Filter
\****************************************************************************************/
+#if defined HAVE_IPP && IPP_VERSION_MAJOR*100 + IPP_VERSION_MINOR >= 701 // IPP is available and (major*100 + minor) >= 701, i.e. version 7.1 or newer
+#define USE_IPP_SEP_FILTERS 1
+#else
+#undef USE_IPP_SEP_FILTERS
+#endif // end of USE_IPP_SEP_FILTERS selection
+
/*
Various border types, image boundaries are denoted with '|'
RowVec_32f( const Mat& _kernel )
{
kernel = _kernel;
+ haveSSE = checkHardwareSupport(CV_CPU_SSE); // query SSE support once here; operator() tests the cached flag instead of re-querying
+#ifdef USE_IPP_SEP_FILTERS
+ bufsz = -1; // negative means the IPP work-buffer size has not been queried yet (operator() checks bufsz < 0)
+#endif
}
int operator()(const uchar* _src, uchar* _dst, int width, int cn) const
{
- if( !checkHardwareSupport(CV_CPU_SSE) )
- return 0;
-
- int i = 0, k, _ksize = kernel.rows + kernel.cols - 1;
+ int _ksize = kernel.rows + kernel.cols - 1;
+ const float* src0 = (const float*)_src;
float* dst = (float*)_dst;
const float* _kx = (const float*)kernel.data;
+
+#ifdef USE_IPP_SEP_FILTERS
+ IppiSize roisz = { width, 1 };
+ if( (cn == 1 || cn == 3) && width >= _ksize*8 )
+ {
+ if( bufsz < 0 )
+ {
+ if( (cn == 1 && ippiFilterRowBorderPipelineGetBufferSize_32f_C1R(roisz, _ksize, &bufsz) < 0) ||
+ (cn == 3 && ippiFilterRowBorderPipelineGetBufferSize_32f_C3R(roisz, _ksize, &bufsz) < 0))
+ return 0;
+ }
+ AutoBuffer<uchar> buf(bufsz + 64);
+ uchar* bufptr = alignPtr((uchar*)buf, 32);
+ int step = (int)(width*sizeof(dst[0])*cn);
+ float borderValue[] = {0.f, 0.f, 0.f};
+ // Here is the trick: IPP needs a border type and extrapolates the row itself, but we have already done that.
+ // So we pass anchor=0 and ignore the right tail of the results, since they are incorrect there.
+ if( (cn == 1 && ippiFilterRowBorderPipeline_32f_C1R(src0, step, &dst, roisz, _kx, _ksize, 0,
+ ippBorderRepl, borderValue[0], bufptr) < 0) ||
+ (cn == 3 && ippiFilterRowBorderPipeline_32f_C3R(src0, step, &dst, roisz, _kx, _ksize, 0,
+ ippBorderRepl, borderValue, bufptr) < 0))
+ return 0;
+ return width - _ksize + 1;
+ }
+#endif
+
+ if( !haveSSE )
+ return 0;
+
+ int i = 0, k;
width *= cn;
for( ; i <= width - 8; i += 8 )
{
- const float* src = (const float*)_src + i;
+ const float* src = src0 + i;
__m128 f, s0 = _mm_setzero_ps(), s1 = s0, x0, x1;
for( k = 0; k < _ksize; k++, src += cn )
{
}
Mat kernel;
+ bool haveSSE; // result of checkHardwareSupport(CV_CPU_SSE), assigned in the constructor
+#ifdef USE_IPP_SEP_FILTERS
+ mutable int bufsz; // cached IPP row-filter work-buffer size; mutable so the const operator() can fill it in; -1 until queried
+#endif
};
IppiRect dstroi = { 0, dsty, dstwidth, dstheight - dsty };
int bufsize;
ippiResizeGetBufSize( srcroi, dstroi, cn, mode, &bufsize );
- Ipp8u *buf;
- buf = ippsMalloc_8u( bufsize );
- IppStatus sts;
- if( func( src.data, ippiSize(src.cols, src.rows), (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, inv_scale_x, inv_scale_y, 0, 0, mode, buf ) < 0 )
+ AutoBuffer<uchar> buf(bufsize + 64);
+ uchar* bufptr = alignPtr((uchar*)buf, 32);
+ if( func( src.data, ippiSize(src.cols, src.rows), (int)src.step[0], srcroi, dst.data, (int)dst.step[0], dstroi, inv_scale_x, inv_scale_y, 0, 0, mode, bufptr ) < 0 )
*ok = false;
- ippsFree(buf);
}
private:
Mat &src;