1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
44 #include "opencl_kernels_imgproc.hpp"
// Per pixel, reads a 3-float record from _cov and writes one float to _dst:
//   (a + c) - sqrt((a - c)^2 + b^2), with a and c being half of the 1st and 3rd floats.
// If b is the 2nd float (its declaration is on a line omitted from this listing), this is
// the smaller eigenvalue of the symmetric 2x2 matrix [[A, B], [B, C]].
// NOTE(review): the embedded original line numbers skip (50-51, 53, 55-56, 58, 60-62, ...),
// so braces, some declarations and any preprocessor guards are not visible here.
49 static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
52 Size size = _cov.size();
// Runtime SSE capability check; presumably gates the __m128 path below (guard not visible).
54 volatile bool simd = checkHardwareSupport(CV_CPU_SSE);
// Continuous storage: fold all rows into the width so the data is walked as one long row.
// (Presumably size.height is reset to 1 on the omitted line 60 -- confirm.)
57 if( _cov.isContinuous() && _dst.isContinuous() )
59 size.width *= size.height;
63 for( i = 0; i < size.height; i++ )
65 const float* cov = _cov.ptr<float>(i);
66 float* dst = _dst.ptr<float>(i);
71 __m128 half = _mm_set1_ps(0.5f);
// Four pixels per iteration. The last load below touches float j*3 + 12, i.e. the first
// float of pixel j + 4, hence the "- 5" bound rather than "- 4".
72 for( ; j <= size.width - 5; j += 4 )
74 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x
75 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x
76 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x
77 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x
// Transpose the interleaved records into one vector per component.
79 t = _mm_unpacklo_ps(t0, t1); // a0 a1 b0 b1
80 c = _mm_unpackhi_ps(t0, t1); // c0 c1 x x
81 b = _mm_unpacklo_ps(t2, t3); // a2 a3 b2 b3
82 c = _mm_movelh_ps(c, _mm_unpackhi_ps(t2, t3)); // c0 c1 c2 c3
// a = a0 a1 a2 a3 (low halves of t and b); b = b0 b1 b2 b3 (high halves of t and b).
83 a = _mm_movelh_ps(t, b);
84 b = _mm_movehl_ps(b, t);
85 a = _mm_mul_ps(a, half);
86 c = _mm_mul_ps(c, half);
// t*t + b*b; t is presumably set to a - c on the omitted line 87, mirroring the scalar tail.
88 t = _mm_add_ps(_mm_mul_ps(t, t), _mm_mul_ps(b,b));
89 a = _mm_sub_ps(_mm_add_ps(a, c), _mm_sqrt_ps(t));
90 _mm_storeu_ps(dst + j, a);
// Scalar tail: handles whatever pixels the vector loop left over (or all of them).
94 for( ; j < size.width; j++ )
96 float a = cov[j*3]*0.5f;
// NOTE(review): b is used below but declared on the omitted line 97 (presumably cov[j*3+1]).
98 float c = cov[j*3+2]*0.5f;
99 dst[j] = (float)((a + c) - std::sqrt((a - c)*(a - c) + b*b));
// Per pixel, reads a 3-float record (a, b, c) from _cov and writes one float to _dst:
//   a*c - b*b - k*(a + c)^2
// i.e. det - k * trace^2 of the symmetric 2x2 matrix [[a, b], [b, c]].
// k is the weight applied to the squared trace.
// NOTE(review): the embedded original line numbers skip (106-107, 109, 111-112, ...),
// so braces, some declarations and the preprocessor guards that select between the
// __m128 and float32x4_t paths are not visible in this listing.
105 static void calcHarris( const Mat& _cov, Mat& _dst, double k )
108 Size size = _cov.size();
// Runtime SSE capability check; presumably gates the __m128 path below (guard not visible).
110 volatile bool simd = checkHardwareSupport(CV_CPU_SSE);
// Continuous storage: fold all rows into the width so the data is walked as one long row.
// (Presumably size.height is reset to 1 on the omitted line 116 -- confirm.)
113 if( _cov.isContinuous() && _dst.isContinuous() )
115 size.width *= size.height;
119 for( i = 0; i < size.height; i++ )
121 const float* cov = _cov.ptr<float>(i);
122 float* dst = _dst.ptr<float>(i);
128 __m128 k4 = _mm_set1_ps((float)k);
// Four pixels per iteration. The last load below touches float j*3 + 12, i.e. the first
// float of pixel j + 4, hence the "- 5" bound rather than "- 4".
129 for( ; j <= size.width - 5; j += 4 )
131 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x
132 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x
133 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x
134 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x
// Transpose the interleaved records into one vector per component.
136 t = _mm_unpacklo_ps(t0, t1); // a0 a1 b0 b1
137 c = _mm_unpackhi_ps(t0, t1); // c0 c1 x x
138 b = _mm_unpacklo_ps(t2, t3); // a2 a3 b2 b3
139 c = _mm_movelh_ps(c, _mm_unpackhi_ps(t2, t3)); // c0 c1 c2 c3
// a = a0 a1 a2 a3 (low halves of t and b); b = b0 b1 b2 b3 (high halves of t and b).
140 a = _mm_movelh_ps(t, b);
141 b = _mm_movehl_ps(b, t);
// t = a + c, then a = a*c - b*b, then subtract k * t * t.
142 t = _mm_add_ps(a, c);
143 a = _mm_sub_ps(_mm_mul_ps(a, c), _mm_mul_ps(b, b));
144 t = _mm_mul_ps(_mm_mul_ps(k4, t), t);
145 a = _mm_sub_ps(a, t);
146 _mm_storeu_ps(dst + j, a);
150 float32x4_t v_k = vdupq_n_f32((float)k);
// NEON path: vld3q_f32 reads exactly 12 floats (4 whole pixels), so "- 4" is a safe bound.
152 for( ; j <= size.width - 4; j += 4 )
// Fix: pixel j starts at float offset j * 3 (three interleaved floats per pixel), the same
// addressing used by the SSE loads above and the scalar tail below. The previous
// "cov + j + 3" offset de-interleaved the wrong floats into the a / b / c lanes.
154 float32x4x3_t v_src = vld3q_f32(cov + j * 3);
155 float32x4_t v_a = v_src.val[0], v_b = v_src.val[1], v_c = v_src.val[2];
// a*c - b*b, then subtract k * (a + c)^2 with multiply-subtract.
156 float32x4_t v_ac_bb = vmlsq_f32(vmulq_f32(v_a, v_c), v_b, v_b);
157 float32x4_t v_ac = vaddq_f32(v_a, v_c);
158 vst1q_f32(dst + j, vmlsq_f32(v_ac_bb, v_k, vmulq_f32(v_ac, v_ac)));
// Scalar tail: handles whatever pixels the vector loops left over (or all of them).
162 for( ; j < size.width; j++ )
// NOTE(review): a is used below but declared on the omitted line 164 (presumably cov[j*3]).
165 float b = cov[j*3+1];
166 float c = cov[j*3+2];
167 dst[j] = (float)(a*c - b*b - k*(a + c)*(a + c));
// For each of n pixels, reads a 3-float record from cov and writes a 6-float record to dst:
//   dst[6*j + 0] = l1, dst[6*j + 1] = l2,
//   dst[6*j + 2..3] = unit-length (x, y) computed alongside l1,
//   dst[6*j + 4..5] = unit-length (x, y) computed alongside l2.
// NOTE(review): the declarations of a, l1, l2, x, y and e are on lines omitted from this
// listing (177, 183-189, 206-210, ...). Presumably l1 = u + v and l2 = u - v are the two
// eigenvalues and (x, y) the matching eigenvectors -- confirm against the full source.
173 static void eigen2x2( const float* cov, float* dst, int n )
175 for( int j = 0; j < n; j++ )
178 double b = cov[j*3+1];
179 double c = cov[j*3+2];
// u is the half-sum of a and c; v is sqrt(((a - c)/2)^2 + b^2).
181 double u = (a + c)*0.5;
182 double v = std::sqrt((a - c)*(a - c)*0.25 + b*b);
// Nested small-magnitude tests on |x| + |y| (e holds fabs(x) at this point, presumably);
// the statements between the two tests are omitted from this listing.
190 if( e + fabs(y) < 1e-4 )
195 if( e + fabs(y) < 1e-4 )
// Reciprocal of the magnitude; FLT_EPSILON keeps the divisor non-zero.
197 e = 1./(e + fabs(y) + FLT_EPSILON);
// Normalisation factor for (x, y); DBL_EPSILON keeps the square root argument positive.
202 double d = 1./std::sqrt(x*x + y*y + DBL_EPSILON);
203 dst[6*j] = (float)l1;
204 dst[6*j + 2] = (float)(x*d);
205 dst[6*j + 3] = (float)(y*d);
// Same sequence repeated for the second value l2.
211 if( e + fabs(y) < 1e-4 )
216 if( e + fabs(y) < 1e-4 )
218 e = 1./(e + fabs(y) + FLT_EPSILON);
223 d = 1./std::sqrt(x*x + y*y + DBL_EPSILON);
224 dst[6*j + 1] = (float)l2;
225 dst[6*j + 4] = (float)(x*d);
226 dst[6*j + 5] = (float)(y*d);
// Row driver for eigen2x2: passes each row of _cov and the matching row of _dst, both
// accessed as float, together with the row width in pixels.
// NOTE(review): braces and a few lines are omitted from this listing (231, 234, 236-238, ...).
230 static void calcEigenValsVecs( const Mat& _cov, Mat& _dst )
232 Size size = _cov.size();
// Continuous storage: fold all rows into the width so eigen2x2 is called once.
// (Presumably size.height is reset to 1 on the omitted line 236 -- confirm.)
233 if( _cov.isContinuous() && _dst.isContinuous() )
235 size.width *= size.height;
239 for( int i = 0; i < size.height; i++ )
241 const float* cov = _cov.ptr<float>(i);
242 float* dst = _dst.ptr<float>(i);
244 eigen2x2(cov, dst, size.width);
// Operation selectors passed as op_type to cornerEigenValsVecs and ocl_cornerMinEigenValVecs.
// The numeric values also index the cornerType[] string table in ocl_cornerMinEigenValVecs.
249 enum { MINEIGENVAL=0, HARRIS=1, EIGENVALSVECS=2 };
// Shared CPU implementation behind the corner response functions in this file.
// Steps visible in this listing:
//   1. first derivatives Dx, Dy of src as CV_32F (Sobel or Scharr, scaled by `scale`);
//   2. per-pixel CV_32FC3 image holding (dx*dx, dx*dy, dy*dy);
//   3. box filter over a block_size x block_size window with normalize == false;
//   4. dispatch on op_type to calcMinEigenVal / calcHarris / calcEigenValsVecs.
// Parameters: src must be CV_8UC1 or CV_32FC1 (asserted); eigenv receives the result;
// k is only forwarded to calcHarris.
// NOTE(review): the return type line, braces and several statements are omitted from this
// listing (252, 256, 259-261, 265-269, ...).
253 cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size,
254 int aperture_size, int op_type, double k=0.,
255 int borderType=BORDER_DEFAULT )
// Optional Tegra fast path; what happens when it succeeds is on omitted lines.
257 #ifdef HAVE_TEGRA_OPTIMIZATION
258 if (tegra::cornerEigenValsVecs(src, eigenv, block_size, aperture_size, op_type, k, borderType))
262 int depth = src.depth();
// 2^(aperture - 1) * block_size, using aperture 3 when aperture_size is not positive.
// The adjustments that follow (lines 265-269) are omitted; presumably the value is
// corrected for Scharr / 8-bit input and inverted before use -- confirm.
263 double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size;
264 if( aperture_size < 0 )
270 CV_Assert( src.type() == CV_8UC1 || src.type() == CV_32FC1 );
// Positive aperture: Sobel derivatives. The Scharr calls below are presumably the else branch.
273 if( aperture_size > 0 )
275 Sobel( src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType );
276 Sobel( src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType );
280 Scharr( src, Dx, CV_32F, 1, 0, scale, 0, borderType );
281 Scharr( src, Dy, CV_32F, 0, 1, scale, 0, borderType );
284 Size size = src.size();
285 Mat cov( size, CV_32FC3 );
// Fill cov with the three distinct products of the gradient components.
288 for( i = 0; i < size.height; i++ )
290 float* cov_data = cov.ptr<float>(i);
291 const float* dxdata = Dx.ptr<float>(i);
292 const float* dydata = Dy.ptr<float>(i);
294 for( j = 0; j < size.width; j++ )
296 float dx = dxdata[j];
297 float dy = dydata[j];
299 cov_data[j*3] = dx*dx;
300 cov_data[j*3+1] = dx*dy;
301 cov_data[j*3+2] = dy*dy;
// In-place, unnormalized (sum) box filter over the block window, default anchor.
305 boxFilter(cov, cov, cov.depth(), Size(block_size, block_size),
306 Point(-1,-1), false, borderType );
308 if( op_type == MINEIGENVAL )
309 calcMinEigenVal( cov, eigenv );
310 else if( op_type == HARRIS )
311 calcHarris( cov, eigenv, k );
312 else if( op_type == EIGENVALSVECS )
313 calcEigenValsVecs( cov, eigenv );
// Computes the first derivatives Dx and Dy of _src as CV_32FC1 UMats.
// When the aperture is 3, 5, 7 or -1 and the whole (parent) image is larger than the
// 16-pixel work-group plus half the aperture in both dimensions, a dedicated OpenCL kernel
// from covardata_oclsrc is used; the Sobel / Scharr calls further down are presumably the
// fallback (the else keyword and braces are omitted from this listing).
// Returns the result of k.run() on the kernel path; other return statements are not visible.
318 static bool extractCovData(InputArray _src, UMat & Dx, UMat & Dy, int depth,
319 float scale, int aperture_size, int borderType)
321 UMat src = _src.getUMat();
// Size of the parent matrix and the ROI offset inside it (declarations on omitted lines).
325 src.locateROI(wholeSize, ofs);
327 const int sobel_lsz = 16;
328 if ((aperture_size == 3 || aperture_size == 5 || aperture_size == 7 || aperture_size == -1) &&
329 wholeSize.height > sobel_lsz + (aperture_size >> 1) &&
330 wholeSize.width > sobel_lsz + (aperture_size >> 1))
332 CV_Assert(depth == CV_8U || depth == CV_32F);
334 Dx.create(src.size(), CV_32FC1);
335 Dy.create(src.size(), CV_32FC1);
// Global size is the image size rounded up to a whole number of 16x16 work-groups.
337 size_t localsize[2] = { sobel_lsz, sobel_lsz };
338 size_t globalsize[2] = { localsize[0] * (1 + (src.cols - 1) / localsize[0]),
339 localsize[1] * (1 + (src.rows - 1) / localsize[1]) };
// ROI origin expressed in elements (x) and rows (y) relative to the parent buffer.
341 int src_offset_x = (int)((src.offset % src.step) / src.elemSize());
342 int src_offset_y = (int)(src.offset / src.step);
// NOTE(review): this 5-entry table is indexed directly with borderType below, so callers
// must pass a value in 0..4. ocl_cornerMinEigenValVecs filters borderType before calling;
// no equivalent check is visible in ocl_preCornerDetect -- confirm.
344 const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT",
345 "BORDER_WRAP", "BORDER_REFLECT101" };
// NOTE(review): for aperture_size == -1 the formatted kernel name is "sobel-1" -- confirm
// that the kernel source provides that entry point (SCHARR is only passed as a define).
347 ocl::Kernel k(format("sobel%d", aperture_size).c_str(), ocl::imgproc::covardata_oclsrc,
348 cv::format("-D BLK_X=%d -D BLK_Y=%d -D %s -D SRCTYPE=%s%s",
349 (int)localsize[0], (int)localsize[1], borderTypes[borderType], ocl::typeToStr(depth),
350 aperture_size < 0 ? " -D SCHARR" : ""));
354 k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, src_offset_x, src_offset_y,
355 ocl::KernelArg::WriteOnlyNoSize(Dx), ocl::KernelArg::WriteOnly(Dy),
356 wholeSize.height, wholeSize.width, scale);
358 return k.run(2, globalsize, localsize, false);
// Generic path: Sobel for a positive aperture; the Scharr calls are presumably the else branch.
362 if (aperture_size > 0)
364 Sobel(_src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType);
365 Sobel(_src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType);
369 Scharr(_src, Dx, CV_32F, 1, 0, scale, 0, borderType);
370 Scharr(_src, Dy, CV_32F, 0, 1, scale, 0, borderType);
// OpenCL implementation shared by cornerMinEigenVal and cornerHarris (op_type must be
// HARRIS or MINEIGENVAL, asserted). Obtains Dx / Dy via extractCovData, then runs the
// "corner" kernel from corner_oclsrc, writing a CV_32FC1 result the size of _src into _dst.
// Returns the result of the kernel run; the early-exit statements after each guard below
// are on lines omitted from this listing (presumably `return false`).
377 static bool ocl_cornerMinEigenValVecs(InputArray _src, OutputArray _dst, int block_size,
378 int aperture_size, double k, int borderType, int op_type)
380 CV_Assert(op_type == HARRIS || op_type == MINEIGENVAL);
// Only these four border modes pass this guard; they are all valid borderTypes[] indices.
382 if ( !(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE ||
383 borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101) )
386 int type = _src.type(), depth = CV_MAT_DEPTH(type);
387 if ( !(type == CV_8UC1 || type == CV_32FC1) )
// String tables turned into -D build options; indexed by borderType and op_type.
390 const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT",
391 "BORDER_WRAP", "BORDER_REFLECT101" };
392 const char * const cornerType[] = { "CORNER_MINEIGENVAL", "CORNER_HARRIS", 0 };
// Same starting scale expression as in cornerEigenValsVecs; the adjustments that follow
// (lines 396-400) are omitted from this listing.
395 double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size;
396 if (aperture_size < 0)
403 if (!extractCovData(_src, Dx, Dy, depth, (float)scale, aperture_size, borderType))
// Window anchor (block_size / 2) and window size are compiled into the kernel.
406 ocl::Kernel cornelKernel("corner", ocl::imgproc::corner_oclsrc,
407 format("-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s -D %s",
408 block_size / 2, block_size / 2, block_size, block_size,
409 borderTypes[borderType], cornerType[op_type]));
410 if (cornelKernel.empty())
413 _dst.createSameSize(_src, CV_32FC1);
414 UMat dst = _dst.getUMat();
416 cornelKernel.args(ocl::KernelArg::ReadOnly(Dx), ocl::KernelArg::ReadOnly(Dy),
417 ocl::KernelArg::WriteOnly(dst), (float)k);
// Work-group is 256 x 1. gSize is the work-group width minus 2 * (block_size / 2);
// presumably the number of output columns each group produces once the window halo is
// excluded -- confirm against the kernel.
419 size_t blockSizeX = 256, blockSizeY = 1;
420 size_t gSize = blockSizeX - block_size / 2 * 2;
// ceil(cols / gSize) work-groups along x.
421 size_t globalSizeX = (Dx.cols) % gSize == 0 ? Dx.cols / gSize * blockSizeX : (Dx.cols / gSize + 1) * blockSizeX;
// ceil(rows / rows_per_thread) along y, rounded up to a multiple of blockSizeY.
422 size_t rows_per_thread = 2;
423 size_t globalSizeY = ((Dx.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ?
424 ((Dx.rows + rows_per_thread - 1) / rows_per_thread) :
425 (((Dx.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY;
427 size_t globalsize[2] = { globalSizeX, globalSizeY }, localsize[2] = { blockSizeX, blockSizeY };
428 return cornelKernel.run(2, globalsize, localsize, false);
// OpenCL implementation of preCornerDetect. Computes first derivatives via extractCovData
// (scale 1) and second / mixed derivatives via Sobel, then runs the "preCornerDetect"
// kernel from precornerdetect_oclsrc to combine them into a CV_32FC1 result in _dst.
// Returns the result of the kernel run; the statements after the guards (presumably
// `return false`) are on lines omitted from this listing.
431 static bool ocl_preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int borderType, int depth )
433 UMat Dx, Dy, D2x, D2y, Dxy;
435 if (!extractCovData(_src, Dx, Dy, depth, 1, ksize, borderType))
438 Sobel( _src, D2x, CV_32F, 2, 0, ksize, 1, 0, borderType );
439 Sobel( _src, D2y, CV_32F, 0, 2, ksize, 1, 0, borderType );
440 Sobel( _src, Dxy, CV_32F, 1, 1, ksize, 1, 0, borderType );
442 _dst.create( _src.size(), CV_32FC1 );
443 UMat dst = _dst.getUMat();
// factor starts as 2^(ksize - 1) and ends as 1 / factor^3. Lines 446-447 between the two
// statements are omitted; presumably they adjust factor for 8-bit input -- confirm.
445 double factor = 1 << (ksize - 1);
448 factor = 1./(factor * factor * factor);
450 ocl::Kernel k("preCornerDetect", ocl::imgproc::precornerdetect_oclsrc);
454 k.args(ocl::KernelArg::ReadOnlyNoSize(Dx), ocl::KernelArg::ReadOnlyNoSize(Dy),
455 ocl::KernelArg::ReadOnlyNoSize(D2x), ocl::KernelArg::ReadOnlyNoSize(D2y),
456 ocl::KernelArg::ReadOnlyNoSize(Dxy), ocl::KernelArg::WriteOnly(dst), (float)factor);
// One work-item per output pixel; the local size is left to the runtime (NULL).
458 size_t globalsize[2] = { dst.cols, dst.rows };
459 return k.run(2, globalsize, NULL, false);
// Public entry point: writes a CV_32FC1 map the size of the source into _dst.
// Order of attempts visible here: OpenCL (when _dst is a UMat and the source has at most
// 2 dimensions), then an IPP fast path (IPP major version >= 8, restricted configurations),
// then the generic cornerEigenValsVecs with MINEIGENVAL.
// NOTE(review): parts of the IPP section (declaration of kerSize and bufferSize, status
// checks, returns, #endif) are on lines omitted from this listing.
466 void cv::cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType )
468 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
469 ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, 0.0, borderType, MINEIGENVAL))
471 Mat src = _src.getMat();
472 _dst.create( src.size(), CV_32FC1 );
473 Mat dst = _dst.getMat();
474 #if defined(HAVE_IPP) && (IPP_VERSION_MAJOR >= 8)
// Common function-pointer types so the 8u and 32f IPP variants can share one call site.
475 typedef IppStatus (CV_STDCALL * ippiMinEigenValGetBufferSize)(IppiSize, int, int, int*);
476 typedef IppStatus (CV_STDCALL * ippiMinEigenVal)(const void*, int, Ipp32f*, int, IppiSize, IppiKernelType, int, int, Ipp8u*);
477 IppiKernelType kerType;
// Kernel type selection; the condition choosing between the two is on omitted lines
// (presumably Scharr for a negative ksize, Sobel otherwise).
481 kerType = ippKernelScharr;
485 kerType = ippKernelSobel;
// Split borderType into the isolated flag and the remaining border mode.
487 bool isolated = (borderType & BORDER_ISOLATED) != 0;
488 int borderTypeNI = borderType & ~BORDER_ISOLATED;
// IPP is used only for replicate borders on a non-ROI (or isolated) source with
// 3x3 / 5x5 derivative kernels and 3 / 5 block sizes.
489 if ((borderTypeNI == BORDER_REPLICATE && (!src.isSubmatrix() || isolated)) &&
490 (kerSize == 3 || kerSize == 5) && (blockSize == 3 || blockSize == 5))
492 ippiMinEigenValGetBufferSize getBufferSizeFunc = 0;
493 ippiMinEigenVal minEigenValFunc = 0;
494 float norm_coef = 0.f;
496 if (src.type() == CV_8UC1)
498 getBufferSizeFunc = (ippiMinEigenValGetBufferSize) ippiMinEigenValGetBufferSize_8u32f_C1R;
499 minEigenValFunc = (ippiMinEigenVal) ippiMinEigenVal_8u32f_C1R;
500 norm_coef = 1.f / 255.f;
501 } else if (src.type() == CV_32FC1)
503 getBufferSizeFunc = (ippiMinEigenValGetBufferSize) ippiMinEigenValGetBufferSize_32f_C1R;
504 minEigenValFunc = (ippiMinEigenVal) ippiMinEigenVal_32f_C1R;
// Extra divisor applied for the Scharr kernel; the origin of 2.45 is not explained here.
507 norm_coef = kerType == ippKernelSobel ? norm_coef : norm_coef / 2.45f;
509 if (getBufferSizeFunc && minEigenValFunc)
512 IppiSize srcRoi = { src.cols, src.rows };
513 IppStatus ok = getBufferSizeFunc(srcRoi, kerSize, blockSize, &bufferSize);
516 AutoBuffer<uchar> buffer(bufferSize);
517 ok = minEigenValFunc(src.ptr(), (int) src.step, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi, kerType, kerSize, blockSize, buffer);
// Rescale the IPP output in place by norm_coef.
518 CV_SUPPRESS_DEPRECATED_START
519 if (ok >= 0) ok = ippiMulC_32f_C1IR(norm_coef, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi);
520 CV_SUPPRESS_DEPRECATED_END
// Generic implementation.
528 cornerEigenValsVecs( src, dst, blockSize, ksize, MINEIGENVAL, 0, borderType );
// Public entry point: writes a CV_32FC1 map the size of the source into _dst.
// Tries OpenCL first (when _dst is a UMat and the source has at most 2 dimensions), then
// falls through to cornerEigenValsVecs with HARRIS and the given k.
// NOTE(review): several lines of the IPP section (declarations, status handling, #endif)
// are omitted from this listing.
531 void cv::cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksize, double k, int borderType )
533 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(),
534 ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, k, borderType, HARRIS))
536 Mat src = _src.getMat();
537 _dst.create( src.size(), CV_32FC1 );
538 Mat dst = _dst.getMat();
// The trailing "&& 0" makes this condition always false, so the IPP path is compiled out.
540 #if IPP_VERSION_X100 >= 801 && 0
541 int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
542 int borderTypeNI = borderType & ~BORDER_ISOLATED;
543 bool isolated = (borderType & BORDER_ISOLATED) != 0;
545 if ( (ksize == 3 || ksize == 5) && (type == CV_8UC1 || type == CV_32FC1) &&
546 (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) && cn == 1 && (!src.isSubmatrix() || isolated) )
548 IppiSize roisize = { src.cols, src.rows };
549 IppiMaskSize masksize = ksize == 5 ? ippMskSize5x5 : ippMskSize3x3;
550 IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp32f;
// Same starting scale expression as in cornerEigenValsVecs, later raised to the power -4
// (intermediate adjustments on lines 554-557 are omitted from this listing).
553 double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
558 scale = std::pow(scale, -4.0);
560 if (ippiHarrisCornerGetBufferSize(roisize, masksize, blockSize, datatype, cn, &bufsize) >= 0)
// NOTE(review): the matching release of this buffer is not visible in this listing -- confirm.
562 Ipp8u * buffer = ippsMalloc_8u(bufsize);
563 IppiDifferentialKernel filterType = ksize > 0 ? ippFilterSobel : ippFilterScharr;
564 IppiBorderType borderTypeIpp = borderTypeNI == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl;
565 IppStatus status = (IppStatus)-1;
// Depth dispatch; the 8-bit condition preceding this call is on an omitted line.
568 status = ippiHarrisCorner_8u32f_C1R((const Ipp8u *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
569 filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer);
570 else if (depth == CV_32F)
571 status = ippiHarrisCorner_32f_C1R((const Ipp32f *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize,
572 filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer);
// Generic implementation.
582 cornerEigenValsVecs( src, dst, blockSize, ksize, HARRIS, k, borderType );
// Public entry point: fills _dst with six floats per source pixel by running
// cornerEigenValsVecs with EIGENVALSVECS.
586 void cv::cornerEigenValsAndVecs( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType )
588 Mat src = _src.getMat();
589 Size dsz = _dst.size();
590 int dtype = _dst.type();
// Keep an existing destination if it already holds rows x (cols * 6) 32-bit floats in any
// channel layout; otherwise (re)create it as a 6-channel float matrix.
592 if( dsz.height != src.rows || dsz.width*CV_MAT_CN(dtype) != src.cols*6 || CV_MAT_DEPTH(dtype) != CV_32F )
593 _dst.create( src.size(), CV_32FC(6) );
594 Mat dst = _dst.getMat();
595 cornerEigenValsVecs( src, dst, blockSize, ksize, EIGENVALSVECS, 0, borderType );
// Public entry point: for a CV_8UC1 or CV_32FC1 source (asserted) writes, per pixel,
//   factor * (dx^2 * d2y + dy^2 * d2x - 2 * dx * dy * dxy)
// into a CV_32FC1 _dst, where dx, dy, d2x, d2y, dxy are Sobel derivatives of the source.
// Tries OpenCL first (when _dst is a UMat and the source has at most 2 dimensions).
// NOTE(review): braces, preprocessor guards around the __m128 / float32x4_t loops and a
// few statements are on lines omitted from this listing.
599 void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int borderType )
601 int type = _src.type();
602 CV_Assert( type == CV_8UC1 || type == CV_32FC1 );
604 CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat(),
605 ocl_preCornerDetect(_src, _dst, ksize, borderType, CV_MAT_DEPTH(type)))
607 Mat Dx, Dy, D2x, D2y, Dxy, src = _src.getMat();
608 _dst.create( src.size(), CV_32FC1 );
609 Mat dst = _dst.getMat();
// First, second and mixed derivatives, all as CV_32F with unit scale.
611 Sobel( src, Dx, CV_32F, 1, 0, ksize, 1, 0, borderType );
612 Sobel( src, Dy, CV_32F, 0, 1, ksize, 1, 0, borderType );
613 Sobel( src, D2x, CV_32F, 2, 0, ksize, 1, 0, borderType );
614 Sobel( src, D2y, CV_32F, 0, 2, ksize, 1, 0, borderType );
615 Sobel( src, Dxy, CV_32F, 1, 1, ksize, 1, 0, borderType );
// factor starts as 2^(ksize - 1) and ends as 1 / factor^3. The statement executed for
// 8-bit input (line 619) is omitted from this listing.
617 double factor = 1 << (ksize - 1);
618 if( src.depth() == CV_8U )
620 factor = 1./(factor * factor * factor);
621 float factor_f = (float)factor;
// Runtime SSE2 check; presumably gates the __m128 loop below (guard not visible).
624 volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2);
625 __m128 v_factor = _mm_set1_ps(factor_f), v_m2 = _mm_set1_ps(-2.0f);
628 Size size = src.size();
630 for( i = 0; i < size.height; i++ )
632 float* dstdata = dst.ptr<float>(i);
633 const float* dxdata = Dx.ptr<float>(i);
634 const float* dydata = Dy.ptr<float>(i);
635 const float* d2xdata = D2x.ptr<float>(i);
636 const float* d2ydata = D2y.ptr<float>(i);
637 const float* dxydata = Dxy.ptr<float>(i);
// SSE loop: four pixels per iteration.
644 for( ; j <= size.width - 4; j += 4 )
646 __m128 v_dx = _mm_loadu_ps((const float *)(dxdata + j));
647 __m128 v_dy = _mm_loadu_ps((const float *)(dydata + j));
// s1 = dx^2 * d2y, s2 = dy^2 * d2x, s3 = dx * dy * dxy
649 __m128 v_s1 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dx), _mm_loadu_ps((const float *)(d2ydata + j)));
650 __m128 v_s2 = _mm_mul_ps(_mm_mul_ps(v_dy, v_dy), _mm_loadu_ps((const float *)(d2xdata + j)));
651 __m128 v_s3 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dy), _mm_loadu_ps((const float *)(dxydata + j)));
// factor * (s1 + s2 - 2 * s3)
652 v_s1 = _mm_mul_ps(v_factor, _mm_add_ps(v_s1, _mm_add_ps(v_s2, _mm_mul_ps(v_s3, v_m2))));
653 _mm_storeu_ps(dstdata + j, v_s1);
// NEON loop: same expression built with multiply-accumulate, four pixels per iteration.
657 for( ; j <= size.width - 4; j += 4 )
659 float32x4_t v_dx = vld1q_f32(dxdata + j), v_dy = vld1q_f32(dydata + j);
660 float32x4_t v_s = vmulq_f32(v_dx, vmulq_f32(v_dx, vld1q_f32(d2ydata + j)));
661 v_s = vmlaq_f32(v_s, vld1q_f32(d2xdata + j), vmulq_f32(v_dy, v_dy));
662 v_s = vmlaq_f32(v_s, vld1q_f32(dxydata + j), vmulq_n_f32(vmulq_f32(v_dy, v_dx), -2));
663 vst1q_f32(dstdata + j, vmulq_n_f32(v_s, factor_f));
// Scalar tail: uses the double-precision factor rather than factor_f.
667 for( ; j < size.width; j++ )
669 float dx = dxdata[j];
670 float dy = dydata[j];
671 dstdata[j] = (float)(factor*(dx*dx*d2ydata[j] + dy*dy*d2xdata[j] - 2*dx*dy*dxydata[j]));
// C API wrapper around cv::cornerMinEigenVal. The destination must already have the same
// size as the source and type CV_32FC1 (asserted); the border mode is fixed to
// BORDER_REPLICATE. (The return-type line and braces are omitted from this listing.)
677 cvCornerMinEigenVal( const CvArr* srcarr, CvArr* dstarr,
678 int block_size, int aperture_size )
680 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
682 CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
683 cv::cornerMinEigenVal( src, dst, block_size, aperture_size, cv::BORDER_REPLICATE );
// C API wrapper around cv::cornerHarris. The destination must already have the same size
// as the source and type CV_32FC1 (asserted); the border mode is fixed to BORDER_REPLICATE.
// (The return-type line and braces are omitted from this listing.)
687 cvCornerHarris( const CvArr* srcarr, CvArr* dstarr,
688 int block_size, int aperture_size, double k )
690 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
692 CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
693 cv::cornerHarris( src, dst, block_size, aperture_size, k, cv::BORDER_REPLICATE );
// C API wrapper around cv::cornerEigenValsAndVecs. The destination must have the source's
// row count, six 32-bit floats per source pixel (cols * channels == src.cols * 6) and
// CV_32F depth (asserted); the border mode is fixed to BORDER_REPLICATE.
// (The return-type line and braces are omitted from this listing.)
698 cvCornerEigenValsAndVecs( const void* srcarr, void* dstarr,
699 int block_size, int aperture_size )
701 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
703 CV_Assert( src.rows == dst.rows && src.cols*6 == dst.cols*dst.channels() && dst.depth() == CV_32F );
704 cv::cornerEigenValsAndVecs( src, dst, block_size, aperture_size, cv::BORDER_REPLICATE );
// C API wrapper around cv::preCornerDetect. The destination must already have the same
// size as the source and type CV_32FC1 (asserted); the border mode is fixed to
// BORDER_REPLICATE. (The return-type line and braces are omitted from this listing.)
709 cvPreCornerDetect( const void* srcarr, void* dstarr, int aperture_size )
711 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
713 CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 );
714 cv::preCornerDetect( src, dst, aperture_size, cv::BORDER_REPLICATE );