1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
44 #include "opencl_kernels_core.hpp"
49 typedef void (*MathFunc)(const void* src, void* dst, int len);
51 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
52 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
53 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
54 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
58 enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 };
60 static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGREES", "OP_PHASE_RADIANS", 0 };
62 static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop)
64 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
65 int kercn = oclop == OCL_OP_PHASE_DEGREES ||
66 oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
68 const ocl::Device d = ocl::Device::getDefault();
69 bool double_support = d.doubleFPConfig() > 0;
70 if (!double_support && depth == CV_64F)
72 int rowsPerWI = d.isIntel() ? 4 : 1;
74 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
75 format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
76 oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI,
77 double_support ? " -D DOUBLE_SUPPORT" : ""));
81 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
82 _dst.create(src1.size(), type);
83 UMat dst = _dst.getUMat();
85 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
86 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
87 dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
90 k.args(src1arg, dstarg);
92 k.args(src1arg, src2arg, dstarg);
94 size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
95 return k.run(2, globalsize, 0, false);
100 float fastAtan2( float y, float x )
102 float ax = std::abs(x), ay = std::abs(y);
106 c = ay/(ax + (float)DBL_EPSILON);
108 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
112 c = ax/(ay + (float)DBL_EPSILON);
114 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
123 static void FastAtan2_32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees=true )
126 float scale = angleInDegrees ? 1 : (float)(CV_PI/180);
128 #ifdef HAVE_TEGRA_OPTIMIZATION
129 if (tegra::FastAtan2_32f(Y, X, angle, len, scale))
136 Cv32suf iabsmask; iabsmask.i = 0x7fffffff;
137 __m128 eps = _mm_set1_ps((float)DBL_EPSILON), absmask = _mm_set1_ps(iabsmask.f);
138 __m128 _90 = _mm_set1_ps(90.f), _180 = _mm_set1_ps(180.f), _360 = _mm_set1_ps(360.f);
139 __m128 z = _mm_setzero_ps(), scale4 = _mm_set1_ps(scale);
140 __m128 p1 = _mm_set1_ps(atan2_p1), p3 = _mm_set1_ps(atan2_p3);
141 __m128 p5 = _mm_set1_ps(atan2_p5), p7 = _mm_set1_ps(atan2_p7);
143 for( ; i <= len - 4; i += 4 )
145 __m128 x = _mm_loadu_ps(X + i), y = _mm_loadu_ps(Y + i);
146 __m128 ax = _mm_and_ps(x, absmask), ay = _mm_and_ps(y, absmask);
147 __m128 mask = _mm_cmplt_ps(ax, ay);
148 __m128 tmin = _mm_min_ps(ax, ay), tmax = _mm_max_ps(ax, ay);
149 __m128 c = _mm_div_ps(tmin, _mm_add_ps(tmax, eps));
150 __m128 c2 = _mm_mul_ps(c, c);
151 __m128 a = _mm_mul_ps(c2, p7);
152 a = _mm_mul_ps(_mm_add_ps(a, p5), c2);
153 a = _mm_mul_ps(_mm_add_ps(a, p3), c2);
154 a = _mm_mul_ps(_mm_add_ps(a, p1), c);
156 __m128 b = _mm_sub_ps(_90, a);
157 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
159 b = _mm_sub_ps(_180, a);
160 mask = _mm_cmplt_ps(x, z);
161 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
163 b = _mm_sub_ps(_360, a);
164 mask = _mm_cmplt_ps(y, z);
165 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
167 a = _mm_mul_ps(a, scale4);
168 _mm_storeu_ps(angle + i, a);
172 float32x4_t eps = vdupq_n_f32((float)DBL_EPSILON);
173 float32x4_t _90 = vdupq_n_f32(90.f), _180 = vdupq_n_f32(180.f), _360 = vdupq_n_f32(360.f);
174 float32x4_t z = vdupq_n_f32(0.0f), scale4 = vdupq_n_f32(scale);
175 float32x4_t p1 = vdupq_n_f32(atan2_p1), p3 = vdupq_n_f32(atan2_p3);
176 float32x4_t p5 = vdupq_n_f32(atan2_p5), p7 = vdupq_n_f32(atan2_p7);
178 for( ; i <= len - 4; i += 4 )
180 float32x4_t x = vld1q_f32(X + i), y = vld1q_f32(Y + i);
181 float32x4_t ax = vabsq_f32(x), ay = vabsq_f32(y);
182 float32x4_t tmin = vminq_f32(ax, ay), tmax = vmaxq_f32(ax, ay);
183 float32x4_t c = vmulq_f32(tmin, cv_vrecpq_f32(vaddq_f32(tmax, eps)));
184 float32x4_t c2 = vmulq_f32(c, c);
185 float32x4_t a = vmulq_f32(c2, p7);
186 a = vmulq_f32(vaddq_f32(a, p5), c2);
187 a = vmulq_f32(vaddq_f32(a, p3), c2);
188 a = vmulq_f32(vaddq_f32(a, p1), c);
190 a = vbslq_f32(vcgeq_f32(ax, ay), a, vsubq_f32(_90, a));
191 a = vbslq_f32(vcltq_f32(x, z), vsubq_f32(_180, a), a);
192 a = vbslq_f32(vcltq_f32(y, z), vsubq_f32(_360, a), a);
194 vst1q_f32(angle + i, vmulq_f32(a, scale4));
198 for( ; i < len; i++ )
200 float x = X[i], y = Y[i];
201 float ax = std::abs(x), ay = std::abs(y);
205 c = ay/(ax + (float)DBL_EPSILON);
207 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
211 c = ax/(ay + (float)DBL_EPSILON);
213 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
219 angle[i] = (float)(a*scale);
224 /* ************************************************************************** *\
225 Fast cube root by Ken Turkowski
226 (http://www.worldserver.com/turk/computergraphics/papers.html)
227 \* ************************************************************************** */
228 float cubeRoot( float value )
236 ix = v.i & 0x7fffffff;
237 s = v.i & 0x80000000;
238 ex = (ix >> 23) - 127;
240 shx -= shx >= 0 ? 3 : 0;
241 ex = (ex - shx) / 3; /* exponent of cube root */
242 v.i = (ix & ((1<<23)-1)) | ((shx + 127)<<23);
245 /* 0.125 <= fr < 1.0 */
246 /* Use quartic rational polynomial with error < 2^(-24) */
247 fr = (float)(((((45.2548339756803022511987494 * fr +
248 192.2798368355061050458134625) * fr +
249 119.1654824285581628956914143) * fr +
250 13.43250139086239872172837314) * fr +
251 0.1636161226585754240958355063)/
252 ((((14.80884093219134573786480845 * fr +
253 151.9714051044435648658557668) * fr +
254 168.5254414101568283957668343) * fr +
255 33.9905941350215598754191872) * fr +
258 /* fr *= 2^ex * sign */
261 v.i = (v.i + (ex << 23) + s) & (m.i*2 != 0 ? -1 : 0);
265 static void Magnitude_32f(const float* x, const float* y, float* mag, int len)
267 #if defined HAVE_IPP && 0
270 IppStatus status = ippsMagnitude_32f(x, y, mag, len);
273 CV_IMPL_ADD(CV_IMPL_IPP);
285 for( ; i <= len - 8; i += 8 )
287 __m128 x0 = _mm_loadu_ps(x + i), x1 = _mm_loadu_ps(x + i + 4);
288 __m128 y0 = _mm_loadu_ps(y + i), y1 = _mm_loadu_ps(y + i + 4);
289 x0 = _mm_add_ps(_mm_mul_ps(x0, x0), _mm_mul_ps(y0, y0));
290 x1 = _mm_add_ps(_mm_mul_ps(x1, x1), _mm_mul_ps(y1, y1));
291 x0 = _mm_sqrt_ps(x0); x1 = _mm_sqrt_ps(x1);
292 _mm_storeu_ps(mag + i, x0); _mm_storeu_ps(mag + i + 4, x1);
296 for( ; i <= len - 4; i += 4 )
298 float32x4_t v_x = vld1q_f32(x + i), v_y = vld1q_f32(y + i);
299 vst1q_f32(mag + i, cv_vsqrtq_f32(vmlaq_f32(vmulq_f32(v_x, v_x), v_y, v_y)));
301 for( ; i <= len - 2; i += 2 )
303 float32x2_t v_x = vld1_f32(x + i), v_y = vld1_f32(y + i);
304 vst1_f32(mag + i, cv_vsqrt_f32(vmla_f32(vmul_f32(v_x, v_x), v_y, v_y)));
308 for( ; i < len; i++ )
310 float x0 = x[i], y0 = y[i];
311 mag[i] = std::sqrt(x0*x0 + y0*y0);
315 static void Magnitude_64f(const double* x, const double* y, double* mag, int len)
317 #if defined(HAVE_IPP)
320 IppStatus status = ippsMagnitude_64f(x, y, mag, len);
323 CV_IMPL_ADD(CV_IMPL_IPP);
335 for( ; i <= len - 4; i += 4 )
337 __m128d x0 = _mm_loadu_pd(x + i), x1 = _mm_loadu_pd(x + i + 2);
338 __m128d y0 = _mm_loadu_pd(y + i), y1 = _mm_loadu_pd(y + i + 2);
339 x0 = _mm_add_pd(_mm_mul_pd(x0, x0), _mm_mul_pd(y0, y0));
340 x1 = _mm_add_pd(_mm_mul_pd(x1, x1), _mm_mul_pd(y1, y1));
341 x0 = _mm_sqrt_pd(x0); x1 = _mm_sqrt_pd(x1);
342 _mm_storeu_pd(mag + i, x0); _mm_storeu_pd(mag + i + 2, x1);
347 for( ; i < len; i++ )
349 double x0 = x[i], y0 = y[i];
350 mag[i] = std::sqrt(x0*x0 + y0*y0);
355 static void InvSqrt_32f(const float* src, float* dst, int len)
357 #if defined(HAVE_IPP)
360 if (ippsInvSqrt_32f_A21(src, dst, len) >= 0)
362 CV_IMPL_ADD(CV_IMPL_IPP);
374 __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f);
375 if( (((size_t)src|(size_t)dst) & 15) == 0 )
376 for( ; i <= len - 8; i += 8 )
378 __m128 t0 = _mm_load_ps(src + i), t1 = _mm_load_ps(src + i + 4);
379 __m128 h0 = _mm_mul_ps(t0, _0_5), h1 = _mm_mul_ps(t1, _0_5);
380 t0 = _mm_rsqrt_ps(t0); t1 = _mm_rsqrt_ps(t1);
381 t0 = _mm_mul_ps(t0, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t0,t0),h0)));
382 t1 = _mm_mul_ps(t1, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t1,t1),h1)));
383 _mm_store_ps(dst + i, t0); _mm_store_ps(dst + i + 4, t1);
386 for( ; i <= len - 8; i += 8 )
388 __m128 t0 = _mm_loadu_ps(src + i), t1 = _mm_loadu_ps(src + i + 4);
389 __m128 h0 = _mm_mul_ps(t0, _0_5), h1 = _mm_mul_ps(t1, _0_5);
390 t0 = _mm_rsqrt_ps(t0); t1 = _mm_rsqrt_ps(t1);
391 t0 = _mm_mul_ps(t0, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t0,t0),h0)));
392 t1 = _mm_mul_ps(t1, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t1,t1),h1)));
393 _mm_storeu_ps(dst + i, t0); _mm_storeu_ps(dst + i + 4, t1);
398 for( ; i < len; i++ )
399 dst[i] = 1/std::sqrt(src[i]);
403 static void InvSqrt_64f(const double* src, double* dst, int len)
410 __m128d v_1 = _mm_set1_pd(1.0);
411 for ( ; i <= len - 2; i += 2)
412 _mm_storeu_pd(dst + i, _mm_div_pd(v_1, _mm_sqrt_pd(_mm_loadu_pd(src + i))));
416 for( ; i < len; i++ )
417 dst[i] = 1/std::sqrt(src[i]);
421 static void Sqrt_32f(const float* src, float* dst, int len)
423 #if defined(HAVE_IPP)
426 if (ippsSqrt_32f_A21(src, dst, len) >= 0)
428 CV_IMPL_ADD(CV_IMPL_IPP);
439 if( (((size_t)src|(size_t)dst) & 15) == 0 )
440 for( ; i <= len - 8; i += 8 )
442 __m128 t0 = _mm_load_ps(src + i), t1 = _mm_load_ps(src + i + 4);
443 t0 = _mm_sqrt_ps(t0); t1 = _mm_sqrt_ps(t1);
444 _mm_store_ps(dst + i, t0); _mm_store_ps(dst + i + 4, t1);
447 for( ; i <= len - 8; i += 8 )
449 __m128 t0 = _mm_loadu_ps(src + i), t1 = _mm_loadu_ps(src + i + 4);
450 t0 = _mm_sqrt_ps(t0); t1 = _mm_sqrt_ps(t1);
451 _mm_storeu_ps(dst + i, t0); _mm_storeu_ps(dst + i + 4, t1);
456 for( ; i < len; i++ )
457 dst[i] = std::sqrt(src[i]);
461 static void Sqrt_64f(const double* src, double* dst, int len)
463 #if defined(HAVE_IPP)
466 if (ippsSqrt_64f_A50(src, dst, len) >= 0)
468 CV_IMPL_ADD(CV_IMPL_IPP);
480 if( (((size_t)src|(size_t)dst) & 15) == 0 )
481 for( ; i <= len - 4; i += 4 )
483 __m128d t0 = _mm_load_pd(src + i), t1 = _mm_load_pd(src + i + 2);
484 t0 = _mm_sqrt_pd(t0); t1 = _mm_sqrt_pd(t1);
485 _mm_store_pd(dst + i, t0); _mm_store_pd(dst + i + 2, t1);
488 for( ; i <= len - 4; i += 4 )
490 __m128d t0 = _mm_loadu_pd(src + i), t1 = _mm_loadu_pd(src + i + 2);
491 t0 = _mm_sqrt_pd(t0); t1 = _mm_sqrt_pd(t1);
492 _mm_storeu_pd(dst + i, t0); _mm_storeu_pd(dst + i + 2, t1);
497 for( ; i < len; i++ )
498 dst[i] = std::sqrt(src[i]);
502 /****************************************************************************************\
503 * Cartezian -> Polar *
504 \****************************************************************************************/
506 void magnitude( InputArray src1, InputArray src2, OutputArray dst )
508 int type = src1.type(), depth = src1.depth(), cn = src1.channels();
509 CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
511 CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
512 ocl_math_op(src1, src2, dst, OCL_OP_MAG))
514 Mat X = src1.getMat(), Y = src2.getMat();
515 dst.create(X.dims, X.size, X.type());
516 Mat Mag = dst.getMat();
518 const Mat* arrays[] = {&X, &Y, &Mag, 0};
520 NAryMatIterator it(arrays, ptrs);
521 int len = (int)it.size*cn;
523 for( size_t i = 0; i < it.nplanes; i++, ++it )
525 if( depth == CV_32F )
527 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
528 float *mag = (float*)ptrs[2];
529 Magnitude_32f( x, y, mag, len );
533 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
534 double *mag = (double*)ptrs[2];
535 Magnitude_64f( x, y, mag, len );
541 void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees )
543 int type = src1.type(), depth = src1.depth(), cn = src1.channels();
544 CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
546 CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
547 ocl_math_op(src1, src2, dst, angleInDegrees ? OCL_OP_PHASE_DEGREES : OCL_OP_PHASE_RADIANS))
549 Mat X = src1.getMat(), Y = src2.getMat();
550 dst.create( X.dims, X.size, type );
551 Mat Angle = dst.getMat();
553 const Mat* arrays[] = {&X, &Y, &Angle, 0};
555 NAryMatIterator it(arrays, ptrs);
556 cv::AutoBuffer<float> _buf;
557 float* buf[2] = {0, 0};
558 int j, k, total = (int)(it.size*cn), blockSize = total;
559 size_t esz1 = X.elemSize1();
561 if( depth == CV_64F )
563 blockSize = std::min(blockSize, ((BLOCK_SIZE+cn-1)/cn)*cn);
564 _buf.allocate(blockSize*2);
566 buf[1] = buf[0] + blockSize;
569 for( size_t i = 0; i < it.nplanes; i++, ++it )
571 for( j = 0; j < total; j += blockSize )
573 int len = std::min(total - j, blockSize);
574 if( depth == CV_32F )
576 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
577 float *angle = (float*)ptrs[2];
578 FastAtan2_32f( y, x, angle, len, angleInDegrees );
582 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
583 double *angle = (double*)ptrs[2];
584 for( k = 0; k < len; k++ )
586 buf[0][k] = (float)x[k];
587 buf[1][k] = (float)y[k];
590 FastAtan2_32f( buf[1], buf[0], buf[0], len, angleInDegrees );
591 for( k = 0; k < len; k++ )
592 angle[k] = buf[0][k];
603 static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
604 OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
606 const ocl::Device & d = ocl::Device::getDefault();
607 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
608 rowsPerWI = d.isIntel() ? 4 : 1;
609 bool doubleSupport = d.doubleFPConfig() > 0;
611 if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 &&
612 (depth == CV_32F || depth == CV_64F) && type == _src2.type()) ||
613 (depth == CV_64F && !doubleSupport) )
616 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
617 format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s",
618 ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
619 depth, rowsPerWI, angleInDegrees ? "AD" : "AR",
620 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
624 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
625 Size size = src1.size();
626 CV_Assert( size == src2.size() );
628 _dst1.create(size, type);
629 _dst2.create(size, type);
630 UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
632 k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
633 ocl::KernelArg::ReadOnlyNoSize(src2),
634 ocl::KernelArg::WriteOnly(dst1, cn),
635 ocl::KernelArg::WriteOnlyNoSize(dst2));
637 size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
638 return k.run(2, globalsize, NULL, false);
643 void cartToPolar( InputArray src1, InputArray src2,
644 OutputArray dst1, OutputArray dst2, bool angleInDegrees )
646 CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(),
647 ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees))
649 Mat X = src1.getMat(), Y = src2.getMat();
650 int type = X.type(), depth = X.depth(), cn = X.channels();
651 CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F));
652 dst1.create( X.dims, X.size, type );
653 dst2.create( X.dims, X.size, type );
654 Mat Mag = dst1.getMat(), Angle = dst2.getMat();
656 const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
658 NAryMatIterator it(arrays, ptrs);
659 cv::AutoBuffer<float> _buf;
660 float* buf[2] = {0, 0};
661 int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
662 size_t esz1 = X.elemSize1();
664 if( depth == CV_64F )
666 _buf.allocate(blockSize*2);
668 buf[1] = buf[0] + blockSize;
671 for( size_t i = 0; i < it.nplanes; i++, ++it )
673 for( j = 0; j < total; j += blockSize )
675 int len = std::min(total - j, blockSize);
676 if( depth == CV_32F )
678 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
679 float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
680 Magnitude_32f( x, y, mag, len );
681 FastAtan2_32f( y, x, angle, len, angleInDegrees );
685 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
686 double *angle = (double*)ptrs[3];
688 Magnitude_64f(x, y, (double*)ptrs[2], len);
689 for( k = 0; k < len; k++ )
691 buf[0][k] = (float)x[k];
692 buf[1][k] = (float)y[k];
695 FastAtan2_32f( buf[1], buf[0], buf[0], len, angleInDegrees );
696 for( k = 0; k < len; k++ )
697 angle[k] = buf[0][k];
708 /****************************************************************************************\
709 * Polar -> Cartezian *
710 \****************************************************************************************/
712 static void SinCos_32f( const float *angle, float *sinval, float* cosval,
713 int len, int angle_in_degrees )
717 static const double sin_table[] =
719 0.00000000000000000000, 0.09801714032956060400,
720 0.19509032201612825000, 0.29028467725446233000,
721 0.38268343236508978000, 0.47139673682599764000,
722 0.55557023301960218000, 0.63439328416364549000,
723 0.70710678118654746000, 0.77301045336273699000,
724 0.83146961230254524000, 0.88192126434835494000,
725 0.92387953251128674000, 0.95694033573220894000,
726 0.98078528040323043000, 0.99518472667219682000,
727 1.00000000000000000000, 0.99518472667219693000,
728 0.98078528040323043000, 0.95694033573220894000,
729 0.92387953251128674000, 0.88192126434835505000,
730 0.83146961230254546000, 0.77301045336273710000,
731 0.70710678118654757000, 0.63439328416364549000,
732 0.55557023301960218000, 0.47139673682599786000,
733 0.38268343236508989000, 0.29028467725446239000,
734 0.19509032201612861000, 0.09801714032956082600,
735 0.00000000000000012246, -0.09801714032956059000,
736 -0.19509032201612836000, -0.29028467725446211000,
737 -0.38268343236508967000, -0.47139673682599764000,
738 -0.55557023301960196000, -0.63439328416364527000,
739 -0.70710678118654746000, -0.77301045336273666000,
740 -0.83146961230254524000, -0.88192126434835494000,
741 -0.92387953251128652000, -0.95694033573220882000,
742 -0.98078528040323032000, -0.99518472667219693000,
743 -1.00000000000000000000, -0.99518472667219693000,
744 -0.98078528040323043000, -0.95694033573220894000,
745 -0.92387953251128663000, -0.88192126434835505000,
746 -0.83146961230254546000, -0.77301045336273688000,
747 -0.70710678118654768000, -0.63439328416364593000,
748 -0.55557023301960218000, -0.47139673682599792000,
749 -0.38268343236509039000, -0.29028467725446250000,
750 -0.19509032201612872000, -0.09801714032956050600,
753 static const double k2 = (2*CV_PI)/N;
755 static const double sin_a0 = -0.166630293345647*k2*k2*k2;
756 static const double sin_a2 = k2;
758 static const double cos_a0 = -0.499818138450326*k2*k2;
759 /*static const double cos_a2 = 1;*/
764 if( !angle_in_degrees )
769 for( i = 0; i < len; i++ )
771 double t = angle[i]*k1;
774 int sin_idx = it & (N - 1);
775 int cos_idx = (N/4 - sin_idx) & (N - 1);
777 double sin_b = (sin_a0*t*t + sin_a2)*t;
778 double cos_b = cos_a0*t*t + 1;
780 double sin_a = sin_table[sin_idx];
781 double cos_a = sin_table[cos_idx];
783 double sin_val = sin_a*cos_b + cos_a*sin_b;
784 double cos_val = cos_a*cos_b - sin_a*sin_b;
786 sinval[i] = (float)sin_val;
787 cosval[i] = (float)cos_val;
794 static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
795 OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
797 const ocl::Device & d = ocl::Device::getDefault();
798 int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
799 rowsPerWI = d.isIntel() ? 4 : 1;
800 bool doubleSupport = d.doubleFPConfig() > 0;
802 if ( !doubleSupport && depth == CV_64F )
805 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
806 format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s",
807 ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI,
808 depth, angleInDegrees ? "AD" : "AR",
809 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
813 UMat mag = _mag.getUMat(), angle = _angle.getUMat();
814 Size size = angle.size();
815 CV_Assert(mag.size() == size);
817 _dst1.create(size, type);
818 _dst2.create(size, type);
819 UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
821 k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle),
822 ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2));
824 size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
825 return k.run(2, globalsize, NULL, false);
830 void polarToCart( InputArray src1, InputArray src2,
831 OutputArray dst1, OutputArray dst2, bool angleInDegrees )
833 int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
834 CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type));
836 CV_OCL_RUN(!src1.empty() && src2.dims() <= 2 && dst1.isUMat() && dst2.isUMat(),
837 ocl_polarToCart(src1, src2, dst1, dst2, angleInDegrees))
839 Mat Mag = src1.getMat(), Angle = src2.getMat();
840 CV_Assert( Mag.empty() || Angle.size == Mag.size);
841 dst1.create( Angle.dims, Angle.size, type );
842 dst2.create( Angle.dims, Angle.size, type );
843 Mat X = dst1.getMat(), Y = dst2.getMat();
845 #if defined(HAVE_IPP)
848 if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
850 typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
851 void * pDstRe, void * pDstIm, int len);
852 ippsPolarToCart ippFunc =
853 depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f :
854 depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0;
855 CV_Assert(ippFunc != 0);
857 IppStatus status = ippFunc(Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast<int>(cn * X.total()));
860 CV_IMPL_ADD(CV_IMPL_IPP);
868 const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
870 NAryMatIterator it(arrays, ptrs);
871 cv::AutoBuffer<float> _buf;
872 float* buf[2] = {0, 0};
873 int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
874 size_t esz1 = Angle.elemSize1();
876 if( depth == CV_64F )
878 _buf.allocate(blockSize*2);
880 buf[1] = buf[0] + blockSize;
883 for( size_t i = 0; i < it.nplanes; i++, ++it )
885 for( j = 0; j < total; j += blockSize )
887 int len = std::min(total - j, blockSize);
888 if( depth == CV_32F )
890 const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1];
891 float *x = (float*)ptrs[2], *y = (float*)ptrs[3];
893 SinCos_32f( angle, y, x, len, angleInDegrees );
895 for( k = 0; k < len; k++ )
898 x[k] *= m; y[k] *= m;
903 const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1];
904 double *x = (double*)ptrs[2], *y = (double*)ptrs[3];
906 for( k = 0; k < len; k++ )
907 buf[0][k] = (float)angle[k];
909 SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees );
911 for( k = 0; k < len; k++ )
914 x[k] = buf[0][k]*m; y[k] = buf[1][k]*m;
917 for( k = 0; k < len; k++ )
919 x[k] = buf[0][k]; y[k] = buf[1][k];
932 /****************************************************************************************\
934 \****************************************************************************************/
939 #if ( defined( WORDS_BIGENDIAN ) && !defined( OPENCV_UNIVERSAL_BUILD ) ) || defined( __BIG_ENDIAN__ )
951 #define EXPTAB_SCALE 6
952 #define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1)
954 #define EXPPOLY_32F_A0 .9670371139572337719125840413672004409288e-2
956 static const double expTab[] = {
957 1.0 * EXPPOLY_32F_A0,
958 1.0108892860517004600204097905619 * EXPPOLY_32F_A0,
959 1.0218971486541166782344801347833 * EXPPOLY_32F_A0,
960 1.0330248790212284225001082839705 * EXPPOLY_32F_A0,
961 1.0442737824274138403219664787399 * EXPPOLY_32F_A0,
962 1.0556451783605571588083413251529 * EXPPOLY_32F_A0,
963 1.0671404006768236181695211209928 * EXPPOLY_32F_A0,
964 1.0787607977571197937406800374385 * EXPPOLY_32F_A0,
965 1.0905077326652576592070106557607 * EXPPOLY_32F_A0,
966 1.1023825833078409435564142094256 * EXPPOLY_32F_A0,
967 1.1143867425958925363088129569196 * EXPPOLY_32F_A0,
968 1.126521618608241899794798643787 * EXPPOLY_32F_A0,
969 1.1387886347566916537038302838415 * EXPPOLY_32F_A0,
970 1.151189229952982705817759635202 * EXPPOLY_32F_A0,
971 1.1637248587775775138135735990922 * EXPPOLY_32F_A0,
972 1.1763969916502812762846457284838 * EXPPOLY_32F_A0,
973 1.1892071150027210667174999705605 * EXPPOLY_32F_A0,
974 1.2021567314527031420963969574978 * EXPPOLY_32F_A0,
975 1.2152473599804688781165202513388 * EXPPOLY_32F_A0,
976 1.2284805361068700056940089577928 * EXPPOLY_32F_A0,
977 1.2418578120734840485936774687266 * EXPPOLY_32F_A0,
978 1.2553807570246910895793906574423 * EXPPOLY_32F_A0,
979 1.2690509571917332225544190810323 * EXPPOLY_32F_A0,
980 1.2828700160787782807266697810215 * EXPPOLY_32F_A0,
981 1.2968395546510096659337541177925 * EXPPOLY_32F_A0,
982 1.3109612115247643419229917863308 * EXPPOLY_32F_A0,
983 1.3252366431597412946295370954987 * EXPPOLY_32F_A0,
984 1.3396675240533030053600306697244 * EXPPOLY_32F_A0,
985 1.3542555469368927282980147401407 * EXPPOLY_32F_A0,
986 1.3690024229745906119296011329822 * EXPPOLY_32F_A0,
987 1.3839098819638319548726595272652 * EXPPOLY_32F_A0,
988 1.3989796725383111402095281367152 * EXPPOLY_32F_A0,
989 1.4142135623730950488016887242097 * EXPPOLY_32F_A0,
990 1.4296133383919700112350657782751 * EXPPOLY_32F_A0,
991 1.4451808069770466200370062414717 * EXPPOLY_32F_A0,
992 1.4609177941806469886513028903106 * EXPPOLY_32F_A0,
993 1.476826145939499311386907480374 * EXPPOLY_32F_A0,
994 1.4929077282912648492006435314867 * EXPPOLY_32F_A0,
995 1.5091644275934227397660195510332 * EXPPOLY_32F_A0,
996 1.5255981507445383068512536895169 * EXPPOLY_32F_A0,
997 1.5422108254079408236122918620907 * EXPPOLY_32F_A0,
998 1.5590044002378369670337280894749 * EXPPOLY_32F_A0,
999 1.5759808451078864864552701601819 * EXPPOLY_32F_A0,
1000 1.5931421513422668979372486431191 * EXPPOLY_32F_A0,
1001 1.6104903319492543081795206673574 * EXPPOLY_32F_A0,
1002 1.628027421857347766848218522014 * EXPPOLY_32F_A0,
1003 1.6457554781539648445187567247258 * EXPPOLY_32F_A0,
1004 1.6636765803267364350463364569764 * EXPPOLY_32F_A0,
1005 1.6817928305074290860622509524664 * EXPPOLY_32F_A0,
1006 1.7001063537185234695013625734975 * EXPPOLY_32F_A0,
1007 1.7186192981224779156293443764563 * EXPPOLY_32F_A0,
1008 1.7373338352737062489942020818722 * EXPPOLY_32F_A0,
1009 1.7562521603732994831121606193753 * EXPPOLY_32F_A0,
1010 1.7753764925265212525505592001993 * EXPPOLY_32F_A0,
1011 1.7947090750031071864277032421278 * EXPPOLY_32F_A0,
1012 1.8142521755003987562498346003623 * EXPPOLY_32F_A0,
1013 1.8340080864093424634870831895883 * EXPPOLY_32F_A0,
1014 1.8539791250833855683924530703377 * EXPPOLY_32F_A0,
1015 1.8741676341102999013299989499544 * EXPPOLY_32F_A0,
1016 1.8945759815869656413402186534269 * EXPPOLY_32F_A0,
1017 1.9152065613971472938726112702958 * EXPPOLY_32F_A0,
1018 1.9360617934922944505980559045667 * EXPPOLY_32F_A0,
1019 1.9571441241754002690183222516269 * EXPPOLY_32F_A0,
1020 1.9784560263879509682582499181312 * EXPPOLY_32F_A0,
1024 // the code below uses _mm_cast* intrinsics, which are not avialable on VS2005
1025 #if (defined _MSC_VER && _MSC_VER < 1500) || \
1026 (!defined __APPLE__ && defined __GNUC__ && __GNUC__*100 + __GNUC_MINOR__ < 402)
1031 static const double exp_prescale = 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE);
1032 static const double exp_postscale = 1./(1 << EXPTAB_SCALE);
1033 static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000
1035 static void Exp_32f( const float *_x, float *y, int n )
1038 A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
1039 A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
1040 A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0),
1041 A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0);
1044 #define EXPPOLY(x) \
1045 (((((x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)
1048 const Cv32suf* x = (const Cv32suf*)_x;
1052 if( n >= 8 && USE_SSE2 )
1054 static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
1055 static const __m128 postscale4 = _mm_set1_ps((float)exp_postscale);
1056 static const __m128 maxval4 = _mm_set1_ps((float)(exp_max_val/exp_prescale));
1057 static const __m128 minval4 = _mm_set1_ps((float)(-exp_max_val/exp_prescale));
1059 static const __m128 mA1 = _mm_set1_ps(A1);
1060 static const __m128 mA2 = _mm_set1_ps(A2);
1061 static const __m128 mA3 = _mm_set1_ps(A3);
1062 static const __m128 mA4 = _mm_set1_ps(A4);
1063 bool y_aligned = (size_t)(void*)y % 16 == 0;
1065 ushort CV_DECL_ALIGNED(16) tab_idx[8];
1067 for( ; i <= n - 8; i += 8 )
1070 xf0 = _mm_loadu_ps(&x[i].f);
1071 xf1 = _mm_loadu_ps(&x[i+4].f);
1072 __m128i xi0, xi1, xi2, xi3;
1074 xf0 = _mm_min_ps(_mm_max_ps(xf0, minval4), maxval4);
1075 xf1 = _mm_min_ps(_mm_max_ps(xf1, minval4), maxval4);
1077 __m128d xd0 = _mm_cvtps_pd(xf0);
1078 __m128d xd2 = _mm_cvtps_pd(_mm_movehl_ps(xf0, xf0));
1079 __m128d xd1 = _mm_cvtps_pd(xf1);
1080 __m128d xd3 = _mm_cvtps_pd(_mm_movehl_ps(xf1, xf1));
1082 xd0 = _mm_mul_pd(xd0, prescale2);
1083 xd2 = _mm_mul_pd(xd2, prescale2);
1084 xd1 = _mm_mul_pd(xd1, prescale2);
1085 xd3 = _mm_mul_pd(xd3, prescale2);
1087 xi0 = _mm_cvtpd_epi32(xd0);
1088 xi2 = _mm_cvtpd_epi32(xd2);
1090 xi1 = _mm_cvtpd_epi32(xd1);
1091 xi3 = _mm_cvtpd_epi32(xd3);
1093 xd0 = _mm_sub_pd(xd0, _mm_cvtepi32_pd(xi0));
1094 xd2 = _mm_sub_pd(xd2, _mm_cvtepi32_pd(xi2));
1095 xd1 = _mm_sub_pd(xd1, _mm_cvtepi32_pd(xi1));
1096 xd3 = _mm_sub_pd(xd3, _mm_cvtepi32_pd(xi3));
1098 xf0 = _mm_movelh_ps(_mm_cvtpd_ps(xd0), _mm_cvtpd_ps(xd2));
1099 xf1 = _mm_movelh_ps(_mm_cvtpd_ps(xd1), _mm_cvtpd_ps(xd3));
1101 xf0 = _mm_mul_ps(xf0, postscale4);
1102 xf1 = _mm_mul_ps(xf1, postscale4);
1104 xi0 = _mm_unpacklo_epi64(xi0, xi2);
1105 xi1 = _mm_unpacklo_epi64(xi1, xi3);
1106 xi0 = _mm_packs_epi32(xi0, xi1);
1108 _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi16(EXPTAB_MASK)));
1110 xi0 = _mm_add_epi16(_mm_srai_epi16(xi0, EXPTAB_SCALE), _mm_set1_epi16(127));
1111 xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
1112 xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(255));
1113 xi1 = _mm_unpackhi_epi16(xi0, _mm_setzero_si128());
1114 xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
1116 __m128d yd0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
1117 __m128d yd1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
1118 __m128d yd2 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[4]), _mm_load_sd(expTab + tab_idx[5]));
1119 __m128d yd3 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[6]), _mm_load_sd(expTab + tab_idx[7]));
1121 __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
1122 __m128 yf1 = _mm_movelh_ps(_mm_cvtpd_ps(yd2), _mm_cvtpd_ps(yd3));
1124 yf0 = _mm_mul_ps(yf0, _mm_castsi128_ps(_mm_slli_epi32(xi0, 23)));
1125 yf1 = _mm_mul_ps(yf1, _mm_castsi128_ps(_mm_slli_epi32(xi1, 23)));
1127 __m128 zf0 = _mm_add_ps(xf0, mA1);
1128 __m128 zf1 = _mm_add_ps(xf1, mA1);
1130 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA2);
1131 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA2);
1133 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA3);
1134 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA3);
1136 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA4);
1137 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA4);
1139 zf0 = _mm_mul_ps(zf0, yf0);
1140 zf1 = _mm_mul_ps(zf1, yf1);
1144 _mm_store_ps(y + i, zf0);
1145 _mm_store_ps(y + i + 4, zf1);
1149 _mm_storeu_ps(y + i, zf0);
1150 _mm_storeu_ps(y + i + 4, zf1);
1156 for( ; i <= n - 4; i += 4 )
1158 double x0 = x[i].f * exp_prescale;
1159 double x1 = x[i + 1].f * exp_prescale;
1160 double x2 = x[i + 2].f * exp_prescale;
1161 double x3 = x[i + 3].f * exp_prescale;
1162 int val0, val1, val2, val3, t;
1164 if( ((x[i].i >> 23) & 255) > 127 + 10 )
1165 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
1167 if( ((x[i+1].i >> 23) & 255) > 127 + 10 )
1168 x1 = x[i+1].i < 0 ? -exp_max_val : exp_max_val;
1170 if( ((x[i+2].i >> 23) & 255) > 127 + 10 )
1171 x2 = x[i+2].i < 0 ? -exp_max_val : exp_max_val;
1173 if( ((x[i+3].i >> 23) & 255) > 127 + 10 )
1174 x3 = x[i+3].i < 0 ? -exp_max_val : exp_max_val;
1181 x0 = (x0 - val0)*exp_postscale;
1182 x1 = (x1 - val1)*exp_postscale;
1183 x2 = (x2 - val2)*exp_postscale;
1184 x3 = (x3 - val3)*exp_postscale;
1186 t = (val0 >> EXPTAB_SCALE) + 127;
1187 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1190 t = (val1 >> EXPTAB_SCALE) + 127;
1191 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1194 t = (val2 >> EXPTAB_SCALE) + 127;
1195 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1198 t = (val3 >> EXPTAB_SCALE) + 127;
1199 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1202 x0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
1203 x1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
1206 y[i + 1] = (float)x1;
1208 x2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
1209 x3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
1211 y[i + 2] = (float)x2;
1212 y[i + 3] = (float)x3;
1217 double x0 = x[i].f * exp_prescale;
1220 if( ((x[i].i >> 23) & 255) > 127 + 10 )
1221 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
1224 t = (val0 >> EXPTAB_SCALE) + 127;
1225 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1228 x0 = (x0 - val0)*exp_postscale;
1230 y[i] = (float)(buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY(x0));
1235 static void Exp_64f( const double *_x, double *y, int n )
1238 A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
1239 A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
1240 A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0,
1241 A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0,
1242 A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0,
1243 A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0;
1246 #define EXPPOLY(x) (((((A0*(x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)*(x) + A5)
1250 const Cv64suf* x = (const Cv64suf*)_x;
1255 static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
1256 static const __m128d postscale2 = _mm_set1_pd(exp_postscale);
1257 static const __m128d maxval2 = _mm_set1_pd(exp_max_val);
1258 static const __m128d minval2 = _mm_set1_pd(-exp_max_val);
1260 static const __m128d mA0 = _mm_set1_pd(A0);
1261 static const __m128d mA1 = _mm_set1_pd(A1);
1262 static const __m128d mA2 = _mm_set1_pd(A2);
1263 static const __m128d mA3 = _mm_set1_pd(A3);
1264 static const __m128d mA4 = _mm_set1_pd(A4);
1265 static const __m128d mA5 = _mm_set1_pd(A5);
1267 int CV_DECL_ALIGNED(16) tab_idx[4];
1269 for( ; i <= n - 4; i += 4 )
1271 __m128d xf0 = _mm_loadu_pd(&x[i].f), xf1 = _mm_loadu_pd(&x[i+2].f);
1273 xf0 = _mm_min_pd(_mm_max_pd(xf0, minval2), maxval2);
1274 xf1 = _mm_min_pd(_mm_max_pd(xf1, minval2), maxval2);
1275 xf0 = _mm_mul_pd(xf0, prescale2);
1276 xf1 = _mm_mul_pd(xf1, prescale2);
1278 xi0 = _mm_cvtpd_epi32(xf0);
1279 xi1 = _mm_cvtpd_epi32(xf1);
1280 xf0 = _mm_mul_pd(_mm_sub_pd(xf0, _mm_cvtepi32_pd(xi0)), postscale2);
1281 xf1 = _mm_mul_pd(_mm_sub_pd(xf1, _mm_cvtepi32_pd(xi1)), postscale2);
1283 xi0 = _mm_unpacklo_epi64(xi0, xi1);
1284 _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi32(EXPTAB_MASK)));
1286 xi0 = _mm_add_epi32(_mm_srai_epi32(xi0, EXPTAB_SCALE), _mm_set1_epi32(1023));
1287 xi0 = _mm_packs_epi32(xi0, xi0);
1288 xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
1289 xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(2047));
1290 xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
1291 xi1 = _mm_unpackhi_epi32(xi0, _mm_setzero_si128());
1292 xi0 = _mm_unpacklo_epi32(xi0, _mm_setzero_si128());
1294 __m128d yf0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
1295 __m128d yf1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
1296 yf0 = _mm_mul_pd(yf0, _mm_castsi128_pd(_mm_slli_epi64(xi0, 52)));
1297 yf1 = _mm_mul_pd(yf1, _mm_castsi128_pd(_mm_slli_epi64(xi1, 52)));
1299 __m128d zf0 = _mm_add_pd(_mm_mul_pd(mA0, xf0), mA1);
1300 __m128d zf1 = _mm_add_pd(_mm_mul_pd(mA0, xf1), mA1);
1302 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA2);
1303 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA2);
1305 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA3);
1306 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA3);
1308 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA4);
1309 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA4);
1311 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA5);
1312 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA5);
1314 zf0 = _mm_mul_pd(zf0, yf0);
1315 zf1 = _mm_mul_pd(zf1, yf1);
1317 _mm_storeu_pd(y + i, zf0);
1318 _mm_storeu_pd(y + i + 2, zf1);
1323 for( ; i <= n - 4; i += 4 )
1325 double x0 = x[i].f * exp_prescale;
1326 double x1 = x[i + 1].f * exp_prescale;
1327 double x2 = x[i + 2].f * exp_prescale;
1328 double x3 = x[i + 3].f * exp_prescale;
1330 double y0, y1, y2, y3;
1331 int val0, val1, val2, val3, t;
1333 t = (int)(x[i].i >> 52);
1334 if( (t & 2047) > 1023 + 10 )
1335 x0 = t < 0 ? -exp_max_val : exp_max_val;
1337 t = (int)(x[i+1].i >> 52);
1338 if( (t & 2047) > 1023 + 10 )
1339 x1 = t < 0 ? -exp_max_val : exp_max_val;
1341 t = (int)(x[i+2].i >> 52);
1342 if( (t & 2047) > 1023 + 10 )
1343 x2 = t < 0 ? -exp_max_val : exp_max_val;
1345 t = (int)(x[i+3].i >> 52);
1346 if( (t & 2047) > 1023 + 10 )
1347 x3 = t < 0 ? -exp_max_val : exp_max_val;
1354 x0 = (x0 - val0)*exp_postscale;
1355 x1 = (x1 - val1)*exp_postscale;
1356 x2 = (x2 - val2)*exp_postscale;
1357 x3 = (x3 - val3)*exp_postscale;
1359 t = (val0 >> EXPTAB_SCALE) + 1023;
1360 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1361 buf[0].i = (int64)t << 52;
1363 t = (val1 >> EXPTAB_SCALE) + 1023;
1364 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1365 buf[1].i = (int64)t << 52;
1367 t = (val2 >> EXPTAB_SCALE) + 1023;
1368 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1369 buf[2].i = (int64)t << 52;
1371 t = (val3 >> EXPTAB_SCALE) + 1023;
1372 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1373 buf[3].i = (int64)t << 52;
1375 y0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
1376 y1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
1381 y2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
1382 y3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
1390 double x0 = x[i].f * exp_prescale;
1393 t = (int)(x[i].i >> 52);
1394 if( (t & 2047) > 1023 + 10 )
1395 x0 = t < 0 ? -exp_max_val : exp_max_val;
1398 t = (val0 >> EXPTAB_SCALE) + 1023;
1399 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1401 buf[0].i = (int64)t << 52;
1402 x0 = (x0 - val0)*exp_postscale;
1404 y[i] = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
1410 #undef EXPPOLY_32F_A0
1413 static void Exp_32f_ipp(const float *x, float *y, int n)
1417 if (0 <= ippsExp_32f_A21(x, y, n))
1419 CV_IMPL_ADD(CV_IMPL_IPP);
1422 setIppErrorStatus();
1427 static void Exp_64f_ipp(const double *x, double *y, int n)
1431 if (0 <= ippsExp_64f_A50(x, y, n))
1433 CV_IMPL_ADD(CV_IMPL_IPP);
1436 setIppErrorStatus();
1441 #define Exp_32f Exp_32f_ipp
1442 #define Exp_64f Exp_64f_ipp
1446 void exp( InputArray _src, OutputArray _dst )
1448 int type = _src.type(), depth = _src.depth(), cn = _src.channels();
1449 CV_Assert( depth == CV_32F || depth == CV_64F );
1451 CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
1452 ocl_math_op(_src, noArray(), _dst, OCL_OP_EXP))
1454 Mat src = _src.getMat();
1455 _dst.create( src.dims, src.size, type );
1456 Mat dst = _dst.getMat();
1458 const Mat* arrays[] = {&src, &dst, 0};
1460 NAryMatIterator it(arrays, ptrs);
1461 int len = (int)(it.size*cn);
1463 for( size_t i = 0; i < it.nplanes; i++, ++it )
1465 if( depth == CV_32F )
1466 Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len);
1468 Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len);
1473 /****************************************************************************************\
1475 \****************************************************************************************/
1477 #define LOGTAB_SCALE 8
1478 #define LOGTAB_MASK ((1 << LOGTAB_SCALE) - 1)
1479 #define LOGTAB_MASK2 ((1 << (20 - LOGTAB_SCALE)) - 1)
1480 #define LOGTAB_MASK2_32F ((1 << (23 - LOGTAB_SCALE)) - 1)
1482 static const double CV_DECL_ALIGNED(16) icvLogTab[] = {
1483 0.0000000000000000000000000000000000000000, 1.000000000000000000000000000000000000000,
1484 .00389864041565732288852075271279318258166, .9961089494163424124513618677042801556420,
1485 .00778214044205494809292034119607706088573, .9922480620155038759689922480620155038760,
1486 .01165061721997527263705585198749759001657, .9884169884169884169884169884169884169884,
1487 .01550418653596525274396267235488267033361, .9846153846153846153846153846153846153846,
1488 .01934296284313093139406447562578250654042, .9808429118773946360153256704980842911877,
1489 .02316705928153437593630670221500622574241, .9770992366412213740458015267175572519084,
1490 .02697658769820207233514075539915211265906, .9733840304182509505703422053231939163498,
1491 .03077165866675368732785500469617545604706, .9696969696969696969696969696969696969697,
1492 .03455238150665972812758397481047722976656, .9660377358490566037735849056603773584906,
1493 .03831886430213659461285757856785494368522, .9624060150375939849624060150375939849624,
1494 .04207121392068705056921373852674150839447, .9588014981273408239700374531835205992509,
1495 .04580953603129420126371940114040626212953, .9552238805970149253731343283582089552239,
1496 .04953393512227662748292900118940451648088, .9516728624535315985130111524163568773234,
1497 .05324451451881227759255210685296333394944, .9481481481481481481481481481481481481481,
1498 .05694137640013842427411105973078520037234, .9446494464944649446494464944649446494465,
1499 .06062462181643483993820353816772694699466, .9411764705882352941176470588235294117647,
1500 .06429435070539725460836422143984236754475, .9377289377289377289377289377289377289377,
1501 .06795066190850773679699159401934593915938, .9343065693430656934306569343065693430657,
1502 .07159365318700880442825962290953611955044, .9309090909090909090909090909090909090909,
1503 .07522342123758751775142172846244648098944, .9275362318840579710144927536231884057971,
1504 .07884006170777602129362549021607264876369, .9241877256317689530685920577617328519856,
1505 .08244366921107458556772229485432035289706, .9208633093525179856115107913669064748201,
1506 .08603433734180314373940490213499288074675, .9175627240143369175627240143369175627240,
1507 .08961215868968712416897659522874164395031, .9142857142857142857142857142857142857143,
1508 .09317722485418328259854092721070628613231, .9110320284697508896797153024911032028470,
1509 .09672962645855109897752299730200320482256, .9078014184397163120567375886524822695035,
1510 .10026945316367513738597949668474029749630, .9045936395759717314487632508833922261484,
1511 .10379679368164355934833764649738441221420, .9014084507042253521126760563380281690141,
1512 .10731173578908805021914218968959175981580, .8982456140350877192982456140350877192982,
1513 .11081436634029011301105782649756292812530, .8951048951048951048951048951048951048951,
1514 .11430477128005862852422325204315711744130, .8919860627177700348432055749128919860627,
1515 .11778303565638344185817487641543266363440, .8888888888888888888888888888888888888889,
1516 .12124924363286967987640707633545389398930, .8858131487889273356401384083044982698962,
1517 .12470347850095722663787967121606925502420, .8827586206896551724137931034482758620690,
1518 .12814582269193003360996385708858724683530, .8797250859106529209621993127147766323024,
1519 .13157635778871926146571524895989568904040, .8767123287671232876712328767123287671233,
1520 .13499516453750481925766280255629681050780, .8737201365187713310580204778156996587031,
1521 .13840232285911913123754857224412262439730, .8707482993197278911564625850340136054422,
1522 .14179791186025733629172407290752744302150, .8677966101694915254237288135593220338983,
1523 .14518200984449788903951628071808954700830, .8648648648648648648648648648648648648649,
1524 .14855469432313711530824207329715136438610, .8619528619528619528619528619528619528620,
1525 .15191604202584196858794030049466527998450, .8590604026845637583892617449664429530201,
1526 .15526612891112392955683674244937719777230, .8561872909698996655518394648829431438127,
1527 .15860503017663857283636730244325008243330, .8533333333333333333333333333333333333333,
1528 .16193282026931324346641360989451641216880, .8504983388704318936877076411960132890365,
1529 .16524957289530714521497145597095368430010, .8476821192052980132450331125827814569536,
1530 .16855536102980664403538924034364754334090, .8448844884488448844884488448844884488449,
1531 .17185025692665920060697715143760433420540, .8421052631578947368421052631578947368421,
1532 .17513433212784912385018287750426679849630, .8393442622950819672131147540983606557377,
1533 .17840765747281828179637841458315961062910, .8366013071895424836601307189542483660131,
1534 .18167030310763465639212199675966985523700, .8338762214983713355048859934853420195440,
1535 .18492233849401198964024217730184318497780, .8311688311688311688311688311688311688312,
1536 .18816383241818296356839823602058459073300, .8284789644012944983818770226537216828479,
1537 .19139485299962943898322009772527962923050, .8258064516129032258064516129032258064516,
1538 .19461546769967164038916962454095482826240, .8231511254019292604501607717041800643087,
1539 .19782574332991986754137769821682013571260, .8205128205128205128205128205128205128205,
1540 .20102574606059073203390141770796617493040, .8178913738019169329073482428115015974441,
1541 .20421554142869088876999228432396193966280, .8152866242038216560509554140127388535032,
1542 .20739519434607056602715147164417430758480, .8126984126984126984126984126984126984127,
1543 .21056476910734961416338251183333341032260, .8101265822784810126582278481012658227848,
1544 .21372432939771812687723695489694364368910, .8075709779179810725552050473186119873817,
1545 .21687393830061435506806333251006435602900, .8050314465408805031446540880503144654088,
1546 .22001365830528207823135744547471404075630, .8025078369905956112852664576802507836991,
1547 .22314355131420973710199007200571941211830, .8000000000000000000000000000000000000000,
1548 .22626367865045338145790765338460914790630, .7975077881619937694704049844236760124611,
1549 .22937410106484582006380890106811420992010, .7950310559006211180124223602484472049689,
1550 .23247487874309405442296849741978803649550, .7925696594427244582043343653250773993808,
1551 .23556607131276688371634975283086532726890, .7901234567901234567901234567901234567901,
1552 .23864773785017498464178231643018079921600, .7876923076923076923076923076923076923077,
1553 .24171993688714515924331749374687206000090, .7852760736196319018404907975460122699387,
1554 .24478272641769091566565919038112042471760, .7828746177370030581039755351681957186544,
1555 .24783616390458124145723672882013488560910, .7804878048780487804878048780487804878049,
1556 .25088030628580937353433455427875742316250, .7781155015197568389057750759878419452888,
1557 .25391520998096339667426946107298135757450, .7757575757575757575757575757575757575758,
1558 .25694093089750041913887912414793390780680, .7734138972809667673716012084592145015106,
1559 .25995752443692604627401010475296061486000, .7710843373493975903614457831325301204819,
1560 .26296504550088134477547896494797896593800, .7687687687687687687687687687687687687688,
1561 .26596354849713793599974565040611196309330, .7664670658682634730538922155688622754491,
1562 .26895308734550393836570947314612567424780, .7641791044776119402985074626865671641791,
1563 .27193371548364175804834985683555714786050, .7619047619047619047619047619047619047619,
1564 .27490548587279922676529508862586226314300, .7596439169139465875370919881305637982196,
1565 .27786845100345625159121709657483734190480, .7573964497041420118343195266272189349112,
1566 .28082266290088775395616949026589281857030, .7551622418879056047197640117994100294985,
1567 .28376817313064456316240580235898960381750, .7529411764705882352941176470588235294118,
1568 .28670503280395426282112225635501090437180, .7507331378299120234604105571847507331378,
1569 .28963329258304265634293983566749375313530, .7485380116959064327485380116959064327485,
1570 .29255300268637740579436012922087684273730, .7463556851311953352769679300291545189504,
1571 .29546421289383584252163927885703742504130, .7441860465116279069767441860465116279070,
1572 .29836697255179722709783618483925238251680, .7420289855072463768115942028985507246377,
1573 .30126133057816173455023545102449133992200, .7398843930635838150289017341040462427746,
1574 .30414733546729666446850615102448500692850, .7377521613832853025936599423631123919308,
1575 .30702503529491181888388950937951449304830, .7356321839080459770114942528735632183908,
1576 .30989447772286465854207904158101882785550, .7335243553008595988538681948424068767908,
1577 .31275571000389684739317885942000430077330, .7314285714285714285714285714285714285714,
1578 .31560877898630329552176476681779604405180, .7293447293447293447293447293447293447293,
1579 .31845373111853458869546784626436419785030, .7272727272727272727272727272727272727273,
1580 .32129061245373424782201254856772720813750, .7252124645892351274787535410764872521246,
1581 .32411946865421192853773391107097268104550, .7231638418079096045197740112994350282486,
1582 .32694034499585328257253991068864706903700, .7211267605633802816901408450704225352113,
1583 .32975328637246797969240219572384376078850, .7191011235955056179775280898876404494382,
1584 .33255833730007655635318997155991382896900, .7170868347338935574229691876750700280112,
1585 .33535554192113781191153520921943709254280, .7150837988826815642458100558659217877095,
1586 .33814494400871636381467055798566434532400, .7130919220055710306406685236768802228412,
1587 .34092658697059319283795275623560883104800, .7111111111111111111111111111111111111111,
1588 .34370051385331840121395430287520866841080, .7091412742382271468144044321329639889197,
1589 .34646676734620857063262633346312213689100, .7071823204419889502762430939226519337017,
1590 .34922538978528827602332285096053965389730, .7052341597796143250688705234159779614325,
1591 .35197642315717814209818925519357435405250, .7032967032967032967032967032967032967033,
1592 .35471990910292899856770532096561510115850, .7013698630136986301369863013698630136986,
1593 .35745588892180374385176833129662554711100, .6994535519125683060109289617486338797814,
1594 .36018440357500774995358483465679455548530, .6975476839237057220708446866485013623978,
1595 .36290549368936841911903457003063522279280, .6956521739130434782608695652173913043478,
1596 .36561919956096466943762379742111079394830, .6937669376693766937669376693766937669377,
1597 .36832556115870762614150635272380895912650, .6918918918918918918918918918918918918919,
1598 .37102461812787262962487488948681857436900, .6900269541778975741239892183288409703504,
1599 .37371640979358405898480555151763837784530, .6881720430107526881720430107526881720430,
1600 .37640097516425302659470730759494472295050, .6863270777479892761394101876675603217158,
1601 .37907835293496944251145919224654790014030, .6844919786096256684491978609625668449198,
1602 .38174858149084833769393299007788300514230, .6826666666666666666666666666666666666667,
1603 .38441169891033200034513583887019194662580, .6808510638297872340425531914893617021277,
1604 .38706774296844825844488013899535872042180, .6790450928381962864721485411140583554377,
1605 .38971675114002518602873692543653305619950, .6772486772486772486772486772486772486772,
1606 .39235876060286384303665840889152605086580, .6754617414248021108179419525065963060686,
1607 .39499380824086893770896722344332374632350, .6736842105263157894736842105263157894737,
1608 .39762193064713846624158577469643205404280, .6719160104986876640419947506561679790026,
1609 .40024316412701266276741307592601515352730, .6701570680628272251308900523560209424084,
1610 .40285754470108348090917615991202183067800, .6684073107049608355091383812010443864230,
1611 .40546510810816432934799991016916465014230, .6666666666666666666666666666666666666667,
1612 .40806588980822172674223224930756259709600, .6649350649350649350649350649350649350649,
1613 .41065992498526837639616360320360399782650, .6632124352331606217616580310880829015544,
1614 .41324724855021932601317757871584035456180, .6614987080103359173126614987080103359173,
1615 .41582789514371093497757669865677598863850, .6597938144329896907216494845360824742268,
1616 .41840189913888381489925905043492093682300, .6580976863753213367609254498714652956298,
1617 .42096929464412963239894338585145305842150, .6564102564102564102564102564102564102564,
1618 .42353011550580327293502591601281892508280, .6547314578005115089514066496163682864450,
1619 .42608439531090003260516141381231136620050, .6530612244897959183673469387755102040816,
1620 .42863216738969872610098832410585600882780, .6513994910941475826972010178117048346056,
1621 .43117346481837132143866142541810404509300, .6497461928934010152284263959390862944162,
1622 .43370832042155937902094819946796633303180, .6481012658227848101265822784810126582278,
1623 .43623676677491801667585491486534010618930, .6464646464646464646464646464646464646465,
1624 .43875883620762790027214350629947148263450, .6448362720403022670025188916876574307305,
1625 .44127456080487520440058801796112675219780, .6432160804020100502512562814070351758794,
1626 .44378397241030093089975139264424797147500, .6416040100250626566416040100250626566416,
1627 .44628710262841947420398014401143882423650, .6400000000000000000000000000000000000000,
1628 .44878398282700665555822183705458883196130, .6384039900249376558603491271820448877805,
1629 .45127464413945855836729492693848442286250, .6368159203980099502487562189054726368159,
1630 .45375911746712049854579618113348260521900, .6352357320099255583126550868486352357320,
1631 .45623743348158757315857769754074979573500, .6336633663366336633663366336633663366337,
1632 .45870962262697662081833982483658473938700, .6320987654320987654320987654320987654321,
1633 .46117571512217014895185229761409573256980, .6305418719211822660098522167487684729064,
1634 .46363574096303250549055974261136725544930, .6289926289926289926289926289926289926290,
1635 .46608972992459918316399125615134835243230, .6274509803921568627450980392156862745098,
1636 .46853771156323925639597405279346276074650, .6259168704156479217603911980440097799511,
1637 .47097971521879100631480241645476780831830, .6243902439024390243902439024390243902439,
1638 .47341577001667212165614273544633761048330, .6228710462287104622871046228710462287105,
1639 .47584590486996386493601107758877333253630, .6213592233009708737864077669902912621359,
1640 .47827014848147025860569669930555392056700, .6198547215496368038740920096852300242131,
1641 .48068852934575190261057286988943815231330, .6183574879227053140096618357487922705314,
1642 .48310107575113581113157579238759353756900, .6168674698795180722891566265060240963855,
1643 .48550781578170076890899053978500887751580, .6153846153846153846153846153846153846154,
1644 .48790877731923892879351001283794175833480, .6139088729016786570743405275779376498801,
1645 .49030398804519381705802061333088204264650, .6124401913875598086124401913875598086124,
1646 .49269347544257524607047571407747454941280, .6109785202863961813842482100238663484487,
1647 .49507726679785146739476431321236304938800, .6095238095238095238095238095238095238095,
1648 .49745538920281889838648226032091770321130, .6080760095011876484560570071258907363420,
1649 .49982786955644931126130359189119189977650, .6066350710900473933649289099526066350711,
1650 .50219473456671548383667413872899487614650, .6052009456264775413711583924349881796690,
1651 .50455601075239520092452494282042607665050, .6037735849056603773584905660377358490566,
1652 .50691172444485432801997148999362252652650, .6023529411764705882352941176470588235294,
1653 .50926190178980790257412536448100581765150, .6009389671361502347417840375586854460094,
1654 .51160656874906207391973111953120678663250, .5995316159250585480093676814988290398126,
1655 .51394575110223428282552049495279788970950, .5981308411214953271028037383177570093458,
1656 .51627947444845445623684554448118433356300, .5967365967365967365967365967365967365967,
1657 .51860776420804555186805373523384332656850, .5953488372093023255813953488372093023256,
1658 .52093064562418522900344441950437612831600, .5939675174013921113689095127610208816705,
1659 .52324814376454775732838697877014055848100, .5925925925925925925925925925925925925926,
1660 .52556028352292727401362526507000438869000, .5912240184757505773672055427251732101617,
1661 .52786708962084227803046587723656557500350, .5898617511520737327188940092165898617512,
1662 .53016858660912158374145519701414741575700, .5885057471264367816091954022988505747126,
1663 .53246479886947173376654518506256863474850, .5871559633027522935779816513761467889908,
1664 .53475575061602764748158733709715306758900, .5858123569794050343249427917620137299771,
1665 .53704146589688361856929077475797384977350, .5844748858447488584474885844748858447489,
1666 .53932196859560876944783558428753167390800, .5831435079726651480637813211845102505695,
1667 .54159728243274429804188230264117009937750, .5818181818181818181818181818181818181818,
1668 .54386743096728351609669971367111429572100, .5804988662131519274376417233560090702948,
1669 .54613243759813556721383065450936555862450, .5791855203619909502262443438914027149321,
1670 .54839232556557315767520321969641372561450, .5778781038374717832957110609480812641084,
1671 .55064711795266219063194057525834068655950, .5765765765765765765765765765765765765766,
1672 .55289683768667763352766542084282264113450, .5752808988764044943820224719101123595506,
1673 .55514150754050151093110798683483153581600, .5739910313901345291479820627802690582960,
1674 .55738115013400635344709144192165695130850, .5727069351230425055928411633109619686801,
1675 .55961578793542265941596269840374588966350, .5714285714285714285714285714285714285714,
1676 .56184544326269181269140062795486301183700, .5701559020044543429844097995545657015590,
1677 .56407013828480290218436721261241473257550, .5688888888888888888888888888888888888889,
1678 .56628989502311577464155334382667206227800, .5676274944567627494456762749445676274945,
1679 .56850473535266865532378233183408156037350, .5663716814159292035398230088495575221239,
1680 .57071468100347144680739575051120482385150, .5651214128035320088300220750551876379691,
1681 .57291975356178548306473885531886480748650, .5638766519823788546255506607929515418502,
1682 .57511997447138785144460371157038025558000, .5626373626373626373626373626373626373626,
1683 .57731536503482350219940144597785547375700, .5614035087719298245614035087719298245614,
1684 .57950594641464214795689713355386629700650, .5601750547045951859956236323851203501094,
1685 .58169173963462239562716149521293118596100, .5589519650655021834061135371179039301310,
1686 .58387276558098266665552955601015128195300, .5577342047930283224400871459694989106754,
1687 .58604904500357812846544902640744112432000, .5565217391304347826086956521739130434783,
1688 .58822059851708596855957011939608491957200, .5553145336225596529284164859002169197397,
1689 .59038744660217634674381770309992134571100, .5541125541125541125541125541125541125541,
1690 .59254960960667157898740242671919986605650, .5529157667386609071274298056155507559395,
1691 .59470710774669277576265358220553025603300, .5517241379310344827586206896551724137931,
1692 .59685996110779382384237123915227130055450, .5505376344086021505376344086021505376344,
1693 .59900818964608337768851242799428291618800, .5493562231759656652360515021459227467811,
1694 .60115181318933474940990890900138765573500, .5481798715203426124197002141327623126338,
1695 .60329085143808425240052883964381180703650, .5470085470085470085470085470085470085470,
1696 .60542532396671688843525771517306566238400, .5458422174840085287846481876332622601279,
1697 .60755525022454170969155029524699784815300, .5446808510638297872340425531914893617021,
1698 .60968064953685519036241657886421307921400, .5435244161358811040339702760084925690021,
1699 .61180154110599282990534675263916142284850, .5423728813559322033898305084745762711864,
1700 .61391794401237043121710712512140162289150, .5412262156448202959830866807610993657505,
1701 .61602987721551394351138242200249806046500, .5400843881856540084388185654008438818565,
1702 .61813735955507864705538167982012964785100, .5389473684210526315789473684210526315789,
1703 .62024040975185745772080281312810257077200, .5378151260504201680672268907563025210084,
1704 .62233904640877868441606324267922900617100, .5366876310272536687631027253668763102725,
1705 .62443328801189346144440150965237990021700, .5355648535564853556485355648535564853556,
1706 .62652315293135274476554741340805776417250, .5344467640918580375782881002087682672234,
1707 .62860865942237409420556559780379757285100, .5333333333333333333333333333333333333333,
1708 .63068982562619868570408243613201193511500, .5322245322245322245322245322245322245322,
1709 .63276666957103777644277897707070223987100, .5311203319502074688796680497925311203320,
1710 .63483920917301017716738442686619237065300, .5300207039337474120082815734989648033126,
1711 .63690746223706917739093569252872839570050, .5289256198347107438016528925619834710744,
1712 .63897144645792069983514238629140891134750, .5278350515463917525773195876288659793814,
1713 .64103117942093124081992527862894348800200, .5267489711934156378600823045267489711934,
1714 .64308667860302726193566513757104985415950, .5256673511293634496919917864476386036961,
1715 .64513796137358470073053240412264131009600, .5245901639344262295081967213114754098361,
1716 .64718504499530948859131740391603671014300, .5235173824130879345603271983640081799591,
1717 .64922794662510974195157587018911726772800, .5224489795918367346938775510204081632653,
1718 .65126668331495807251485530287027359008800, .5213849287169042769857433808553971486762,
1719 .65330127201274557080523663898929953575150, .5203252032520325203252032520325203252033,
1720 .65533172956312757406749369692988693714150, .5192697768762677484787018255578093306288,
1721 .65735807270835999727154330685152672231200, .5182186234817813765182186234817813765182,
1722 .65938031808912778153342060249997302889800, .5171717171717171717171717171717171717172,
1723 .66139848224536490484126716182800009846700, .5161290322580645161290322580645161290323,
1724 .66341258161706617713093692145776003599150, .5150905432595573440643863179074446680080,
1725 .66542263254509037562201001492212526500250, .5140562248995983935742971887550200803213,
1726 .66742865127195616370414654738851822912700, .5130260521042084168336673346693386773547,
1727 .66943065394262923906154583164607174694550, .5120000000000000000000000000000000000000,
1728 .67142865660530226534774556057527661323550, .5109780439121756487025948103792415169661,
1729 .67342267521216669923234121597488410770900, .5099601593625498007968127490039840637450,
1730 .67541272562017662384192817626171745359900, .5089463220675944333996023856858846918489,
1731 .67739882359180603188519853574689477682100, .5079365079365079365079365079365079365079,
1732 .67938098479579733801614338517538271844400, .5069306930693069306930693069306930693069,
1733 .68135922480790300781450241629499942064300, .5059288537549407114624505928853754940711,
1734 .68333355911162063645036823800182901322850, .5049309664694280078895463510848126232742,
1735 .68530400309891936760919861626462079584600, .5039370078740157480314960629921259842520,
1736 .68727057207096020619019327568821609020250, .5029469548133595284872298624754420432220,
1737 .68923328123880889251040571252815425395950, .5019607843137254901960784313725490196078,
1738 .69314718055994530941723212145818, 5.0e-01,
1743 #define LOGTAB_TRANSLATE(x,h) (((x) - 1.)*icvLogTab[(h)+1])
1744 static const double ln_2 = 0.69314718055994530941723212145818;
1746 static void Log_32f( const float *_x, float *y, int n )
1748 static const float shift[] = { 0, -1.f/512 };
1750 A0 = 0.3333333333333333333333333f,
1755 #define LOGPOLY(x) (((A0*(x) + A1)*(x) + A2)*(x))
1759 const int* x = (const int*)_x;
1764 static const __m128d ln2_2 = _mm_set1_pd(ln_2);
1765 static const __m128 _1_4 = _mm_set1_ps(1.f);
1766 static const __m128 shift4 = _mm_set1_ps(-1.f/512);
1768 static const __m128 mA0 = _mm_set1_ps(A0);
1769 static const __m128 mA1 = _mm_set1_ps(A1);
1770 static const __m128 mA2 = _mm_set1_ps(A2);
1772 int CV_DECL_ALIGNED(16) idx[4];
1774 for( ; i <= n - 4; i += 4 )
1776 __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
1777 __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 23), _mm_set1_epi32(255)), _mm_set1_epi32(127));
1778 __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
1779 __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0,yi0)), ln2_2);
1781 __m128i xi0 = _mm_or_si128(_mm_and_si128(h0, _mm_set1_epi32(LOGTAB_MASK2_32F)), _mm_set1_epi32(127 << 23));
1783 h0 = _mm_and_si128(_mm_srli_epi32(h0, 23 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK*2));
1784 _mm_store_si128((__m128i*)idx, h0);
1785 h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
1787 __m128d t0, t1, t2, t3, t4;
1788 t0 = _mm_load_pd(icvLogTab + idx[0]);
1789 t2 = _mm_load_pd(icvLogTab + idx[1]);
1790 t1 = _mm_unpackhi_pd(t0, t2);
1791 t0 = _mm_unpacklo_pd(t0, t2);
1792 t2 = _mm_load_pd(icvLogTab + idx[2]);
1793 t4 = _mm_load_pd(icvLogTab + idx[3]);
1794 t3 = _mm_unpackhi_pd(t2, t4);
1795 t2 = _mm_unpacklo_pd(t2, t4);
1797 yd0 = _mm_add_pd(yd0, t0);
1798 yd1 = _mm_add_pd(yd1, t2);
1800 __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
1802 __m128 xf0 = _mm_sub_ps(_mm_castsi128_ps(xi0), _1_4);
1803 xf0 = _mm_mul_ps(xf0, _mm_movelh_ps(_mm_cvtpd_ps(t1), _mm_cvtpd_ps(t3)));
1804 xf0 = _mm_add_ps(xf0, _mm_and_ps(_mm_castsi128_ps(h0), shift4));
1806 __m128 zf0 = _mm_mul_ps(xf0, mA0);
1807 zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA1), xf0);
1808 zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA2), xf0);
1809 yf0 = _mm_add_ps(yf0, zf0);
1811 _mm_storeu_ps(y + i, yf0);
1816 for( ; i <= n - 4; i += 4 )
1818 double x0, x1, x2, x3;
1819 double y0, y1, y2, y3;
1824 buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
1825 buf[1].i = (h1 & LOGTAB_MASK2_32F) | (127 << 23);
1827 y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
1828 y1 = (((h1 >> 23) & 0xff) - 127) * ln_2;
1830 h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1831 h1 = (h1 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1833 y0 += icvLogTab[h0];
1834 y1 += icvLogTab[h1];
1839 x0 = LOGTAB_TRANSLATE( buf[0].f, h0 );
1840 x1 = LOGTAB_TRANSLATE( buf[1].f, h1 );
1842 buf[2].i = (h2 & LOGTAB_MASK2_32F) | (127 << 23);
1843 buf[3].i = (h3 & LOGTAB_MASK2_32F) | (127 << 23);
1845 y2 = (((h2 >> 23) & 0xff) - 127) * ln_2;
1846 y3 = (((h3 >> 23) & 0xff) - 127) * ln_2;
1848 h2 = (h2 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1849 h3 = (h3 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1851 y2 += icvLogTab[h2];
1852 y3 += icvLogTab[h3];
1854 x2 = LOGTAB_TRANSLATE( buf[2].f, h2 );
1855 x3 = LOGTAB_TRANSLATE( buf[3].f, h3 );
1857 x0 += shift[h0 == 510];
1858 x1 += shift[h1 == 510];
1859 y0 += LOGPOLY( x0 );
1860 y1 += LOGPOLY( x1 );
1863 y[i + 1] = (float) y1;
1865 x2 += shift[h2 == 510];
1866 x3 += shift[h3 == 510];
1867 y2 += LOGPOLY( x2 );
1868 y3 += LOGPOLY( x3 );
1870 y[i + 2] = (float) y2;
1871 y[i + 3] = (float) y3;
1880 y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
1882 buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
1883 h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1885 y0 += icvLogTab[h0];
1886 x0 = (float)LOGTAB_TRANSLATE( buf[0].f, h0 );
1887 x0 += shift[h0 == 510];
1888 y0 += LOGPOLY( x0 );
1895 static void Log_64f( const double *x, double *y, int n )
1897 static const double shift[] = { 0, -1./512 };
1901 A5 = 0.333333333333333314829616256247390992939472198486328125,
1904 A2 = -0.1666666666666666574148081281236954964697360992431640625,
1905 A1 = 0.1428571428571428769682682968777953647077083587646484375,
1909 #define LOGPOLY(x,k) ((x)+=shift[k], xq = (x)*(x),\
1910 (((A0*xq + A2)*xq + A4)*xq + A6)*xq + \
1911 (((A1*xq + A3)*xq + A5)*xq + A7)*(x))
1915 DBLINT *X = (DBLINT *) x;
1920 static const __m128d ln2_2 = _mm_set1_pd(ln_2);
1921 static const __m128d _1_2 = _mm_set1_pd(1.);
1922 static const __m128d shift2 = _mm_set1_pd(-1./512);
1924 static const __m128i log_and_mask2 = _mm_set_epi32(LOGTAB_MASK2, 0xffffffff, LOGTAB_MASK2, 0xffffffff);
1925 static const __m128i log_or_mask2 = _mm_set_epi32(1023 << 20, 0, 1023 << 20, 0);
1927 static const __m128d mA0 = _mm_set1_pd(A0);
1928 static const __m128d mA1 = _mm_set1_pd(A1);
1929 static const __m128d mA2 = _mm_set1_pd(A2);
1930 static const __m128d mA3 = _mm_set1_pd(A3);
1931 static const __m128d mA4 = _mm_set1_pd(A4);
1932 static const __m128d mA5 = _mm_set1_pd(A5);
1933 static const __m128d mA6 = _mm_set1_pd(A6);
1934 static const __m128d mA7 = _mm_set1_pd(A7);
1936 int CV_DECL_ALIGNED(16) idx[4];
1938 for( ; i <= n - 4; i += 4 )
1940 __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
1941 __m128i h1 = _mm_loadu_si128((const __m128i*)(x + i + 2));
1943 __m128d xd0 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h0, log_and_mask2), log_or_mask2));
1944 __m128d xd1 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h1, log_and_mask2), log_or_mask2));
1946 h0 = _mm_unpackhi_epi32(_mm_unpacklo_epi32(h0, h1), _mm_unpackhi_epi32(h0, h1));
1948 __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 20),
1949 _mm_set1_epi32(2047)), _mm_set1_epi32(1023));
1950 __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
1951 __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0, yi0)), ln2_2);
1953 h0 = _mm_and_si128(_mm_srli_epi32(h0, 20 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK * 2));
1954 _mm_store_si128((__m128i*)idx, h0);
1955 h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
1957 __m128d t0, t1, t2, t3, t4;
1958 t0 = _mm_load_pd(icvLogTab + idx[0]);
1959 t2 = _mm_load_pd(icvLogTab + idx[1]);
1960 t1 = _mm_unpackhi_pd(t0, t2);
1961 t0 = _mm_unpacklo_pd(t0, t2);
1962 t2 = _mm_load_pd(icvLogTab + idx[2]);
1963 t4 = _mm_load_pd(icvLogTab + idx[3]);
1964 t3 = _mm_unpackhi_pd(t2, t4);
1965 t2 = _mm_unpacklo_pd(t2, t4);
1967 yd0 = _mm_add_pd(yd0, t0);
1968 yd1 = _mm_add_pd(yd1, t2);
1970 xd0 = _mm_mul_pd(_mm_sub_pd(xd0, _1_2), t1);
1971 xd1 = _mm_mul_pd(_mm_sub_pd(xd1, _1_2), t3);
1973 xd0 = _mm_add_pd(xd0, _mm_and_pd(_mm_castsi128_pd(_mm_unpacklo_epi32(h0, h0)), shift2));
1974 xd1 = _mm_add_pd(xd1, _mm_and_pd(_mm_castsi128_pd(_mm_unpackhi_epi32(h0, h0)), shift2));
1976 __m128d zd0 = _mm_mul_pd(xd0, mA0);
1977 __m128d zd1 = _mm_mul_pd(xd1, mA0);
1978 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA1), xd0);
1979 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA1), xd1);
1980 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA2), xd0);
1981 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA2), xd1);
1982 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA3), xd0);
1983 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA3), xd1);
1984 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA4), xd0);
1985 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA4), xd1);
1986 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA5), xd0);
1987 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA5), xd1);
1988 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA6), xd0);
1989 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA6), xd1);
1990 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA7), xd0);
1991 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA7), xd1);
1993 yd0 = _mm_add_pd(yd0, zd0);
1994 yd1 = _mm_add_pd(yd1, zd1);
1996 _mm_storeu_pd(y + i, yd0);
1997 _mm_storeu_pd(y + i + 2, yd1);
2002 for( ; i <= n - 4; i += 4 )
2005 double x0, x1, x2, x3;
2006 double y0, y1, y2, y3;
2016 buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
2017 buf[1].i.hi = (h1 & LOGTAB_MASK2) | (1023 << 20);
2019 y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
2020 y1 = (((h1 >> 20) & 0x7ff) - 1023) * ln_2;
2027 h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2028 h1 = (h1 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2030 y0 += icvLogTab[h0];
2031 y1 += icvLogTab[h1];
2036 x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
2037 x1 = LOGTAB_TRANSLATE( buf[1].d, h1 );
2039 buf[2].i.hi = (h2 & LOGTAB_MASK2) | (1023 << 20);
2040 buf[3].i.hi = (h3 & LOGTAB_MASK2) | (1023 << 20);
2042 y2 = (((h2 >> 20) & 0x7ff) - 1023) * ln_2;
2043 y3 = (((h3 >> 20) & 0x7ff) - 1023) * ln_2;
2045 h2 = (h2 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2046 h3 = (h3 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2048 y2 += icvLogTab[h2];
2049 y3 += icvLogTab[h3];
2051 x2 = LOGTAB_TRANSLATE( buf[2].d, h2 );
2052 x3 = LOGTAB_TRANSLATE( buf[3].d, h3 );
2054 y0 += LOGPOLY( x0, h0 == 510 );
2055 y1 += LOGPOLY( x1, h1 == 510 );
2060 y2 += LOGPOLY( x2, h2 == 510 );
2061 y3 += LOGPOLY( x3, h3 == 510 );
2071 double x0, y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
2073 buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
2074 buf[0].i.lo = X[i].i.lo;
2075 h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2077 y0 += icvLogTab[h0];
2078 x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
2079 y0 += LOGPOLY( x0, h0 == 510 );
2085 static void Log_32f_ipp(const float *x, float *y, int n)
2089 if (0 <= ippsLn_32f_A21(x, y, n))
2091 CV_IMPL_ADD(CV_IMPL_IPP);
2094 setIppErrorStatus();
2099 static void Log_64f_ipp(const double *x, double *y, int n)
2103 if (0 <= ippsLn_64f_A50(x, y, n))
2105 CV_IMPL_ADD(CV_IMPL_IPP);
2108 setIppErrorStatus();
2113 #define Log_32f Log_32f_ipp
2114 #define Log_64f Log_64f_ipp
2117 void log( InputArray _src, OutputArray _dst )
2119 int type = _src.type(), depth = _src.depth(), cn = _src.channels();
2120 CV_Assert( depth == CV_32F || depth == CV_64F );
2122 CV_OCL_RUN( _dst.isUMat() && _src.dims() <= 2,
2123 ocl_math_op(_src, noArray(), _dst, OCL_OP_LOG))
2125 Mat src = _src.getMat();
2126 _dst.create( src.dims, src.size, type );
2127 Mat dst = _dst.getMat();
2129 const Mat* arrays[] = {&src, &dst, 0};
2131 NAryMatIterator it(arrays, ptrs);
2132 int len = (int)(it.size*cn);
2134 for( size_t i = 0; i < it.nplanes; i++, ++it )
2136 if( depth == CV_32F )
2137 Log_32f( (const float*)ptrs[0], (float*)ptrs[1], len );
2139 Log_64f( (const double*)ptrs[0], (double*)ptrs[1], len );
2143 /****************************************************************************************\
2145 \****************************************************************************************/
2147 template<typename T, typename WT>
2149 iPow_( const T* src, T* dst, int len, int power )
2152 for( i = 0; i < len; i++ )
2154 WT a = 1, b = src[i];
2165 dst[i] = saturate_cast<T>(a);
2170 static void iPow8u(const uchar* src, uchar* dst, int len, int power)
2172 iPow_<uchar, int>(src, dst, len, power);
2175 static void iPow8s(const schar* src, schar* dst, int len, int power)
2177 iPow_<schar, int>(src, dst, len, power);
2180 static void iPow16u(const ushort* src, ushort* dst, int len, int power)
2182 iPow_<ushort, int>(src, dst, len, power);
2185 static void iPow16s(const short* src, short* dst, int len, int power)
2187 iPow_<short, int>(src, dst, len, power);
2190 static void iPow32s(const int* src, int* dst, int len, int power)
2192 iPow_<int, int>(src, dst, len, power);
2195 static void iPow32f(const float* src, float* dst, int len, int power)
2197 iPow_<float, float>(src, dst, len, power);
2200 static void iPow64f(const double* src, double* dst, int len, int power)
2202 iPow_<double, double>(src, dst, len, power);
2206 typedef void (*IPowFunc)( const uchar* src, uchar* dst, int len, int power );
2208 static IPowFunc ipowTab[] =
2210 (IPowFunc)iPow8u, (IPowFunc)iPow8s, (IPowFunc)iPow16u, (IPowFunc)iPow16s,
2211 (IPowFunc)iPow32s, (IPowFunc)iPow32f, (IPowFunc)iPow64f, 0
2216 static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
2217 bool is_ipower, int ipower)
2219 const ocl::Device & d = ocl::Device::getDefault();
2220 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
2221 rowsPerWI = d.isIntel() ? 4 : 1;
2222 bool doubleSupport = d.doubleFPConfig() > 0;
2224 _dst.createSameSize(_src, type);
2225 if (is_ipower && (ipower == 0 || ipower == 1))
2228 _dst.setTo(Scalar::all(1));
2229 else if (ipower == 1)
2235 if (depth == CV_64F && !doubleSupport)
2238 bool issqrt = std::abs(power - 0.5) < DBL_EPSILON, nonnegative = power >= 0;
2239 const char * const op = issqrt ? "OP_SQRT" : is_ipower ? nonnegative ? "OP_POWN" : "OP_ROOTN" : nonnegative ? "OP_POWR" : "OP_POW";
2241 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
2242 format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
2243 ocl::typeToStr(depth), depth, rowsPerWI, op,
2244 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
2248 UMat src = _src.getUMat();
2249 _dst.create(src.size(), type);
2250 UMat dst = _dst.getUMat();
2252 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
2253 dstarg = ocl::KernelArg::WriteOnly(dst, cn);
2256 k.args(srcarg, dstarg);
2258 k.args(srcarg, dstarg, ipower);
2261 if (depth == CV_32F)
2262 k.args(srcarg, dstarg, (float)power);
2264 k.args(srcarg, dstarg, power);
2267 size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
2268 return k.run(2, globalsize, NULL, false);
2273 void pow( InputArray _src, double power, OutputArray _dst )
2275 int type = _src.type(), depth = CV_MAT_DEPTH(type),
2276 cn = CV_MAT_CN(type), ipower = cvRound(power);
2277 bool is_ipower = fabs(ipower - power) < DBL_EPSILON, same = false,
2278 useOpenCL = _dst.isUMat() && _src.dims() <= 2;
2280 if( is_ipower && !(ocl::Device::getDefault().isIntel() && useOpenCL && depth != CV_64F))
2284 divide( Scalar::all(1), _src, _dst );
2294 _dst.createSameSize(_src, type);
2295 _dst.setTo(Scalar::all(1));
2301 #if defined(HAVE_IPP)
2304 if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) ||
2305 (_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) ))
2307 Mat src = _src.getMat();
2308 _dst.create( src.dims, src.size, type );
2309 Mat dst = _dst.getMat();
2311 Size size = src.size();
2312 int srcstep = (int)src.step, dststep = (int)dst.step, esz = CV_ELEM_SIZE(type);
2313 if (src.isContinuous() && dst.isContinuous())
2315 size.width = (int)src.total();
2317 srcstep = dststep = (int)src.total() * esz;
2321 IppStatus status = ippiSqr_32f_C1R(src.ptr<Ipp32f>(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
2325 CV_IMPL_ADD(CV_IMPL_IPP);
2328 setIppErrorStatus();
2333 multiply(_dst, _dst, _dst);
2335 multiply(_src, _src, _dst);
2340 CV_Assert( depth == CV_32F || depth == CV_64F );
2342 CV_OCL_RUN(useOpenCL,
2343 ocl_pow(same ? _dst : _src, power, _dst, is_ipower, ipower))
2347 src = dst = _dst.getMat();
2350 src = _src.getMat();
2351 _dst.create( src.dims, src.size, type );
2352 dst = _dst.getMat();
2355 const Mat* arrays[] = {&src, &dst, 0};
2357 NAryMatIterator it(arrays, ptrs);
2358 int len = (int)(it.size*cn);
2362 IPowFunc func = ipowTab[depth];
2363 CV_Assert( func != 0 );
2365 for( size_t i = 0; i < it.nplanes; i++, ++it )
2366 func( ptrs[0], ptrs[1], len, ipower );
2368 else if( fabs(fabs(power) - 0.5) < DBL_EPSILON )
2370 MathFunc func = power < 0 ?
2371 (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) :
2372 (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f);
2374 for( size_t i = 0; i < it.nplanes; i++, ++it )
2375 func( ptrs[0], ptrs[1], len );
2379 #if defined(HAVE_IPP)
2382 if (src.isContinuous() && dst.isContinuous())
2384 IppStatus status = depth == CV_32F ?
2385 ippsPowx_32f_A21(src.ptr<Ipp32f>(), (Ipp32f)power, dst.ptr<Ipp32f>(), (Ipp32s)(src.total() * cn)) :
2386 ippsPowx_64f_A50(src.ptr<Ipp64f>(), power, dst.ptr<Ipp64f>(), (Ipp32s)(src.total() * cn));
2390 CV_IMPL_ADD(CV_IMPL_IPP);
2393 setIppErrorStatus();
2398 int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
2399 size_t esz1 = src.elemSize1();
2401 for( size_t i = 0; i < it.nplanes; i++, ++it )
2403 for( j = 0; j < len; j += blockSize )
2405 int bsz = std::min(len - j, blockSize);
2406 if( depth == CV_32F )
2408 const float* x = (const float*)ptrs[0];
2409 float* y = (float*)ptrs[1];
2412 for( k = 0; k < bsz; k++ )
2413 y[k] = (float)(y[k]*power);
2418 const double* x = (const double*)ptrs[0];
2419 double* y = (double*)ptrs[1];
2422 for( k = 0; k < bsz; k++ )
2426 ptrs[0] += bsz*esz1;
2427 ptrs[1] += bsz*esz1;
2433 void sqrt(InputArray a, OutputArray b)
2438 /************************** CheckArray for NaN's, Inf's *********************************/
2440 template<int cv_mat_type> struct mat_type_assotiations{};
2442 template<> struct mat_type_assotiations<CV_8U>
2444 typedef unsigned char type;
2445 static const type min_allowable = 0x0;
2446 static const type max_allowable = 0xFF;
2449 template<> struct mat_type_assotiations<CV_8S>
2451 typedef signed char type;
2452 static const type min_allowable = SCHAR_MIN;
2453 static const type max_allowable = SCHAR_MAX;
2456 template<> struct mat_type_assotiations<CV_16U>
2458 typedef unsigned short type;
2459 static const type min_allowable = 0x0;
2460 static const type max_allowable = USHRT_MAX;
2462 template<> struct mat_type_assotiations<CV_16S>
2464 typedef signed short type;
2465 static const type min_allowable = SHRT_MIN;
2466 static const type max_allowable = SHRT_MAX;
2469 template<> struct mat_type_assotiations<CV_32S>
2472 static const type min_allowable = (-INT_MAX - 1);
2473 static const type max_allowable = INT_MAX;
2476 // inclusive maxVal !!!
2478 bool checkIntegerRange(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value)
2480 typedef mat_type_assotiations<depth> type_ass;
2482 if (minVal < type_ass::min_allowable && maxVal > type_ass::max_allowable)
2486 else if (minVal > type_ass::max_allowable || maxVal < type_ass::min_allowable || maxVal < minVal)
2488 bad_pt = cv::Point(0,0);
2491 cv::Mat as_one_channel = src.reshape(1,0);
2493 for (int j = 0; j < as_one_channel.rows; ++j)
2494 for (int i = 0; i < as_one_channel.cols; ++i)
2496 if (as_one_channel.at<typename type_ass::type>(j ,i) < minVal || as_one_channel.at<typename type_ass::type>(j ,i) > maxVal)
2499 bad_pt.x = i % src.channels();
2500 bad_value = as_one_channel.at<typename type_ass::type>(j ,i);
2509 typedef bool (*check_range_function)(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value);
2511 check_range_function check_range_functions[] =
2513 &checkIntegerRange<CV_8U>,
2514 &checkIntegerRange<CV_8S>,
2515 &checkIntegerRange<CV_16U>,
2516 &checkIntegerRange<CV_16S>,
2517 &checkIntegerRange<CV_32S>
2520 bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double maxVal)
2522 Mat src = _src.getMat();
2526 const Mat* arrays[] = {&src, 0};
2528 NAryMatIterator it(arrays, planes);
2530 for ( size_t i = 0; i < it.nplanes; i++, ++it )
2532 if (!checkRange( it.planes[0], quiet, pt, minVal, maxVal ))
2534 // todo: set index properly
2541 int depth = src.depth();
2542 Point badPt(-1, -1);
2543 double badValue = 0;
2548 int minVali = minVal<(-INT_MAX - 1) ? (-INT_MAX - 1) : cvFloor(minVal);
2549 int maxVali = maxVal>INT_MAX ? INT_MAX : cvCeil(maxVal) - 1; // checkIntegerRang() use inclusive maxVal
2551 (check_range_functions[depth])(src, badPt, minVali, maxVali, badValue);
2556 Size size = getContinuousSize( src, src.channels() );
2558 if( depth == CV_32F )
2562 const int* isrc = src.ptr<int>();
2563 size_t step = src.step/sizeof(isrc[0]);
2565 a.f = (float)std::max(minVal, (double)-FLT_MAX);
2566 b.f = (float)std::min(maxVal, (double)FLT_MAX);
2568 ia = CV_TOGGLE_FLT(a.i);
2569 ib = CV_TOGGLE_FLT(b.i);
2571 for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
2573 for( i = 0; i < size.width; i++ )
2576 val = CV_TOGGLE_FLT(val);
2578 if( val < ia || val >= ib )
2580 badPt = Point((loc + i) % src.cols, (loc + i) / src.cols);
2581 badValue = ((const float*)isrc)[i];
2591 const int64* isrc = src.ptr<int64>();
2592 size_t step = src.step/sizeof(isrc[0]);
2597 ia = CV_TOGGLE_DBL(a.i);
2598 ib = CV_TOGGLE_DBL(b.i);
2600 for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
2602 for( i = 0; i < size.width; i++ )
2604 int64 val = isrc[i];
2605 val = CV_TOGGLE_DBL(val);
2607 if( val < ia || val >= ib )
2609 badPt = Point((loc + i) % src.cols, (loc + i) / src.cols);
2610 badValue = ((const double*)isrc)[i];
2623 CV_Error_( CV_StsOutOfRange,
2624 ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
2631 static bool ocl_patchNaNs( InputOutputArray _a, float value )
2633 int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
2634 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
2635 format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d",
2640 UMat a = _a.getUMat();
2641 int cn = a.channels();
2643 k.args(ocl::KernelArg::ReadOnlyNoSize(a),
2644 ocl::KernelArg::WriteOnly(a, cn), (float)value);
2646 size_t globalsize[2] = { a.cols * cn, (a.rows + rowsPerWI - 1) / rowsPerWI };
2647 return k.run(2, globalsize, NULL, false);
2652 void patchNaNs( InputOutputArray _a, double _val )
2654 CV_Assert( _a.depth() == CV_32F );
2656 CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
2657 ocl_patchNaNs(_a, (float)_val))
2659 Mat a = _a.getMat();
2660 const Mat* arrays[] = {&a, 0};
2662 NAryMatIterator it(arrays, (uchar**)ptrs);
2663 size_t len = it.size*a.channels();
2665 val.f = (float)_val;
2668 __m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000);
2669 __m128i v_val = _mm_set1_epi32(val.i);
2671 int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000),
2672 v_val = vdupq_n_s32(val.i);
2675 for( size_t i = 0; i < it.nplanes; i++, ++it )
2677 int* tptr = ptrs[0];
2683 for ( ; j + 4 <= len; j += 4)
2685 __m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j));
2686 __m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1));
2687 __m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val));
2688 _mm_storeu_si128((__m128i *)(tptr + j), v_res);
2692 for ( ; j + 4 <= len; j += 4)
2694 int32x4_t v_src = vld1q_s32(tptr + j);
2695 uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1));
2696 int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src);
2697 vst1q_s32(tptr + j, v_dst);
2701 for( ; j < len; j++ )
2702 if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
2708 void exp(const float* src, float* dst, int n)
2710 Exp_32f(src, dst, n);
2713 void log(const float* src, float* dst, int n)
2715 Log_32f(src, dst, n);
2718 void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees)
2720 FastAtan2_32f(y, x, dst, n, angleInDegrees);
2723 void magnitude(const float* x, const float* y, float* dst, int n)
2725 Magnitude_32f(x, y, dst, n);
2730 CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); }
2731 CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); }
2734 cvCartToPolar( const CvArr* xarr, const CvArr* yarr,
2735 CvArr* magarr, CvArr* anglearr,
2736 int angle_in_degrees )
2738 cv::Mat X = cv::cvarrToMat(xarr), Y = cv::cvarrToMat(yarr), Mag, Angle;
2741 Mag = cv::cvarrToMat(magarr);
2742 CV_Assert( Mag.size() == X.size() && Mag.type() == X.type() );
2746 Angle = cv::cvarrToMat(anglearr);
2747 CV_Assert( Angle.size() == X.size() && Angle.type() == X.type() );
2752 cv::cartToPolar( X, Y, Mag, Angle, angle_in_degrees != 0 );
2754 cv::magnitude( X, Y, Mag );
2757 cv::phase( X, Y, Angle, angle_in_degrees != 0 );
2761 cvPolarToCart( const CvArr* magarr, const CvArr* anglearr,
2762 CvArr* xarr, CvArr* yarr, int angle_in_degrees )
2764 cv::Mat X, Y, Angle = cv::cvarrToMat(anglearr), Mag;
2767 Mag = cv::cvarrToMat(magarr);
2768 CV_Assert( Mag.size() == Angle.size() && Mag.type() == Angle.type() );
2772 X = cv::cvarrToMat(xarr);
2773 CV_Assert( X.size() == Angle.size() && X.type() == Angle.type() );
2777 Y = cv::cvarrToMat(yarr);
2778 CV_Assert( Y.size() == Angle.size() && Y.type() == Angle.type() );
2781 cv::polarToCart( Mag, Angle, X, Y, angle_in_degrees != 0 );
2784 CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr )
2786 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
2787 CV_Assert( src.type() == dst.type() && src.size == dst.size );
2788 cv::exp( src, dst );
2791 CV_IMPL void cvLog( const CvArr* srcarr, CvArr* dstarr )
2793 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
2794 CV_Assert( src.type() == dst.type() && src.size == dst.size );
2795 cv::log( src, dst );
2798 CV_IMPL void cvPow( const CvArr* srcarr, CvArr* dstarr, double power )
2800 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
2801 CV_Assert( src.type() == dst.type() && src.size == dst.size );
2802 cv::pow( src, power, dst );
2805 CV_IMPL int cvCheckArr( const CvArr* arr, int flags,
2806 double minVal, double maxVal )
2808 if( (flags & CV_CHECK_RANGE) == 0 )
2809 minVal = -DBL_MAX, maxVal = DBL_MAX;
2810 return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal );
2815 Finds real roots of cubic, quadratic or linear equation.
2816 The original code has been taken from Ken Turkowski web page
2817 (http://www.worldserver.com/turk/opensource/) and adopted for OpenCV.
2818 Here is the copyright notice.
2820 -----------------------------------------------------------------------
2821 Copyright (C) 1978-1999 Ken Turkowski. <turk@computer.org>
2823 All rights reserved.
2825 Warranty Information
2826 Even though I have reviewed this software, I make no warranty
2827 or representation, either express or implied, with respect to this
2828 software, its quality, accuracy, merchantability, or fitness for a
2829 particular purpose. As a result, this software is provided "as is,"
2830 and you, its user, are assuming the entire risk as to its quality
2833 This code may be used and freely distributed as long as it includes
2834 this copyright notice and the above warranty information.
2835 -----------------------------------------------------------------------
2838 int cv::solveCubic( InputArray _coeffs, OutputArray _roots )
2841 Mat coeffs = _coeffs.getMat();
2842 int ctype = coeffs.type();
2844 CV_Assert( ctype == CV_32F || ctype == CV_64F );
2845 CV_Assert( (coeffs.size() == Size(n0, 1) ||
2846 coeffs.size() == Size(n0+1, 1) ||
2847 coeffs.size() == Size(1, n0) ||
2848 coeffs.size() == Size(1, n0+1)) );
2850 _roots.create(n0, 1, ctype, -1, true, _OutputArray::DEPTH_MASK_FLT);
2851 Mat roots = _roots.getMat();
2854 double a0 = 1., a1, a2, a3;
2855 double x0 = 0., x1 = 0., x2 = 0.;
2856 int ncoeffs = coeffs.rows + coeffs.cols - 1;
2858 if( ctype == CV_32FC1 )
2861 a0 = coeffs.at<float>(++i);
2863 a1 = coeffs.at<float>(i+1);
2864 a2 = coeffs.at<float>(i+2);
2865 a3 = coeffs.at<float>(i+3);
2870 a0 = coeffs.at<double>(++i);
2872 a1 = coeffs.at<double>(i+1);
2873 a2 = coeffs.at<double>(i+2);
2874 a3 = coeffs.at<double>(i+3);
2882 n = a3 == 0 ? -1 : 0;
2892 // quadratic equation
2893 double d = a2*a2 - 4*a1*a3;
2897 double q1 = (-a2 + d) * 0.5;
2898 double q2 = (a2 + d) * -0.5;
2899 if( fabs(q1) > fabs(q2) )
2920 double Q = (a1 * a1 - 3 * a2) * (1./9);
2921 double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54);
2922 double Qcubed = Q * Q * Q;
2923 double d = Qcubed - R * R;
2927 double theta = acos(R / std::sqrt(Qcubed));
2928 double sqrtQ = std::sqrt(Q);
2929 double t0 = -2 * sqrtQ;
2930 double t1 = theta * (1./3);
2931 double t2 = a1 * (1./3);
2932 x0 = t0 * cos(t1) - t2;
2933 x1 = t0 * cos(t1 + (2.*CV_PI/3)) - t2;
2934 x2 = t0 * cos(t1 + (4.*CV_PI/3)) - t2;
2941 e = std::pow(d + fabs(R), 0.333333333333);
2944 x0 = (e + Q / e) - a1 * (1./3);
2949 if( roots.type() == CV_32FC1 )
2951 roots.at<float>(0) = (float)x0;
2952 roots.at<float>(1) = (float)x1;
2953 roots.at<float>(2) = (float)x2;
2957 roots.at<double>(0) = x0;
2958 roots.at<double>(1) = x1;
2959 roots.at<double>(2) = x2;
2965 /* finds complex roots of a polynomial using Durand-Kerner method:
2966 http://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method */
2967 double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters )
2969 typedef Complex<double> C;
2973 Mat coeffs0 = _coeffs0.getMat();
2974 int ctype = _coeffs0.type();
2975 int cdepth = CV_MAT_DEPTH(ctype);
2977 CV_Assert( CV_MAT_DEPTH(ctype) >= CV_32F && CV_MAT_CN(ctype) <= 2 );
2978 CV_Assert( coeffs0.rows == 1 || coeffs0.cols == 1 );
2980 int n = coeffs0.cols + coeffs0.rows - 2;
2982 _roots0.create(n, 1, CV_MAKETYPE(cdepth, 2), -1, true, _OutputArray::DEPTH_MASK_FLT);
2983 Mat roots0 = _roots0.getMat();
2985 AutoBuffer<C> buf(n*2+2);
2986 C *coeffs = buf, *roots = coeffs + n + 1;
2987 Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots);
2988 coeffs0.convertTo(coeffs1, coeffs1.type());
2989 if( coeffs0.channels() == 1 )
2991 const double* rcoeffs = (const double*)roots;
2992 for( i = 0; i <= n; i++ )
2993 coeffs[i] = C(rcoeffs[i], 0);
2998 for( i = 0; i < n; i++ )
3004 maxIters = maxIters <= 0 ? 1000 : maxIters;
3005 for( iter = 0; iter < maxIters; iter++ )
3008 for( i = 0; i < n; i++ )
3011 C num = coeffs[n], denom = coeffs[n];
3012 for( j = 0; j < n; j++ )
3014 num = num*p + coeffs[n-j-1];
3015 if( j != i ) denom = denom * (p - roots[j]);
3019 maxDiff = std::max(maxDiff, cv::abs(num));
3025 if( coeffs0.channels() == 1 )
3027 const double verySmallEps = 1e-100;
3028 for( i = 0; i < n; i++ )
3029 if( fabs(roots[i].im) < verySmallEps )
3033 Mat(roots0.size(), CV_64FC2, roots).convertTo(roots0, roots0.type());
3039 cvSolveCubic( const CvMat* coeffs, CvMat* roots )
3041 cv::Mat _coeffs = cv::cvarrToMat(coeffs), _roots = cv::cvarrToMat(roots), _roots0 = _roots;
3042 int nroots = cv::solveCubic(_coeffs, _roots);
3043 CV_Assert( _roots.data == _roots0.data ); // check that the array of roots was not reallocated
3048 void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int)
3050 cv::Mat _a = cv::cvarrToMat(a);
3051 cv::Mat _r = cv::cvarrToMat(r);
3053 cv::solvePoly(_a, _r, maxiter);
3054 CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated