1 /*M///////////////////////////////////////////////////////////////////////////////////////
3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
5 // By downloading, copying, installing or using the software you agree to this license.
6 // If you do not agree to this license, do not download, install,
7 // copy or use the software.
11 // For Open Source Computer Vision Library
13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
14 // Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
15 // Third party copyrights are property of their respective owners.
17 // Redistribution and use in source and binary forms, with or without modification,
18 // are permitted provided that the following conditions are met:
20 // * Redistribution's of source code must retain the above copyright notice,
21 // this list of conditions and the following disclaimer.
23 // * Redistribution's in binary form must reproduce the above copyright notice,
24 // this list of conditions and the following disclaimer in the documentation
25 // and/or other materials provided with the distribution.
27 // * The name of the copyright holders may not be used to endorse or promote products
28 // derived from this software without specific prior written permission.
30 // This software is provided by the copyright holders and contributors "as is" and
31 // any express or implied warranties, including, but not limited to, the implied
32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct,
34 // indirect, incidental, special, exemplary, or consequential damages
35 // (including, but not limited to, procurement of substitute goods or services;
36 // loss of use, data, or profits; or business interruption) however caused
37 // and on any theory of liability, whether in contract, strict liability,
38 // or tort (including negligence or otherwise) arising in any way out of
39 // the use of this software, even if advised of the possibility of such damage.
43 #include "precomp.hpp"
44 #include "opencl_kernels_core.hpp"
// NOTE(review): this listing is a line-sampled extract; each line carries its
// original file line number as a prefix. Code tokens kept byte-identical.
// Signature for simple element-wise math kernels: src -> dst over len elements.
49 typedef void (*MathFunc)(const void* src, void* dst, int len);
// Minimax polynomial coefficients for the atan2 approximation, pre-scaled so
// the polynomial evaluates directly in degrees (180/CV_PI factor folded in).
51 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
52 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
53 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
54 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
// OpenCL operation ids and their matching kernel build-option names
// (index-aligned with the enum; array is 0-terminated).
58 enum { OCL_OP_LOG=0, OCL_OP_EXP=1, OCL_OP_MAG=2, OCL_OP_PHASE_DEGREES=3, OCL_OP_PHASE_RADIANS=4 };
60 static const char* oclop2str[] = { "OP_LOG", "OP_EXP", "OP_MAG", "OP_PHASE_DEGREES", "OP_PHASE_RADIANS", 0 };
// Dispatches a unary/binary element-wise math op (log/exp/magnitude/phase) to
// the generic "KF" OpenCL arithmetic kernel. Returns false when the device or
// kernel cannot handle the request so the caller falls back to the CPU path.
// NOTE(review): braces and early-return lines are missing from this extract;
// code kept byte-identical.
62 static bool ocl_math_op(InputArray _src1, InputArray _src2, OutputArray _dst, int oclop)
64 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
// Phase kernels run scalar (kercn = 1); other ops use the predicted optimal
// vector width for the device.
65 int kercn = oclop == OCL_OP_PHASE_DEGREES ||
66 oclop == OCL_OP_PHASE_RADIANS ? 1 : ocl::predictOptimalVectorWidth(_src1, _src2, _dst);
68 const ocl::Device d = ocl::Device::getDefault();
69 bool double_support = d.doubleFPConfig() > 0;
// CV_64F inputs require device-side double support.
70 if (!double_support && depth == CV_64F)
72 int rowsPerWI = d.isIntel() ? 4 : 1;
// Build the kernel with op-specific defines; UNARY_OP vs BINARY_OP is chosen
// by whether a second source was supplied.
74 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
75 format("-D %s -D %s -D dstT=%s -D rowsPerWI=%d%s", _src2.empty() ? "UNARY_OP" : "BINARY_OP",
76 oclop2str[oclop], ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), rowsPerWI,
77 double_support ? " -D DOUBLE_SUPPORT" : ""));
81 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
82 _dst.create(src1.size(), type);
83 UMat dst = _dst.getUMat();
85 ocl::KernelArg src1arg = ocl::KernelArg::ReadOnlyNoSize(src1),
86 src2arg = ocl::KernelArg::ReadOnlyNoSize(src2),
87 dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn);
// Unary ops take (src, dst); binary ops take (src1, src2, dst).
90 k.args(src1arg, dstarg);
92 k.args(src1arg, src2arg, dstarg);
// One work-item covers kercn elements horizontally and rowsPerWI rows.
94 size_t globalsize[] = { src1.cols * cn / kercn, (src1.rows + rowsPerWI - 1) / rowsPerWI };
95 return k.run(2, globalsize, 0, false);
// Fast approximate atan2(y, x) returning degrees in [0, 360). The ratio of the
// smaller to larger absolute component is fed to a 7th-order odd polynomial
// (Horner form), then the result is reflected into the correct octant/quadrant.
// NOTE(review): the if/else, quadrant fix-up and return lines are missing from
// this extract; code kept byte-identical.
100 float fastAtan2( float y, float x )
102 float ax = std::abs(x), ay = std::abs(y);
// ax >= ay branch: angle in [0, 45]; DBL_EPSILON guards division by zero.
106 c = ay/(ax + (float)DBL_EPSILON);
108 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
// ay > ax branch: reflect around 90 degrees.
112 c = ax/(ay + (float)DBL_EPSILON);
114 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
// Vectorized atan2 over arrays: angle[i] = atan2(Y[i], X[i]); result in degrees
// or radians per angleInDegrees (the polynomial produces degrees, so the scale
// is 1 for degrees and pi/180 for radians). SSE2/NEON paths process 4 values
// at a time; the scalar tail mirrors fastAtan2(). NOTE(review): the #if/#endif
// SIMD guards and some declarations are missing from this extract.
123 static void FastAtan2_32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees=true )
126 float scale = angleInDegrees ? 1 : (float)(CV_PI/180);
// Tegra-optimized full-array path, when available.
128 #ifdef HAVE_TEGRA_OPTIMIZATION
129 if (tegra::FastAtan2_32f(Y, X, angle, len, scale))
// --- SSE2 path ---
136 Cv32suf iabsmask; iabsmask.i = 0x7fffffff;
137 __m128 eps = _mm_set1_ps((float)DBL_EPSILON), absmask = _mm_set1_ps(iabsmask.f);
138 __m128 _90 = _mm_set1_ps(90.f), _180 = _mm_set1_ps(180.f), _360 = _mm_set1_ps(360.f);
139 __m128 z = _mm_setzero_ps(), scale4 = _mm_set1_ps(scale);
140 __m128 p1 = _mm_set1_ps(atan2_p1), p3 = _mm_set1_ps(atan2_p3);
141 __m128 p5 = _mm_set1_ps(atan2_p5), p7 = _mm_set1_ps(atan2_p7);
143 for( ; i <= len - 4; i += 4 )
145 __m128 x = _mm_loadu_ps(X + i), y = _mm_loadu_ps(Y + i);
// Absolute values via sign-bit mask; c = min/|max| + eps in [0, 1].
146 __m128 ax = _mm_and_ps(x, absmask), ay = _mm_and_ps(y, absmask);
147 __m128 mask = _mm_cmplt_ps(ax, ay);
148 __m128 tmin = _mm_min_ps(ax, ay), tmax = _mm_max_ps(ax, ay);
149 __m128 c = _mm_div_ps(tmin, _mm_add_ps(tmax, eps));
150 __m128 c2 = _mm_mul_ps(c, c);
// Odd polynomial evaluated in Horner form.
151 __m128 a = _mm_mul_ps(c2, p7);
152 a = _mm_mul_ps(_mm_add_ps(a, p5), c2);
153 a = _mm_mul_ps(_mm_add_ps(a, p3), c2);
154 a = _mm_mul_ps(_mm_add_ps(a, p1), c);
// Branchless select a = (ax < ay) ? 90 - a : a, using a ^ ((a ^ b) & mask).
156 __m128 b = _mm_sub_ps(_90, a);
157 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
// a = (x < 0) ? 180 - a : a.
159 b = _mm_sub_ps(_180, a);
160 mask = _mm_cmplt_ps(x, z);
161 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
// a = (y < 0) ? 360 - a : a.
163 b = _mm_sub_ps(_360, a);
164 mask = _mm_cmplt_ps(y, z);
165 a = _mm_xor_ps(a, _mm_and_ps(_mm_xor_ps(a, b), mask));
167 a = _mm_mul_ps(a, scale4);
168 _mm_storeu_ps(angle + i, a);
// --- NEON path (reciprocal approximation instead of division) ---
172 float32x4_t eps = vdupq_n_f32((float)DBL_EPSILON);
173 float32x4_t _90 = vdupq_n_f32(90.f), _180 = vdupq_n_f32(180.f), _360 = vdupq_n_f32(360.f);
174 float32x4_t z = vdupq_n_f32(0.0f), scale4 = vdupq_n_f32(scale);
175 float32x4_t p1 = vdupq_n_f32(atan2_p1), p3 = vdupq_n_f32(atan2_p3);
176 float32x4_t p5 = vdupq_n_f32(atan2_p5), p7 = vdupq_n_f32(atan2_p7);
178 for( ; i <= len - 4; i += 4 )
180 float32x4_t x = vld1q_f32(X + i), y = vld1q_f32(Y + i);
181 float32x4_t ax = vabsq_f32(x), ay = vabsq_f32(y);
182 float32x4_t tmin = vminq_f32(ax, ay), tmax = vmaxq_f32(ax, ay);
183 float32x4_t c = vmulq_f32(tmin, cv_vrecpq_f32(vaddq_f32(tmax, eps)));
184 float32x4_t c2 = vmulq_f32(c, c);
185 float32x4_t a = vmulq_f32(c2, p7);
186 a = vmulq_f32(vaddq_f32(a, p5), c2);
187 a = vmulq_f32(vaddq_f32(a, p3), c2);
188 a = vmulq_f32(vaddq_f32(a, p1), c);
// Same three octant/quadrant fix-ups, via bit-select.
190 a = vbslq_f32(vcgeq_f32(ax, ay), a, vsubq_f32(_90, a));
191 a = vbslq_f32(vcltq_f32(x, z), vsubq_f32(_180, a), a);
192 a = vbslq_f32(vcltq_f32(y, z), vsubq_f32(_360, a), a);
194 vst1q_f32(angle + i, vmulq_f32(a, scale4));
// --- scalar tail (same math as fastAtan2) ---
198 for( ; i < len; i++ )
200 float x = X[i], y = Y[i];
201 float ax = std::abs(x), ay = std::abs(y);
205 c = ay/(ax + (float)DBL_EPSILON);
207 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
211 c = ax/(ay + (float)DBL_EPSILON);
213 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
219 angle[i] = (float)(a*scale);
224 /* ************************************************************************** *\
225 Fast cube root by Ken Turkowski
226 (http://www.worldserver.com/turk/computergraphics/papers.html)
227 \* ************************************************************************** */
// cubeRoot(value): fast cube root preserving sign. Splits the IEEE-754 float
// into sign/exponent/mantissa, divides the (shifted) exponent by 3, and
// refines the mantissa with a quartic rational polynomial. NOTE(review):
// variable declarations and several lines are missing from this extract;
// code kept byte-identical.
228 float cubeRoot( float value )
// Magnitude bits and sign bit of the input.
236 ix = v.i & 0x7fffffff;
237 s = v.i & 0x80000000;
// Unbiased exponent.
238 ex = (ix >> 23) - 127;
// Round the exponent shift down to a multiple of 3 so ex/3 is exact.
240 shx -= shx >= 0 ? 3 : 0;
241 ex = (ex - shx) / 3; /* exponent of cube root */
// Rebuild a float with the residual exponent so fr lands in [0.125, 1).
242 v.i = (ix & ((1<<23)-1)) | ((shx + 127)<<23);
245 /* 0.125 <= fr < 1.0 */
246 /* Use quartic rational polynomial with error < 2^(-24) */
247 fr = (float)(((((45.2548339756803022511987494 * fr +
248 192.2798368355061050458134625) * fr +
249 119.1654824285581628956914143) * fr +
250 13.43250139086239872172837314) * fr +
251 0.1636161226585754240958355063)/
252 ((((14.80884093219134573786480845 * fr +
253 151.9714051044435648658557668) * fr +
254 168.5254414101568283957668343) * fr +
255 33.9905941350215598754191872) * fr +
258 /* fr *= 2^ex * sign */
// Re-apply exponent and sign; the mask zeroes the result when the input
// (ignoring sign) was zero, so cubeRoot(+/-0) yields 0.
261 v.i = (v.i + (ex << 23) + s) & (m.i*2 != 0 ? -1 : 0);
// mag[i] = sqrt(x[i]^2 + y[i]^2) over float arrays. IPP branch is compiled out
// ("&& 0"); SSE2 does 8/iteration, NEON 4 then 2, scalar tail finishes.
// NOTE(review): #if guards and declarations are missing from this extract.
265 static void Magnitude_32f(const float* x, const float* y, float* mag, int len)
// IPP path deliberately disabled by the "&& 0".
267 #if defined HAVE_IPP && 0
270 IppStatus status = ippsMagnitude_32f(x, y, mag, len);
273 CV_IMPL_ADD(CV_IMPL_IPP);
// --- SSE2: two 4-wide registers per iteration ---
285 for( ; i <= len - 8; i += 8 )
287 __m128 x0 = _mm_loadu_ps(x + i), x1 = _mm_loadu_ps(x + i + 4);
288 __m128 y0 = _mm_loadu_ps(y + i), y1 = _mm_loadu_ps(y + i + 4);
289 x0 = _mm_add_ps(_mm_mul_ps(x0, x0), _mm_mul_ps(y0, y0));
290 x1 = _mm_add_ps(_mm_mul_ps(x1, x1), _mm_mul_ps(y1, y1));
291 x0 = _mm_sqrt_ps(x0); x1 = _mm_sqrt_ps(x1);
292 _mm_storeu_ps(mag + i, x0); _mm_storeu_ps(mag + i + 4, x1);
// --- NEON: 4-wide then 2-wide via multiply-accumulate ---
296 for( ; i <= len - 4; i += 4 )
298 float32x4_t v_x = vld1q_f32(x + i), v_y = vld1q_f32(y + i);
299 vst1q_f32(mag + i, cv_vsqrtq_f32(vmlaq_f32(vmulq_f32(v_x, v_x), v_y, v_y)));
301 for( ; i <= len - 2; i += 2 )
303 float32x2_t v_x = vld1_f32(x + i), v_y = vld1_f32(y + i);
304 vst1_f32(mag + i, cv_vsqrt_f32(vmla_f32(vmul_f32(v_x, v_x), v_y, v_y)));
// --- scalar tail ---
308 for( ; i < len; i++ )
310 float x0 = x[i], y0 = y[i];
311 mag[i] = std::sqrt(x0*x0 + y0*y0);
// mag[i] = sqrt(x[i]^2 + y[i]^2) over double arrays: IPP when available,
// otherwise SSE2 (4/iteration in two 2-wide registers) plus a scalar tail.
// NOTE(review): #if guards and declarations are missing from this extract.
315 static void Magnitude_64f(const double* x, const double* y, double* mag, int len)
317 #if defined(HAVE_IPP)
320 IppStatus status = ippsMagnitude_64f(x, y, mag, len);
323 CV_IMPL_ADD(CV_IMPL_IPP);
// --- SSE2 path ---
335 for( ; i <= len - 4; i += 4 )
337 __m128d x0 = _mm_loadu_pd(x + i), x1 = _mm_loadu_pd(x + i + 2);
338 __m128d y0 = _mm_loadu_pd(y + i), y1 = _mm_loadu_pd(y + i + 2);
339 x0 = _mm_add_pd(_mm_mul_pd(x0, x0), _mm_mul_pd(y0, y0));
340 x1 = _mm_add_pd(_mm_mul_pd(x1, x1), _mm_mul_pd(y1, y1));
341 x0 = _mm_sqrt_pd(x0); x1 = _mm_sqrt_pd(x1);
342 _mm_storeu_pd(mag + i, x0); _mm_storeu_pd(mag + i + 2, x1);
// --- scalar tail ---
347 for( ; i < len; i++ )
349 double x0 = x[i], y0 = y[i];
350 mag[i] = std::sqrt(x0*x0 + y0*y0);
// dst[i] = 1/sqrt(src[i]) for float arrays. IPP first; then SSE2 using the
// hardware rsqrt estimate refined by one Newton-Raphson step
// (t *= 1.5 - 0.5*src*t*t); NEON uses cv_vrsqrtq_f32; scalar tail last.
// NOTE(review): #if guards and declarations are missing from this extract.
355 static void InvSqrt_32f(const float* src, float* dst, int len)
357 #if defined(HAVE_IPP)
360 if (ippsInvSqrt_32f_A21(src, dst, len) >= 0)
362 CV_IMPL_ADD(CV_IMPL_IPP);
374 __m128 _0_5 = _mm_set1_ps(0.5f), _1_5 = _mm_set1_ps(1.5f);
// Aligned loads/stores when both pointers are 16-byte aligned.
375 if( (((size_t)src|(size_t)dst) & 15) == 0 )
376 for( ; i <= len - 8; i += 8 )
378 __m128 t0 = _mm_load_ps(src + i), t1 = _mm_load_ps(src + i + 4);
379 __m128 h0 = _mm_mul_ps(t0, _0_5), h1 = _mm_mul_ps(t1, _0_5);
380 t0 = _mm_rsqrt_ps(t0); t1 = _mm_rsqrt_ps(t1);
// One Newton-Raphson refinement of the rsqrt estimate.
381 t0 = _mm_mul_ps(t0, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t0,t0),h0)));
382 t1 = _mm_mul_ps(t1, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t1,t1),h1)));
383 _mm_store_ps(dst + i, t0); _mm_store_ps(dst + i + 4, t1);
// Unaligned variant of the same loop.
386 for( ; i <= len - 8; i += 8 )
388 __m128 t0 = _mm_loadu_ps(src + i), t1 = _mm_loadu_ps(src + i + 4);
389 __m128 h0 = _mm_mul_ps(t0, _0_5), h1 = _mm_mul_ps(t1, _0_5);
390 t0 = _mm_rsqrt_ps(t0); t1 = _mm_rsqrt_ps(t1);
391 t0 = _mm_mul_ps(t0, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t0,t0),h0)));
392 t1 = _mm_mul_ps(t1, _mm_sub_ps(_1_5, _mm_mul_ps(_mm_mul_ps(t1,t1),h1)));
393 _mm_storeu_ps(dst + i, t0); _mm_storeu_ps(dst + i + 4, t1);
// --- NEON path ---
397 for ( ; i <= len - 8; i += 8)
399 vst1q_f32(dst + i, cv_vrsqrtq_f32(vld1q_f32(src + i)));
400 vst1q_f32(dst + i + 4, cv_vrsqrtq_f32(vld1q_f32(src + i + 4)));
// --- scalar tail ---
404 for( ; i < len; i++ )
405 dst[i] = 1/std::sqrt(src[i]);
// dst[i] = 1/sqrt(src[i]) for double arrays: exact SSE2 divide/sqrt (no
// estimate refinement, since there is no double rsqrt instruction), scalar
// tail. NOTE(review): #if guards and declarations are missing from this
// extract; code kept byte-identical.
409 static void InvSqrt_64f(const double* src, double* dst, int len)
416 __m128d v_1 = _mm_set1_pd(1.0);
417 for ( ; i <= len - 2; i += 2)
418 _mm_storeu_pd(dst + i, _mm_div_pd(v_1, _mm_sqrt_pd(_mm_loadu_pd(src + i))));
422 for( ; i < len; i++ )
423 dst[i] = 1/std::sqrt(src[i]);
// dst[i] = sqrt(src[i]) for float arrays: IPP, then SSE2 (aligned/unaligned
// variants, 8/iteration), NEON, scalar tail. NOTE(review): #if guards and
// declarations are missing from this extract; code kept byte-identical.
427 static void Sqrt_32f(const float* src, float* dst, int len)
429 #if defined(HAVE_IPP)
432 if (ippsSqrt_32f_A21(src, dst, len) >= 0)
434 CV_IMPL_ADD(CV_IMPL_IPP);
// Aligned loads/stores when both pointers are 16-byte aligned.
445 if( (((size_t)src|(size_t)dst) & 15) == 0 )
446 for( ; i <= len - 8; i += 8 )
448 __m128 t0 = _mm_load_ps(src + i), t1 = _mm_load_ps(src + i + 4);
449 t0 = _mm_sqrt_ps(t0); t1 = _mm_sqrt_ps(t1);
450 _mm_store_ps(dst + i, t0); _mm_store_ps(dst + i + 4, t1);
// Unaligned variant of the same loop.
453 for( ; i <= len - 8; i += 8 )
455 __m128 t0 = _mm_loadu_ps(src + i), t1 = _mm_loadu_ps(src + i + 4);
456 t0 = _mm_sqrt_ps(t0); t1 = _mm_sqrt_ps(t1);
457 _mm_storeu_ps(dst + i, t0); _mm_storeu_ps(dst + i + 4, t1);
// --- NEON path ---
461 for ( ; i <= len - 8; i += 8)
463 vst1q_f32(dst + i, cv_vsqrtq_f32(vld1q_f32(src + i)));
464 vst1q_f32(dst + i + 4, cv_vsqrtq_f32(vld1q_f32(src + i + 4)));
// --- scalar tail ---
468 for( ; i < len; i++ )
469 dst[i] = std::sqrt(src[i]);
// dst[i] = sqrt(src[i]) for double arrays: IPP, then SSE2 (aligned/unaligned,
// 4/iteration), scalar tail. NOTE(review): #if guards and declarations are
// missing from this extract; code kept byte-identical.
473 static void Sqrt_64f(const double* src, double* dst, int len)
475 #if defined(HAVE_IPP)
478 if (ippsSqrt_64f_A50(src, dst, len) >= 0)
480 CV_IMPL_ADD(CV_IMPL_IPP);
// Aligned loads/stores when both pointers are 16-byte aligned.
492 if( (((size_t)src|(size_t)dst) & 15) == 0 )
493 for( ; i <= len - 4; i += 4 )
495 __m128d t0 = _mm_load_pd(src + i), t1 = _mm_load_pd(src + i + 2);
496 t0 = _mm_sqrt_pd(t0); t1 = _mm_sqrt_pd(t1);
497 _mm_store_pd(dst + i, t0); _mm_store_pd(dst + i + 2, t1);
// Unaligned variant of the same loop.
500 for( ; i <= len - 4; i += 4 )
502 __m128d t0 = _mm_loadu_pd(src + i), t1 = _mm_loadu_pd(src + i + 2);
503 t0 = _mm_sqrt_pd(t0); t1 = _mm_sqrt_pd(t1);
504 _mm_storeu_pd(dst + i, t0); _mm_storeu_pd(dst + i + 2, t1);
// --- scalar tail ---
509 for( ; i < len; i++ )
510 dst[i] = std::sqrt(src[i]);
514 /****************************************************************************************\
515 * Cartesian -> Polar *
516 \****************************************************************************************/
// Public cv::magnitude: per-element hypot of two same-typed CV_32F/CV_64F
// arrays. Tries the OpenCL path first (2-D UMats only), then iterates planes
// with NAryMatIterator and dispatches to Magnitude_32f/64f. NOTE(review):
// braces and some declarations are missing from this extract.
518 void magnitude( InputArray src1, InputArray src2, OutputArray dst )
520 int type = src1.type(), depth = src1.depth(), cn = src1.channels();
521 CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
523 CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
524 ocl_math_op(src1, src2, dst, OCL_OP_MAG))
526 Mat X = src1.getMat(), Y = src2.getMat();
527 dst.create(X.dims, X.size, X.type());
528 Mat Mag = dst.getMat();
// Plane-wise iteration handles >2-D and non-continuous matrices uniformly.
530 const Mat* arrays[] = {&X, &Y, &Mag, 0};
532 NAryMatIterator it(arrays, ptrs);
533 int len = (int)it.size*cn;
535 for( size_t i = 0; i < it.nplanes; i++, ++it )
537 if( depth == CV_32F )
539 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
540 float *mag = (float*)ptrs[2];
541 Magnitude_32f( x, y, mag, len );
545 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
546 double *mag = (double*)ptrs[2];
547 Magnitude_64f( x, y, mag, len );
// Public cv::phase: per-element angle of (x, y) vectors. OpenCL path first;
// on the CPU, CV_32F data goes straight to FastAtan2_32f while CV_64F data is
// processed in blocks through a float staging buffer (the atan2 approximation
// itself is single precision). NOTE(review): braces and some lines are missing
// from this extract; code kept byte-identical.
553 void phase( InputArray src1, InputArray src2, OutputArray dst, bool angleInDegrees )
555 int type = src1.type(), depth = src1.depth(), cn = src1.channels();
556 CV_Assert( src1.size() == src2.size() && type == src2.type() && (depth == CV_32F || depth == CV_64F));
558 CV_OCL_RUN(dst.isUMat() && src1.dims() <= 2 && src2.dims() <= 2,
559 ocl_math_op(src1, src2, dst, angleInDegrees ? OCL_OP_PHASE_DEGREES : OCL_OP_PHASE_RADIANS))
561 Mat X = src1.getMat(), Y = src2.getMat();
562 dst.create( X.dims, X.size, type );
563 Mat Angle = dst.getMat();
565 const Mat* arrays[] = {&X, &Y, &Angle, 0};
567 NAryMatIterator it(arrays, ptrs);
568 cv::AutoBuffer<float> _buf;
569 float* buf[2] = {0, 0};
570 int j, k, total = (int)(it.size*cn), blockSize = total;
571 size_t esz1 = X.elemSize1();
// Doubles are converted block-by-block into two float scratch buffers.
573 if( depth == CV_64F )
575 blockSize = std::min(blockSize, ((BLOCK_SIZE+cn-1)/cn)*cn);
576 _buf.allocate(blockSize*2);
578 buf[1] = buf[0] + blockSize;
581 for( size_t i = 0; i < it.nplanes; i++, ++it )
583 for( j = 0; j < total; j += blockSize )
585 int len = std::min(total - j, blockSize);
586 if( depth == CV_32F )
588 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
589 float *angle = (float*)ptrs[2];
590 FastAtan2_32f( y, x, angle, len, angleInDegrees );
594 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
595 double *angle = (double*)ptrs[2];
// Narrow the doubles to float, compute, then widen back into the output.
596 for( k = 0; k < len; k++ )
598 buf[0][k] = (float)x[k];
599 buf[1][k] = (float)y[k];
602 FastAtan2_32f( buf[1], buf[0], buf[0], len, angleInDegrees );
603 for( k = 0; k < len; k++ )
604 angle[k] = buf[0][k];
// OpenCL implementation of cartToPolar: one "KF" kernel invocation computes
// both magnitude (dst1) and angle (dst2); OP_CTP_AD/AR selects degrees vs
// radians. Returns false to trigger the CPU fallback when inputs are
// unsupported. NOTE(review): early-return lines are missing from this extract.
615 static bool ocl_cartToPolar( InputArray _src1, InputArray _src2,
616 OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
618 const ocl::Device & d = ocl::Device::getDefault();
619 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
620 rowsPerWI = d.isIntel() ? 4 : 1;
621 bool doubleSupport = d.doubleFPConfig() > 0;
// Reject >2-D inputs, mismatched types, non-float depths, and CV_64F
// without device double support.
623 if ( !(_src1.dims() <= 2 && _src2.dims() <= 2 &&
624 (depth == CV_32F || depth == CV_64F) && type == _src2.type()) ||
625 (depth == CV_64F && !doubleSupport) )
628 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
629 format("-D BINARY_OP -D dstT=%s -D depth=%d -D rowsPerWI=%d -D OP_CTP_%s%s",
630 ocl::typeToStr(CV_MAKE_TYPE(depth, 1)),
631 depth, rowsPerWI, angleInDegrees ? "AD" : "AR",
632 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
636 UMat src1 = _src1.getUMat(), src2 = _src2.getUMat();
637 Size size = src1.size();
638 CV_Assert( size == src2.size() );
640 _dst1.create(size, type);
641 _dst2.create(size, type);
642 UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
644 k.args(ocl::KernelArg::ReadOnlyNoSize(src1),
645 ocl::KernelArg::ReadOnlyNoSize(src2),
646 ocl::KernelArg::WriteOnly(dst1, cn),
647 ocl::KernelArg::WriteOnlyNoSize(dst2));
649 size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
650 return k.run(2, globalsize, NULL, false);
// Public cv::cartToPolar: magnitude into dst1, angle into dst2. OpenCL path
// first; the CPU path reuses Magnitude_32f/64f + FastAtan2_32f, staging CV_64F
// data through float buffers for the angle computation (same scheme as
// phase()). NOTE(review): braces and some lines are missing from this extract.
655 void cartToPolar( InputArray src1, InputArray src2,
656 OutputArray dst1, OutputArray dst2, bool angleInDegrees )
658 CV_OCL_RUN(dst1.isUMat() && dst2.isUMat(),
659 ocl_cartToPolar(src1, src2, dst1, dst2, angleInDegrees))
661 Mat X = src1.getMat(), Y = src2.getMat();
662 int type = X.type(), depth = X.depth(), cn = X.channels();
663 CV_Assert( X.size == Y.size && type == Y.type() && (depth == CV_32F || depth == CV_64F));
664 dst1.create( X.dims, X.size, type );
665 dst2.create( X.dims, X.size, type );
666 Mat Mag = dst1.getMat(), Angle = dst2.getMat();
668 const Mat* arrays[] = {&X, &Y, &Mag, &Angle, 0};
670 NAryMatIterator it(arrays, ptrs);
671 cv::AutoBuffer<float> _buf;
672 float* buf[2] = {0, 0};
673 int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
674 size_t esz1 = X.elemSize1();
// Float scratch buffers only needed for the CV_64F angle path.
676 if( depth == CV_64F )
678 _buf.allocate(blockSize*2);
680 buf[1] = buf[0] + blockSize;
683 for( size_t i = 0; i < it.nplanes; i++, ++it )
685 for( j = 0; j < total; j += blockSize )
687 int len = std::min(total - j, blockSize);
688 if( depth == CV_32F )
690 const float *x = (const float*)ptrs[0], *y = (const float*)ptrs[1];
691 float *mag = (float*)ptrs[2], *angle = (float*)ptrs[3];
692 Magnitude_32f( x, y, mag, len );
693 FastAtan2_32f( y, x, angle, len, angleInDegrees );
697 const double *x = (const double*)ptrs[0], *y = (const double*)ptrs[1];
698 double *angle = (double*)ptrs[3];
// Magnitude is computed in full double precision; only the angle goes
// through the single-precision staging buffers.
700 Magnitude_64f(x, y, (double*)ptrs[2], len);
701 for( k = 0; k < len; k++ )
703 buf[0][k] = (float)x[k];
704 buf[1][k] = (float)y[k];
707 FastAtan2_32f( buf[1], buf[0], buf[0], len, angleInDegrees );
708 for( k = 0; k < len; k++ )
709 angle[k] = buf[0][k];
720 /****************************************************************************************\
721 * Polar -> Cartesian *
722 \****************************************************************************************/
// Computes sin and cos of each input angle simultaneously using an N-entry
// quarter-symmetric sine table plus a small polynomial correction for the
// residual: sin(a+b) = sin(a)cos(b) + cos(a)sin(b), cos likewise. The cosine
// is read from the same table at (N/4 - idx). NOTE(review): N's definition,
// k1 selection, and rounding lines are missing from this extract.
724 static void SinCos_32f( const float *angle, float *sinval, float* cosval,
725 int len, int angle_in_degrees )
// sin() sampled at 2*pi*i/N; indexed modulo N via the (N - 1) mask below,
// which implies N is a power of two — TODO confirm against full source.
729 static const double sin_table[] =
731 0.00000000000000000000, 0.09801714032956060400,
732 0.19509032201612825000, 0.29028467725446233000,
733 0.38268343236508978000, 0.47139673682599764000,
734 0.55557023301960218000, 0.63439328416364549000,
735 0.70710678118654746000, 0.77301045336273699000,
736 0.83146961230254524000, 0.88192126434835494000,
737 0.92387953251128674000, 0.95694033573220894000,
738 0.98078528040323043000, 0.99518472667219682000,
739 1.00000000000000000000, 0.99518472667219693000,
740 0.98078528040323043000, 0.95694033573220894000,
741 0.92387953251128674000, 0.88192126434835505000,
742 0.83146961230254546000, 0.77301045336273710000,
743 0.70710678118654757000, 0.63439328416364549000,
744 0.55557023301960218000, 0.47139673682599786000,
745 0.38268343236508989000, 0.29028467725446239000,
746 0.19509032201612861000, 0.09801714032956082600,
747 0.00000000000000012246, -0.09801714032956059000,
748 -0.19509032201612836000, -0.29028467725446211000,
749 -0.38268343236508967000, -0.47139673682599764000,
750 -0.55557023301960196000, -0.63439328416364527000,
751 -0.70710678118654746000, -0.77301045336273666000,
752 -0.83146961230254524000, -0.88192126434835494000,
753 -0.92387953251128652000, -0.95694033573220882000,
754 -0.98078528040323032000, -0.99518472667219693000,
755 -1.00000000000000000000, -0.99518472667219693000,
756 -0.98078528040323043000, -0.95694033573220894000,
757 -0.92387953251128663000, -0.88192126434835505000,
758 -0.83146961230254546000, -0.77301045336273688000,
759 -0.70710678118654768000, -0.63439328416364593000,
760 -0.55557023301960218000, -0.47139673682599792000,
761 -0.38268343236509039000, -0.29028467725446250000,
762 -0.19509032201612872000, -0.09801714032956050600,
// Polynomial coefficients for sin/cos of the small residual b = k2*t.
765 static const double k2 = (2*CV_PI)/N;
767 static const double sin_a0 = -0.166630293345647*k2*k2*k2;
768 static const double sin_a2 = k2;
770 static const double cos_a0 = -0.499818138450326*k2*k2;
771 /*static const double cos_a2 = 1;*/
// Select the angle->table-units scale depending on the input unit.
776 if( !angle_in_degrees )
781 for( i = 0; i < len; i++ )
783 double t = angle[i]*k1;
// Table index (mod N) and the quarter-wave-shifted index for cosine.
786 int sin_idx = it & (N - 1);
787 int cos_idx = (N/4 - sin_idx) & (N - 1);
// Small-angle corrections: sin(b) ~ b + a0*b^3, cos(b) ~ 1 + a0*b^2.
789 double sin_b = (sin_a0*t*t + sin_a2)*t;
790 double cos_b = cos_a0*t*t + 1;
792 double sin_a = sin_table[sin_idx];
793 double cos_a = sin_table[cos_idx];
// Angle-addition formulas combine table value and correction.
795 double sin_val = sin_a*cos_b + cos_a*sin_b;
796 double cos_val = cos_a*cos_b - sin_a*sin_b;
798 sinval[i] = (float)sin_val;
799 cosval[i] = (float)cos_val;
// OpenCL implementation of polarToCart: one "KF" kernel computes x (dst1) and
// y (dst2) from magnitude and angle; OP_PTC_AD/AR selects degrees vs radians.
// Returns false to trigger the CPU fallback. NOTE(review): early-return lines
// are missing from this extract; code kept byte-identical.
806 static bool ocl_polarToCart( InputArray _mag, InputArray _angle,
807 OutputArray _dst1, OutputArray _dst2, bool angleInDegrees )
809 const ocl::Device & d = ocl::Device::getDefault();
// Output geometry/type follow the angle array.
810 int type = _angle.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
811 rowsPerWI = d.isIntel() ? 4 : 1;
812 bool doubleSupport = d.doubleFPConfig() > 0;
814 if ( !doubleSupport && depth == CV_64F )
817 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
818 format("-D dstT=%s -D rowsPerWI=%d -D depth=%d -D BINARY_OP -D OP_PTC_%s%s",
819 ocl::typeToStr(CV_MAKE_TYPE(depth, 1)), rowsPerWI,
820 depth, angleInDegrees ? "AD" : "AR",
821 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
825 UMat mag = _mag.getUMat(), angle = _angle.getUMat();
826 Size size = angle.size();
827 CV_Assert(mag.size() == size);
829 _dst1.create(size, type);
830 _dst2.create(size, type);
831 UMat dst1 = _dst1.getUMat(), dst2 = _dst2.getUMat();
833 k.args(ocl::KernelArg::ReadOnlyNoSize(mag), ocl::KernelArg::ReadOnlyNoSize(angle),
834 ocl::KernelArg::WriteOnly(dst1, cn), ocl::KernelArg::WriteOnlyNoSize(dst2));
836 size_t globalsize[2] = { dst1.cols * cn, (dst1.rows + rowsPerWI - 1) / rowsPerWI };
837 return k.run(2, globalsize, NULL, false);
// Public cv::polarToCart: x = mag*cos(angle), y = mag*sin(angle). An empty
// src1 (magnitude) means unit magnitudes. Paths: OpenCL, then IPP (radians
// and continuous data only), then a CPU loop built on SinCos_32f — note the
// argument order (angle, y, x): sines land in y, cosines in x. CV_64F angles
// are staged through float buffers. NOTE(review): braces, #if guards and some
// lines are missing from this extract; code kept byte-identical.
842 void polarToCart( InputArray src1, InputArray src2,
843 OutputArray dst1, OutputArray dst2, bool angleInDegrees )
845 int type = src2.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
846 CV_Assert((depth == CV_32F || depth == CV_64F) && (src1.empty() || src1.type() == type));
848 CV_OCL_RUN(!src1.empty() && src2.dims() <= 2 && dst1.isUMat() && dst2.isUMat(),
849 ocl_polarToCart(src1, src2, dst1, dst2, angleInDegrees))
851 Mat Mag = src1.getMat(), Angle = src2.getMat();
852 CV_Assert( Mag.empty() || Angle.size == Mag.size);
853 dst1.create( Angle.dims, Angle.size, type );
854 dst2.create( Angle.dims, Angle.size, type );
855 Mat X = dst1.getMat(), Y = dst2.getMat();
// IPP fast path: only for continuous data and radian angles.
857 #if defined(HAVE_IPP)
860 if (Mag.isContinuous() && Angle.isContinuous() && X.isContinuous() && Y.isContinuous() && !angleInDegrees)
862 typedef IppStatus (CV_STDCALL * ippsPolarToCart)(const void * pSrcMagn, const void * pSrcPhase,
863 void * pDstRe, void * pDstIm, int len);
864 ippsPolarToCart ippFunc =
865 depth == CV_32F ? (ippsPolarToCart)ippsPolarToCart_32f :
866 depth == CV_64F ? (ippsPolarToCart)ippsPolarToCart_64f : 0;
867 CV_Assert(ippFunc != 0);
869 IppStatus status = ippFunc(Mag.ptr(), Angle.ptr(), X.ptr(), Y.ptr(), static_cast<int>(cn * X.total()));
872 CV_IMPL_ADD(CV_IMPL_IPP);
880 const Mat* arrays[] = {&Mag, &Angle, &X, &Y, 0};
882 NAryMatIterator it(arrays, ptrs);
883 cv::AutoBuffer<float> _buf;
884 float* buf[2] = {0, 0};
885 int j, k, total = (int)(it.size*cn), blockSize = std::min(total, ((BLOCK_SIZE+cn-1)/cn)*cn);
886 size_t esz1 = Angle.elemSize1();
887 // Float scratch buffers only needed for the CV_64F path.
888 if( depth == CV_64F )
890 _buf.allocate(blockSize*2);
892 buf[1] = buf[0] + blockSize;
895 for( size_t i = 0; i < it.nplanes; i++, ++it )
897 for( j = 0; j < total; j += blockSize )
899 int len = std::min(total - j, blockSize);
900 if( depth == CV_32F )
902 const float *mag = (const float*)ptrs[0], *angle = (const float*)ptrs[1];
903 float *x = (float*)ptrs[2], *y = (float*)ptrs[3];
// Writes cos into x and sin into y (note the swapped y/x arguments).
905 SinCos_32f( angle, y, x, len, angleInDegrees );
// NEON scale-by-magnitude for the non-unit-magnitude case.
911 for( ; k <= len - 4; k += 4 )
913 float32x4_t v_m = vld1q_f32(mag + k);
914 vst1q_f32(x + k, vmulq_f32(vld1q_f32(x + k), v_m));
915 vst1q_f32(y + k, vmulq_f32(vld1q_f32(y + k), v_m));
919 for( ; k < len; k++ )
922 x[k] *= m; y[k] *= m;
928 const double *mag = (const double*)ptrs[0], *angle = (const double*)ptrs[1];
929 double *x = (double*)ptrs[2], *y = (double*)ptrs[3];
// Narrow the double angles to float, compute sin/cos in single precision.
931 for( k = 0; k < len; k++ )
932 buf[0][k] = (float)angle[k];
934 SinCos_32f( buf[0], buf[1], buf[0], len, angleInDegrees );
// With magnitudes: scale by mag; without (empty src1): copy unit results.
936 for( k = 0; k < len; k++ )
939 x[k] = buf[0][k]*m; y[k] = buf[1][k]*m;
942 for( k = 0; k < len; k++ )
944 x[k] = buf[0][k]; y[k] = buf[1][k];
957 /****************************************************************************************\
959 \****************************************************************************************/
964 #if ( defined( WORDS_BIGENDIAN ) && !defined( OPENCV_UNIVERSAL_BUILD ) ) || defined( __BIG_ENDIAN__ )
// Exp() lookup machinery: the table holds 2^EXPTAB_SCALE (= 64) values of
// 2^(i/64), each pre-multiplied by the polynomial constant EXPPOLY_32F_A0 so
// the per-element polynomial can skip that factor. EXPTAB_MASK extracts the
// table index from the scaled/rounded argument. NOTE(review): the table's
// closing line falls outside this extract; entries kept byte-identical.
976 #define EXPTAB_SCALE 6
977 #define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1)
979 #define EXPPOLY_32F_A0 .9670371139572337719125840413672004409288e-2
981 static const double expTab[] = {
982 1.0 * EXPPOLY_32F_A0,
983 1.0108892860517004600204097905619 * EXPPOLY_32F_A0,
984 1.0218971486541166782344801347833 * EXPPOLY_32F_A0,
985 1.0330248790212284225001082839705 * EXPPOLY_32F_A0,
986 1.0442737824274138403219664787399 * EXPPOLY_32F_A0,
987 1.0556451783605571588083413251529 * EXPPOLY_32F_A0,
988 1.0671404006768236181695211209928 * EXPPOLY_32F_A0,
989 1.0787607977571197937406800374385 * EXPPOLY_32F_A0,
990 1.0905077326652576592070106557607 * EXPPOLY_32F_A0,
991 1.1023825833078409435564142094256 * EXPPOLY_32F_A0,
992 1.1143867425958925363088129569196 * EXPPOLY_32F_A0,
993 1.126521618608241899794798643787 * EXPPOLY_32F_A0,
994 1.1387886347566916537038302838415 * EXPPOLY_32F_A0,
995 1.151189229952982705817759635202 * EXPPOLY_32F_A0,
996 1.1637248587775775138135735990922 * EXPPOLY_32F_A0,
997 1.1763969916502812762846457284838 * EXPPOLY_32F_A0,
998 1.1892071150027210667174999705605 * EXPPOLY_32F_A0,
999 1.2021567314527031420963969574978 * EXPPOLY_32F_A0,
1000 1.2152473599804688781165202513388 * EXPPOLY_32F_A0,
1001 1.2284805361068700056940089577928 * EXPPOLY_32F_A0,
1002 1.2418578120734840485936774687266 * EXPPOLY_32F_A0,
1003 1.2553807570246910895793906574423 * EXPPOLY_32F_A0,
1004 1.2690509571917332225544190810323 * EXPPOLY_32F_A0,
1005 1.2828700160787782807266697810215 * EXPPOLY_32F_A0,
1006 1.2968395546510096659337541177925 * EXPPOLY_32F_A0,
1007 1.3109612115247643419229917863308 * EXPPOLY_32F_A0,
1008 1.3252366431597412946295370954987 * EXPPOLY_32F_A0,
1009 1.3396675240533030053600306697244 * EXPPOLY_32F_A0,
1010 1.3542555469368927282980147401407 * EXPPOLY_32F_A0,
1011 1.3690024229745906119296011329822 * EXPPOLY_32F_A0,
1012 1.3839098819638319548726595272652 * EXPPOLY_32F_A0,
1013 1.3989796725383111402095281367152 * EXPPOLY_32F_A0,
1014 1.4142135623730950488016887242097 * EXPPOLY_32F_A0,
1015 1.4296133383919700112350657782751 * EXPPOLY_32F_A0,
1016 1.4451808069770466200370062414717 * EXPPOLY_32F_A0,
1017 1.4609177941806469886513028903106 * EXPPOLY_32F_A0,
1018 1.476826145939499311386907480374 * EXPPOLY_32F_A0,
1019 1.4929077282912648492006435314867 * EXPPOLY_32F_A0,
1020 1.5091644275934227397660195510332 * EXPPOLY_32F_A0,
1021 1.5255981507445383068512536895169 * EXPPOLY_32F_A0,
1022 1.5422108254079408236122918620907 * EXPPOLY_32F_A0,
1023 1.5590044002378369670337280894749 * EXPPOLY_32F_A0,
1024 1.5759808451078864864552701601819 * EXPPOLY_32F_A0,
1025 1.5931421513422668979372486431191 * EXPPOLY_32F_A0,
1026 1.6104903319492543081795206673574 * EXPPOLY_32F_A0,
1027 1.628027421857347766848218522014 * EXPPOLY_32F_A0,
1028 1.6457554781539648445187567247258 * EXPPOLY_32F_A0,
1029 1.6636765803267364350463364569764 * EXPPOLY_32F_A0,
1030 1.6817928305074290860622509524664 * EXPPOLY_32F_A0,
1031 1.7001063537185234695013625734975 * EXPPOLY_32F_A0,
1032 1.7186192981224779156293443764563 * EXPPOLY_32F_A0,
1033 1.7373338352737062489942020818722 * EXPPOLY_32F_A0,
1034 1.7562521603732994831121606193753 * EXPPOLY_32F_A0,
1035 1.7753764925265212525505592001993 * EXPPOLY_32F_A0,
1036 1.7947090750031071864277032421278 * EXPPOLY_32F_A0,
1037 1.8142521755003987562498346003623 * EXPPOLY_32F_A0,
1038 1.8340080864093424634870831895883 * EXPPOLY_32F_A0,
1039 1.8539791250833855683924530703377 * EXPPOLY_32F_A0,
1040 1.8741676341102999013299989499544 * EXPPOLY_32F_A0,
1041 1.8945759815869656413402186534269 * EXPPOLY_32F_A0,
1042 1.9152065613971472938726112702958 * EXPPOLY_32F_A0,
1043 1.9360617934922944505980559045667 * EXPPOLY_32F_A0,
1044 1.9571441241754002690183222516269 * EXPPOLY_32F_A0,
1045 1.9784560263879509682582499181312 * EXPPOLY_32F_A0,
1049 // the code below uses _mm_cast* intrinsics, which are not available on VS2005
1050 #if (defined _MSC_VER && _MSC_VER < 1500) || \
1051 (!defined __APPLE__ && defined __GNUC__ && __GNUC__*100 + __GNUC_MINOR__ < 402)
// Argument scaling for Exp_*: prescale = log2(e) * 2^EXPTAB_SCALE converts the
// input into table units; postscale undoes the 2^EXPTAB_SCALE factor for the
// fractional part; exp_max_val clamps arguments to avoid overflow.
1056 static const double exp_prescale = 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE);
1057 static const double exp_postscale = 1./(1 << EXPTAB_SCALE);
1058 static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000
// Compute y[i] = exp(_x[i]) for n single-precision values.
//
// Method: scale x by exp_prescale = log2(e)*2^EXPTAB_SCALE, split into an
// integer part (result exponent + expTab index) and a fractional remainder
// that feeds the degree-4 polynomial EXPPOLY. The SSE2 path processes 8
// floats per iteration; scalar code (4-at-a-time, then 1-at-a-time) covers
// the remainder.
// NOTE(review): this listing elides some lines (opening braces, the `i`,
// `buf` and cvRound/val* declarations, buf[].i assignments in the 32f tail);
// comments on those parts are inferred and should be confirmed upstream.
1060 static void Exp_32f( const float *_x, float *y, int n )
// Polynomial coefficients for exp on the reduced interval, pre-divided by
// EXPPOLY_32F_A0 (that factor is folded into the expTab entries).
1063 A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
1064 A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
1065 A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0),
1066 A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0);
// Horner evaluation of the degree-4 approximation polynomial.
1069 #define EXPPOLY(x) \
1070 (((((x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)
// Cv32suf gives simultaneous access to the float value (.f) and its bit
// pattern (.i) for the fast out-of-range test below.
1073 const Cv32suf* x = (const Cv32suf*)_x;
1077 if( n >= 8 && USE_SSE2 )
1079 static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
1080 static const __m128 postscale4 = _mm_set1_ps((float)exp_postscale);
// Clamp limits are expressed in *unscaled* units (divided by exp_prescale)
// because clamping happens before the prescale multiply.
1081 static const __m128 maxval4 = _mm_set1_ps((float)(exp_max_val/exp_prescale));
1082 static const __m128 minval4 = _mm_set1_ps((float)(-exp_max_val/exp_prescale));
1084 static const __m128 mA1 = _mm_set1_ps(A1);
1085 static const __m128 mA2 = _mm_set1_ps(A2);
1086 static const __m128 mA3 = _mm_set1_ps(A3);
1087 static const __m128 mA4 = _mm_set1_ps(A4);
// Used below to pick aligned vs unaligned stores into y.
1088 bool y_aligned = (size_t)(void*)y % 16 == 0;
// Table indices for 8 lanes, extracted to memory for scalar gather.
1090 ushort CV_DECL_ALIGNED(16) tab_idx[8];
1092 for( ; i <= n - 8; i += 8 )
1095 xf0 = _mm_loadu_ps(&x[i].f);
1096 xf1 = _mm_loadu_ps(&x[i+4].f);
1097 __m128i xi0, xi1, xi2, xi3;
// Saturate out-of-range inputs so the later double->int conversion is safe.
1099 xf0 = _mm_min_ps(_mm_max_ps(xf0, minval4), maxval4);
1100 xf1 = _mm_min_ps(_mm_max_ps(xf1, minval4), maxval4);
// Widen all 8 floats to doubles (2 lanes per register) so the prescale
// multiply and the integer split keep full precision.
1102 __m128d xd0 = _mm_cvtps_pd(xf0);
1103 __m128d xd2 = _mm_cvtps_pd(_mm_movehl_ps(xf0, xf0));
1104 __m128d xd1 = _mm_cvtps_pd(xf1);
1105 __m128d xd3 = _mm_cvtps_pd(_mm_movehl_ps(xf1, xf1));
1107 xd0 = _mm_mul_pd(xd0, prescale2);
1108 xd2 = _mm_mul_pd(xd2, prescale2);
1109 xd1 = _mm_mul_pd(xd1, prescale2);
1110 xd3 = _mm_mul_pd(xd3, prescale2);
// Round-to-nearest integer part: exponent + table index per lane.
1112 xi0 = _mm_cvtpd_epi32(xd0);
1113 xi2 = _mm_cvtpd_epi32(xd2);
1115 xi1 = _mm_cvtpd_epi32(xd1);
1116 xi3 = _mm_cvtpd_epi32(xd3);
// Fractional remainder = scaled x minus its rounded integer part.
1118 xd0 = _mm_sub_pd(xd0, _mm_cvtepi32_pd(xi0));
1119 xd2 = _mm_sub_pd(xd2, _mm_cvtepi32_pd(xi2));
1120 xd1 = _mm_sub_pd(xd1, _mm_cvtepi32_pd(xi1));
1121 xd3 = _mm_sub_pd(xd3, _mm_cvtepi32_pd(xi3));
// Narrow the remainders back to 2x4 floats for the polynomial.
1123 xf0 = _mm_movelh_ps(_mm_cvtpd_ps(xd0), _mm_cvtpd_ps(xd2));
1124 xf1 = _mm_movelh_ps(_mm_cvtpd_ps(xd1), _mm_cvtpd_ps(xd3));
1126 xf0 = _mm_mul_ps(xf0, postscale4);
1127 xf1 = _mm_mul_ps(xf1, postscale4);
// Pack the 8 integer parts into one register of 16-bit lanes.
1129 xi0 = _mm_unpacklo_epi64(xi0, xi2);
1130 xi1 = _mm_unpacklo_epi64(xi1, xi3);
1131 xi0 = _mm_packs_epi32(xi0, xi1);
// Low EXPTAB_SCALE bits select the table entry for each lane.
1133 _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi16(EXPTAB_MASK)));
// High bits + IEEE single bias (127) form the result exponent, clamped to
// the valid biased range [0, 255].
1135 xi0 = _mm_add_epi16(_mm_srai_epi16(xi0, EXPTAB_SCALE), _mm_set1_epi16(127));
1136 xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
1137 xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(255));
1138 xi1 = _mm_unpackhi_epi16(xi0, _mm_setzero_si128());
1139 xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
// Scalar gather of the 8 table values (doubles), then narrow to floats.
1141 __m128d yd0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
1142 __m128d yd1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
1143 __m128d yd2 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[4]), _mm_load_sd(expTab + tab_idx[5]));
1144 __m128d yd3 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[6]), _mm_load_sd(expTab + tab_idx[7]));
1146 __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
1147 __m128 yf1 = _mm_movelh_ps(_mm_cvtpd_ps(yd2), _mm_cvtpd_ps(yd3));
// Shifting the biased exponent into bits 30..23 builds the float 2^k
// directly from its bit pattern; multiply scales the table value by 2^k.
1149 yf0 = _mm_mul_ps(yf0, _mm_castsi128_ps(_mm_slli_epi32(xi0, 23)));
1150 yf1 = _mm_mul_ps(yf1, _mm_castsi128_ps(_mm_slli_epi32(xi1, 23)));
// Vectorized Horner evaluation of EXPPOLY on the remainders.
1152 __m128 zf0 = _mm_add_ps(xf0, mA1);
1153 __m128 zf1 = _mm_add_ps(xf1, mA1);
1155 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA2);
1156 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA2);
1158 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA3);
1159 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA3);
1161 zf0 = _mm_add_ps(_mm_mul_ps(zf0, xf0), mA4);
1162 zf1 = _mm_add_ps(_mm_mul_ps(zf1, xf1), mA4);
// Final result: 2^k * expTab[idx] * poly(frac).
1164 zf0 = _mm_mul_ps(zf0, yf0);
1165 zf1 = _mm_mul_ps(zf1, yf1);
// Aligned stores when y is 16-byte aligned (y_aligned), unaligned otherwise.
1169 _mm_store_ps(y + i, zf0);
1170 _mm_store_ps(y + i + 4, zf1);
1174 _mm_storeu_ps(y + i, zf0);
1175 _mm_storeu_ps(y + i + 4, zf1);
// Scalar path, unrolled by 4.
1181 for( ; i <= n - 4; i += 4 )
1183 double x0 = x[i].f * exp_prescale;
1184 double x1 = x[i + 1].f * exp_prescale;
1185 double x2 = x[i + 2].f * exp_prescale;
1186 double x3 = x[i + 3].f * exp_prescale;
1187 int val0, val1, val2, val3, t;
// Bit-level range check: biased exponent > 137 means |x| > 2^10, which is
// far outside exp's useful float range; saturate to +/-exp_max_val.
1189 if( ((x[i].i >> 23) & 255) > 127 + 10 )
1190 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
1192 if( ((x[i+1].i >> 23) & 255) > 127 + 10 )
1193 x1 = x[i+1].i < 0 ? -exp_max_val : exp_max_val;
1195 if( ((x[i+2].i >> 23) & 255) > 127 + 10 )
1196 x2 = x[i+2].i < 0 ? -exp_max_val : exp_max_val;
1198 if( ((x[i+3].i >> 23) & 255) > 127 + 10 )
1199 x3 = x[i+3].i < 0 ? -exp_max_val : exp_max_val;
// Remainder in the reduced interval for the polynomial (val* hold the
// rounded integer parts).
1206 x0 = (x0 - val0)*exp_postscale;
1207 x1 = (x1 - val1)*exp_postscale;
1208 x2 = (x2 - val2)*exp_postscale;
1209 x3 = (x3 - val3)*exp_postscale;
// Biased exponent for each lane, clamped branch-free to [0, 255];
// `!(t & ~255)` is true iff t is already in range.
1211 t = (val0 >> EXPTAB_SCALE) + 127;
1212 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1215 t = (val1 >> EXPTAB_SCALE) + 127;
1216 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1219 t = (val2 >> EXPTAB_SCALE) + 127;
1220 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1223 t = (val3 >> EXPTAB_SCALE) + 127;
1224 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
// buf[k].f is 2^k assembled from the clamped exponent bits (assembled in
// elided lines); final value = 2^k * table entry * polynomial.
1227 x0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
1228 x1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
1231 y[i + 1] = (float)x1;
1233 x2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
1234 x3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
1236 y[i + 2] = (float)x2;
1237 y[i + 3] = (float)x3;
// One-at-a-time tail: same scheme as above for the last n % 4 elements.
1242 double x0 = x[i].f * exp_prescale;
1245 if( ((x[i].i >> 23) & 255) > 127 + 10 )
1246 x0 = x[i].i < 0 ? -exp_max_val : exp_max_val;
1249 t = (val0 >> EXPTAB_SCALE) + 127;
1250 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
1253 x0 = (x0 - val0)*exp_postscale;
1255 y[i] = (float)(buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY(x0));
// Compute y[i] = exp(_x[i]) for n double-precision values.
//
// Same table-driven scheme as Exp_32f but with a degree-5 polynomial and
// IEEE double exponent handling (bias 1023, 11-bit exponent, shift 52).
// The SSE2 path processes 4 doubles per iteration.
// NOTE(review): this listing elides some lines (braces, `i`/`buf`
// declarations, cvRound statements); comments on those parts are inferred.
1260 static void Exp_64f( const double *_x, double *y, int n )
// Degree-5 polynomial coefficients, pre-divided by EXPPOLY_32F_A0
// (that factor is folded into the expTab entries).
1263 A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
1264 A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
1265 A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0,
1266 A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0,
1267 A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0,
1268 A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0;
// Horner evaluation of the degree-5 approximation polynomial.
1271 #define EXPPOLY(x) (((((A0*(x) + A1)*(x) + A2)*(x) + A3)*(x) + A4)*(x) + A5)
// Cv64suf exposes both the double value (.f) and its bit pattern (.i).
1275 const Cv64suf* x = (const Cv64suf*)_x;
1280 static const __m128d prescale2 = _mm_set1_pd(exp_prescale);
1281 static const __m128d postscale2 = _mm_set1_pd(exp_postscale);
// Unlike the 32f path, inputs are clamped *after* no prescale here: limits
// are in scaled units and the clamp precedes the prescale multiply below.
1282 static const __m128d maxval2 = _mm_set1_pd(exp_max_val);
1283 static const __m128d minval2 = _mm_set1_pd(-exp_max_val);
1285 static const __m128d mA0 = _mm_set1_pd(A0);
1286 static const __m128d mA1 = _mm_set1_pd(A1);
1287 static const __m128d mA2 = _mm_set1_pd(A2);
1288 static const __m128d mA3 = _mm_set1_pd(A3);
1289 static const __m128d mA4 = _mm_set1_pd(A4);
1290 static const __m128d mA5 = _mm_set1_pd(A5);
// Table indices for 4 lanes, spilled to memory for scalar gather.
1292 int CV_DECL_ALIGNED(16) tab_idx[4];
1294 for( ; i <= n - 4; i += 4 )
1296 __m128d xf0 = _mm_loadu_pd(&x[i].f), xf1 = _mm_loadu_pd(&x[i+2].f);
// Saturate out-of-range inputs so the double->int conversion stays valid.
1298 xf0 = _mm_min_pd(_mm_max_pd(xf0, minval2), maxval2);
1299 xf1 = _mm_min_pd(_mm_max_pd(xf1, minval2), maxval2);
1300 xf0 = _mm_mul_pd(xf0, prescale2);
1301 xf1 = _mm_mul_pd(xf1, prescale2);
// Integer part (exponent + table index) and fractional remainder per lane.
1303 xi0 = _mm_cvtpd_epi32(xf0);
1304 xi1 = _mm_cvtpd_epi32(xf1);
1305 xf0 = _mm_mul_pd(_mm_sub_pd(xf0, _mm_cvtepi32_pd(xi0)), postscale2);
1306 xf1 = _mm_mul_pd(_mm_sub_pd(xf1, _mm_cvtepi32_pd(xi1)), postscale2);
// Low EXPTAB_SCALE bits of the 4 integer parts select the table entries.
1308 xi0 = _mm_unpacklo_epi64(xi0, xi1);
1309 _mm_store_si128((__m128i*)tab_idx, _mm_and_si128(xi0, _mm_set1_epi32(EXPTAB_MASK)));
// High bits + IEEE double bias (1023) form the result exponent, clamped to
// the valid biased range [0, 2047], then widened back for the 64-bit shift.
1311 xi0 = _mm_add_epi32(_mm_srai_epi32(xi0, EXPTAB_SCALE), _mm_set1_epi32(1023));
1312 xi0 = _mm_packs_epi32(xi0, xi0);
1313 xi0 = _mm_max_epi16(xi0, _mm_setzero_si128());
1314 xi0 = _mm_min_epi16(xi0, _mm_set1_epi16(2047));
1315 xi0 = _mm_unpacklo_epi16(xi0, _mm_setzero_si128());
1316 xi1 = _mm_unpackhi_epi32(xi0, _mm_setzero_si128());
1317 xi0 = _mm_unpacklo_epi32(xi0, _mm_setzero_si128());
// Gather table values; shifting the biased exponent into bits 62..52 builds
// the double 2^k from its bit pattern, scaling each table value by 2^k.
1319 __m128d yf0 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[0]), _mm_load_sd(expTab + tab_idx[1]));
1320 __m128d yf1 = _mm_unpacklo_pd(_mm_load_sd(expTab + tab_idx[2]), _mm_load_sd(expTab + tab_idx[3]));
1321 yf0 = _mm_mul_pd(yf0, _mm_castsi128_pd(_mm_slli_epi64(xi0, 52)));
1322 yf1 = _mm_mul_pd(yf1, _mm_castsi128_pd(_mm_slli_epi64(xi1, 52)));
// Vectorized Horner evaluation of the degree-5 EXPPOLY.
1324 __m128d zf0 = _mm_add_pd(_mm_mul_pd(mA0, xf0), mA1);
1325 __m128d zf1 = _mm_add_pd(_mm_mul_pd(mA0, xf1), mA1);
1327 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA2);
1328 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA2);
1330 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA3);
1331 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA3);
1333 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA4);
1334 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA4);
1336 zf0 = _mm_add_pd(_mm_mul_pd(zf0, xf0), mA5);
1337 zf1 = _mm_add_pd(_mm_mul_pd(zf1, xf1), mA5);
// Final result: 2^k * expTab[idx] * poly(frac).
1339 zf0 = _mm_mul_pd(zf0, yf0);
1340 zf1 = _mm_mul_pd(zf1, yf1);
1342 _mm_storeu_pd(y + i, zf0);
1343 _mm_storeu_pd(y + i + 2, zf1);
// Scalar path, unrolled by 4.
1348 for( ; i <= n - 4; i += 4 )
1350 double x0 = x[i].f * exp_prescale;
1351 double x1 = x[i + 1].f * exp_prescale;
1352 double x2 = x[i + 2].f * exp_prescale;
1353 double x3 = x[i + 3].f * exp_prescale;
1355 double y0, y1, y2, y3;
1356 int val0, val1, val2, val3, t;
// Bit-level range check on the sign+exponent bits: biased exponent > 1033
// means |x| > 2^10, far outside exp's useful range; saturate. Note t keeps
// the sign bit (bit 11 after the shift), so t < 0 detects negative inputs.
1358 t = (int)(x[i].i >> 52);
1359 if( (t & 2047) > 1023 + 10 )
1360 x0 = t < 0 ? -exp_max_val : exp_max_val;
1362 t = (int)(x[i+1].i >> 52);
1363 if( (t & 2047) > 1023 + 10 )
1364 x1 = t < 0 ? -exp_max_val : exp_max_val;
1366 t = (int)(x[i+2].i >> 52);
1367 if( (t & 2047) > 1023 + 10 )
1368 x2 = t < 0 ? -exp_max_val : exp_max_val;
1370 t = (int)(x[i+3].i >> 52);
1371 if( (t & 2047) > 1023 + 10 )
1372 x3 = t < 0 ? -exp_max_val : exp_max_val;
// Remainder for the polynomial (val* hold the rounded integer parts).
1379 x0 = (x0 - val0)*exp_postscale;
1380 x1 = (x1 - val1)*exp_postscale;
1381 x2 = (x2 - val2)*exp_postscale;
1382 x3 = (x3 - val3)*exp_postscale;
// Biased exponent per lane, clamped branch-free to [0, 2047], then placed
// into the double's exponent field to build buf[k].f = 2^k.
1384 t = (val0 >> EXPTAB_SCALE) + 1023;
1385 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1386 buf[0].i = (int64)t << 52;
1388 t = (val1 >> EXPTAB_SCALE) + 1023;
1389 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1390 buf[1].i = (int64)t << 52;
1392 t = (val2 >> EXPTAB_SCALE) + 1023;
1393 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1394 buf[2].i = (int64)t << 52;
1396 t = (val3 >> EXPTAB_SCALE) + 1023;
1397 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1398 buf[3].i = (int64)t << 52;
// Final value per lane: 2^k * table entry * polynomial.
1400 y0 = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
1401 y1 = buf[1].f * expTab[val1 & EXPTAB_MASK] * EXPPOLY( x1 );
1406 y2 = buf[2].f * expTab[val2 & EXPTAB_MASK] * EXPPOLY( x2 );
1407 y3 = buf[3].f * expTab[val3 & EXPTAB_MASK] * EXPPOLY( x3 );
// One-at-a-time tail: same scheme for the last n % 4 elements.
1415 double x0 = x[i].f * exp_prescale;
1418 t = (int)(x[i].i >> 52);
1419 if( (t & 2047) > 1023 + 10 )
1420 x0 = t < 0 ? -exp_max_val : exp_max_val;
1423 t = (val0 >> EXPTAB_SCALE) + 1023;
1424 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
1426 buf[0].i = (int64)t << 52;
1427 x0 = (x0 - val0)*exp_postscale;
1429 y[i] = buf[0].f * expTab[val0 & EXPTAB_MASK] * EXPPOLY( x0 );
1435 #undef EXPPOLY_32F_A0
// IPP-accelerated exp for floats (A21 accuracy profile). On IPP failure the
// error status is recorded and (in elided lines) the call presumably falls
// back to the generic Exp_32f — confirm against the full source.
1438 static void Exp_32f_ipp(const float *x, float *y, int n)
// ippsExp returns a non-negative status on success.
1442 if (0 <= ippsExp_32f_A21(x, y, n))
// Record that the IPP implementation was used (collection of impl stats).
1444 CV_IMPL_ADD(CV_IMPL_IPP);
1447 setIppErrorStatus();
// IPP-accelerated exp for doubles (A50 accuracy profile). On IPP failure the
// error status is recorded and (in elided lines) the call presumably falls
// back to the generic Exp_64f — confirm against the full source.
1452 static void Exp_64f_ipp(const double *x, double *y, int n)
// ippsExp returns a non-negative status on success.
1456 if (0 <= ippsExp_64f_A50(x, y, n))
// Record that the IPP implementation was used (collection of impl stats).
1458 CV_IMPL_ADD(CV_IMPL_IPP);
1461 setIppErrorStatus();
1466 #define Exp_32f Exp_32f_ipp
1467 #define Exp_64f Exp_64f_ipp
// Public cv::exp: per-element natural exponent of a CV_32F/CV_64F array.
// Dispatches to the OpenCL kernel for UMat inputs, otherwise iterates the
// (possibly non-contiguous, n-dimensional) matrices plane by plane and calls
// the depth-specific kernel (Exp_32f/Exp_64f, or their IPP wrappers when
// the macros above redirect them).
1471 void exp( InputArray _src, OutputArray _dst )
1473 int type = _src.type(), depth = _src.depth(), cn = _src.channels();
// Only single- and double-precision floating-point inputs are supported.
1474 CV_Assert( depth == CV_32F || depth == CV_64F );
// OpenCL fast path for UMat destinations with <= 2 dimensions.
1476 CV_OCL_RUN(_dst.isUMat() && _src.dims() <= 2,
1477 ocl_math_op(_src, noArray(), _dst, OCL_OP_EXP))
1479 Mat src = _src.getMat();
// Destination gets the same dims/size/type as the source.
1480 _dst.create( src.dims, src.size, type );
1481 Mat dst = _dst.getMat();
// NAryMatIterator walks both matrices in lock-step over contiguous planes,
// handling non-contiguous and >2-dimensional data uniformly.
1483 const Mat* arrays[] = {&src, &dst, 0};
1485 NAryMatIterator it(arrays, ptrs);
// Elements per plane, counting all channels.
1486 int len = (int)(it.size*cn);
1488 for( size_t i = 0; i < it.nplanes; i++, ++it )
1490 if( depth == CV_32F )
1491 Exp_32f((const float*)ptrs[0], (float*)ptrs[1], len);
1493 Exp_64f((const double*)ptrs[0], (double*)ptrs[1], len);
1498 /****************************************************************************************\
1500 \****************************************************************************************/
1502 #define LOGTAB_SCALE 8
1503 #define LOGTAB_MASK ((1 << LOGTAB_SCALE) - 1)
1504 #define LOGTAB_MASK2 ((1 << (20 - LOGTAB_SCALE)) - 1)
1505 #define LOGTAB_MASK2_32F ((1 << (23 - LOGTAB_SCALE)) - 1)
1507 static const double CV_DECL_ALIGNED(16) icvLogTab[] = {
1508 0.0000000000000000000000000000000000000000, 1.000000000000000000000000000000000000000,
1509 .00389864041565732288852075271279318258166, .9961089494163424124513618677042801556420,
1510 .00778214044205494809292034119607706088573, .9922480620155038759689922480620155038760,
1511 .01165061721997527263705585198749759001657, .9884169884169884169884169884169884169884,
1512 .01550418653596525274396267235488267033361, .9846153846153846153846153846153846153846,
1513 .01934296284313093139406447562578250654042, .9808429118773946360153256704980842911877,
1514 .02316705928153437593630670221500622574241, .9770992366412213740458015267175572519084,
1515 .02697658769820207233514075539915211265906, .9733840304182509505703422053231939163498,
1516 .03077165866675368732785500469617545604706, .9696969696969696969696969696969696969697,
1517 .03455238150665972812758397481047722976656, .9660377358490566037735849056603773584906,
1518 .03831886430213659461285757856785494368522, .9624060150375939849624060150375939849624,
1519 .04207121392068705056921373852674150839447, .9588014981273408239700374531835205992509,
1520 .04580953603129420126371940114040626212953, .9552238805970149253731343283582089552239,
1521 .04953393512227662748292900118940451648088, .9516728624535315985130111524163568773234,
1522 .05324451451881227759255210685296333394944, .9481481481481481481481481481481481481481,
1523 .05694137640013842427411105973078520037234, .9446494464944649446494464944649446494465,
1524 .06062462181643483993820353816772694699466, .9411764705882352941176470588235294117647,
1525 .06429435070539725460836422143984236754475, .9377289377289377289377289377289377289377,
1526 .06795066190850773679699159401934593915938, .9343065693430656934306569343065693430657,
1527 .07159365318700880442825962290953611955044, .9309090909090909090909090909090909090909,
1528 .07522342123758751775142172846244648098944, .9275362318840579710144927536231884057971,
1529 .07884006170777602129362549021607264876369, .9241877256317689530685920577617328519856,
1530 .08244366921107458556772229485432035289706, .9208633093525179856115107913669064748201,
1531 .08603433734180314373940490213499288074675, .9175627240143369175627240143369175627240,
1532 .08961215868968712416897659522874164395031, .9142857142857142857142857142857142857143,
1533 .09317722485418328259854092721070628613231, .9110320284697508896797153024911032028470,
1534 .09672962645855109897752299730200320482256, .9078014184397163120567375886524822695035,
1535 .10026945316367513738597949668474029749630, .9045936395759717314487632508833922261484,
1536 .10379679368164355934833764649738441221420, .9014084507042253521126760563380281690141,
1537 .10731173578908805021914218968959175981580, .8982456140350877192982456140350877192982,
1538 .11081436634029011301105782649756292812530, .8951048951048951048951048951048951048951,
1539 .11430477128005862852422325204315711744130, .8919860627177700348432055749128919860627,
1540 .11778303565638344185817487641543266363440, .8888888888888888888888888888888888888889,
1541 .12124924363286967987640707633545389398930, .8858131487889273356401384083044982698962,
1542 .12470347850095722663787967121606925502420, .8827586206896551724137931034482758620690,
1543 .12814582269193003360996385708858724683530, .8797250859106529209621993127147766323024,
1544 .13157635778871926146571524895989568904040, .8767123287671232876712328767123287671233,
1545 .13499516453750481925766280255629681050780, .8737201365187713310580204778156996587031,
1546 .13840232285911913123754857224412262439730, .8707482993197278911564625850340136054422,
1547 .14179791186025733629172407290752744302150, .8677966101694915254237288135593220338983,
1548 .14518200984449788903951628071808954700830, .8648648648648648648648648648648648648649,
1549 .14855469432313711530824207329715136438610, .8619528619528619528619528619528619528620,
1550 .15191604202584196858794030049466527998450, .8590604026845637583892617449664429530201,
1551 .15526612891112392955683674244937719777230, .8561872909698996655518394648829431438127,
1552 .15860503017663857283636730244325008243330, .8533333333333333333333333333333333333333,
1553 .16193282026931324346641360989451641216880, .8504983388704318936877076411960132890365,
1554 .16524957289530714521497145597095368430010, .8476821192052980132450331125827814569536,
1555 .16855536102980664403538924034364754334090, .8448844884488448844884488448844884488449,
1556 .17185025692665920060697715143760433420540, .8421052631578947368421052631578947368421,
1557 .17513433212784912385018287750426679849630, .8393442622950819672131147540983606557377,
1558 .17840765747281828179637841458315961062910, .8366013071895424836601307189542483660131,
1559 .18167030310763465639212199675966985523700, .8338762214983713355048859934853420195440,
1560 .18492233849401198964024217730184318497780, .8311688311688311688311688311688311688312,
1561 .18816383241818296356839823602058459073300, .8284789644012944983818770226537216828479,
1562 .19139485299962943898322009772527962923050, .8258064516129032258064516129032258064516,
1563 .19461546769967164038916962454095482826240, .8231511254019292604501607717041800643087,
1564 .19782574332991986754137769821682013571260, .8205128205128205128205128205128205128205,
1565 .20102574606059073203390141770796617493040, .8178913738019169329073482428115015974441,
1566 .20421554142869088876999228432396193966280, .8152866242038216560509554140127388535032,
1567 .20739519434607056602715147164417430758480, .8126984126984126984126984126984126984127,
1568 .21056476910734961416338251183333341032260, .8101265822784810126582278481012658227848,
1569 .21372432939771812687723695489694364368910, .8075709779179810725552050473186119873817,
1570 .21687393830061435506806333251006435602900, .8050314465408805031446540880503144654088,
1571 .22001365830528207823135744547471404075630, .8025078369905956112852664576802507836991,
1572 .22314355131420973710199007200571941211830, .8000000000000000000000000000000000000000,
1573 .22626367865045338145790765338460914790630, .7975077881619937694704049844236760124611,
1574 .22937410106484582006380890106811420992010, .7950310559006211180124223602484472049689,
1575 .23247487874309405442296849741978803649550, .7925696594427244582043343653250773993808,
1576 .23556607131276688371634975283086532726890, .7901234567901234567901234567901234567901,
1577 .23864773785017498464178231643018079921600, .7876923076923076923076923076923076923077,
1578 .24171993688714515924331749374687206000090, .7852760736196319018404907975460122699387,
1579 .24478272641769091566565919038112042471760, .7828746177370030581039755351681957186544,
1580 .24783616390458124145723672882013488560910, .7804878048780487804878048780487804878049,
1581 .25088030628580937353433455427875742316250, .7781155015197568389057750759878419452888,
1582 .25391520998096339667426946107298135757450, .7757575757575757575757575757575757575758,
1583 .25694093089750041913887912414793390780680, .7734138972809667673716012084592145015106,
1584 .25995752443692604627401010475296061486000, .7710843373493975903614457831325301204819,
1585 .26296504550088134477547896494797896593800, .7687687687687687687687687687687687687688,
1586 .26596354849713793599974565040611196309330, .7664670658682634730538922155688622754491,
1587 .26895308734550393836570947314612567424780, .7641791044776119402985074626865671641791,
1588 .27193371548364175804834985683555714786050, .7619047619047619047619047619047619047619,
1589 .27490548587279922676529508862586226314300, .7596439169139465875370919881305637982196,
1590 .27786845100345625159121709657483734190480, .7573964497041420118343195266272189349112,
1591 .28082266290088775395616949026589281857030, .7551622418879056047197640117994100294985,
1592 .28376817313064456316240580235898960381750, .7529411764705882352941176470588235294118,
1593 .28670503280395426282112225635501090437180, .7507331378299120234604105571847507331378,
1594 .28963329258304265634293983566749375313530, .7485380116959064327485380116959064327485,
1595 .29255300268637740579436012922087684273730, .7463556851311953352769679300291545189504,
1596 .29546421289383584252163927885703742504130, .7441860465116279069767441860465116279070,
1597 .29836697255179722709783618483925238251680, .7420289855072463768115942028985507246377,
1598 .30126133057816173455023545102449133992200, .7398843930635838150289017341040462427746,
1599 .30414733546729666446850615102448500692850, .7377521613832853025936599423631123919308,
1600 .30702503529491181888388950937951449304830, .7356321839080459770114942528735632183908,
1601 .30989447772286465854207904158101882785550, .7335243553008595988538681948424068767908,
1602 .31275571000389684739317885942000430077330, .7314285714285714285714285714285714285714,
1603 .31560877898630329552176476681779604405180, .7293447293447293447293447293447293447293,
1604 .31845373111853458869546784626436419785030, .7272727272727272727272727272727272727273,
1605 .32129061245373424782201254856772720813750, .7252124645892351274787535410764872521246,
1606 .32411946865421192853773391107097268104550, .7231638418079096045197740112994350282486,
1607 .32694034499585328257253991068864706903700, .7211267605633802816901408450704225352113,
1608 .32975328637246797969240219572384376078850, .7191011235955056179775280898876404494382,
1609 .33255833730007655635318997155991382896900, .7170868347338935574229691876750700280112,
1610 .33535554192113781191153520921943709254280, .7150837988826815642458100558659217877095,
1611 .33814494400871636381467055798566434532400, .7130919220055710306406685236768802228412,
1612 .34092658697059319283795275623560883104800, .7111111111111111111111111111111111111111,
1613 .34370051385331840121395430287520866841080, .7091412742382271468144044321329639889197,
1614 .34646676734620857063262633346312213689100, .7071823204419889502762430939226519337017,
1615 .34922538978528827602332285096053965389730, .7052341597796143250688705234159779614325,
1616 .35197642315717814209818925519357435405250, .7032967032967032967032967032967032967033,
1617 .35471990910292899856770532096561510115850, .7013698630136986301369863013698630136986,
1618 .35745588892180374385176833129662554711100, .6994535519125683060109289617486338797814,
1619 .36018440357500774995358483465679455548530, .6975476839237057220708446866485013623978,
1620 .36290549368936841911903457003063522279280, .6956521739130434782608695652173913043478,
1621 .36561919956096466943762379742111079394830, .6937669376693766937669376693766937669377,
1622 .36832556115870762614150635272380895912650, .6918918918918918918918918918918918918919,
1623 .37102461812787262962487488948681857436900, .6900269541778975741239892183288409703504,
1624 .37371640979358405898480555151763837784530, .6881720430107526881720430107526881720430,
1625 .37640097516425302659470730759494472295050, .6863270777479892761394101876675603217158,
1626 .37907835293496944251145919224654790014030, .6844919786096256684491978609625668449198,
1627 .38174858149084833769393299007788300514230, .6826666666666666666666666666666666666667,
1628 .38441169891033200034513583887019194662580, .6808510638297872340425531914893617021277,
1629 .38706774296844825844488013899535872042180, .6790450928381962864721485411140583554377,
1630 .38971675114002518602873692543653305619950, .6772486772486772486772486772486772486772,
1631 .39235876060286384303665840889152605086580, .6754617414248021108179419525065963060686,
1632 .39499380824086893770896722344332374632350, .6736842105263157894736842105263157894737,
1633 .39762193064713846624158577469643205404280, .6719160104986876640419947506561679790026,
1634 .40024316412701266276741307592601515352730, .6701570680628272251308900523560209424084,
1635 .40285754470108348090917615991202183067800, .6684073107049608355091383812010443864230,
1636 .40546510810816432934799991016916465014230, .6666666666666666666666666666666666666667,
1637 .40806588980822172674223224930756259709600, .6649350649350649350649350649350649350649,
1638 .41065992498526837639616360320360399782650, .6632124352331606217616580310880829015544,
1639 .41324724855021932601317757871584035456180, .6614987080103359173126614987080103359173,
1640 .41582789514371093497757669865677598863850, .6597938144329896907216494845360824742268,
1641 .41840189913888381489925905043492093682300, .6580976863753213367609254498714652956298,
1642 .42096929464412963239894338585145305842150, .6564102564102564102564102564102564102564,
1643 .42353011550580327293502591601281892508280, .6547314578005115089514066496163682864450,
1644 .42608439531090003260516141381231136620050, .6530612244897959183673469387755102040816,
1645 .42863216738969872610098832410585600882780, .6513994910941475826972010178117048346056,
1646 .43117346481837132143866142541810404509300, .6497461928934010152284263959390862944162,
1647 .43370832042155937902094819946796633303180, .6481012658227848101265822784810126582278,
1648 .43623676677491801667585491486534010618930, .6464646464646464646464646464646464646465,
1649 .43875883620762790027214350629947148263450, .6448362720403022670025188916876574307305,
1650 .44127456080487520440058801796112675219780, .6432160804020100502512562814070351758794,
1651 .44378397241030093089975139264424797147500, .6416040100250626566416040100250626566416,
1652 .44628710262841947420398014401143882423650, .6400000000000000000000000000000000000000,
1653 .44878398282700665555822183705458883196130, .6384039900249376558603491271820448877805,
1654 .45127464413945855836729492693848442286250, .6368159203980099502487562189054726368159,
1655 .45375911746712049854579618113348260521900, .6352357320099255583126550868486352357320,
1656 .45623743348158757315857769754074979573500, .6336633663366336633663366336633663366337,
1657 .45870962262697662081833982483658473938700, .6320987654320987654320987654320987654321,
1658 .46117571512217014895185229761409573256980, .6305418719211822660098522167487684729064,
1659 .46363574096303250549055974261136725544930, .6289926289926289926289926289926289926290,
1660 .46608972992459918316399125615134835243230, .6274509803921568627450980392156862745098,
1661 .46853771156323925639597405279346276074650, .6259168704156479217603911980440097799511,
1662 .47097971521879100631480241645476780831830, .6243902439024390243902439024390243902439,
1663 .47341577001667212165614273544633761048330, .6228710462287104622871046228710462287105,
1664 .47584590486996386493601107758877333253630, .6213592233009708737864077669902912621359,
1665 .47827014848147025860569669930555392056700, .6198547215496368038740920096852300242131,
1666 .48068852934575190261057286988943815231330, .6183574879227053140096618357487922705314,
1667 .48310107575113581113157579238759353756900, .6168674698795180722891566265060240963855,
1668 .48550781578170076890899053978500887751580, .6153846153846153846153846153846153846154,
1669 .48790877731923892879351001283794175833480, .6139088729016786570743405275779376498801,
1670 .49030398804519381705802061333088204264650, .6124401913875598086124401913875598086124,
1671 .49269347544257524607047571407747454941280, .6109785202863961813842482100238663484487,
1672 .49507726679785146739476431321236304938800, .6095238095238095238095238095238095238095,
1673 .49745538920281889838648226032091770321130, .6080760095011876484560570071258907363420,
1674 .49982786955644931126130359189119189977650, .6066350710900473933649289099526066350711,
1675 .50219473456671548383667413872899487614650, .6052009456264775413711583924349881796690,
1676 .50455601075239520092452494282042607665050, .6037735849056603773584905660377358490566,
1677 .50691172444485432801997148999362252652650, .6023529411764705882352941176470588235294,
1678 .50926190178980790257412536448100581765150, .6009389671361502347417840375586854460094,
1679 .51160656874906207391973111953120678663250, .5995316159250585480093676814988290398126,
1680 .51394575110223428282552049495279788970950, .5981308411214953271028037383177570093458,
1681 .51627947444845445623684554448118433356300, .5967365967365967365967365967365967365967,
1682 .51860776420804555186805373523384332656850, .5953488372093023255813953488372093023256,
1683 .52093064562418522900344441950437612831600, .5939675174013921113689095127610208816705,
1684 .52324814376454775732838697877014055848100, .5925925925925925925925925925925925925926,
1685 .52556028352292727401362526507000438869000, .5912240184757505773672055427251732101617,
1686 .52786708962084227803046587723656557500350, .5898617511520737327188940092165898617512,
1687 .53016858660912158374145519701414741575700, .5885057471264367816091954022988505747126,
1688 .53246479886947173376654518506256863474850, .5871559633027522935779816513761467889908,
1689 .53475575061602764748158733709715306758900, .5858123569794050343249427917620137299771,
1690 .53704146589688361856929077475797384977350, .5844748858447488584474885844748858447489,
1691 .53932196859560876944783558428753167390800, .5831435079726651480637813211845102505695,
1692 .54159728243274429804188230264117009937750, .5818181818181818181818181818181818181818,
1693 .54386743096728351609669971367111429572100, .5804988662131519274376417233560090702948,
1694 .54613243759813556721383065450936555862450, .5791855203619909502262443438914027149321,
1695 .54839232556557315767520321969641372561450, .5778781038374717832957110609480812641084,
1696 .55064711795266219063194057525834068655950, .5765765765765765765765765765765765765766,
1697 .55289683768667763352766542084282264113450, .5752808988764044943820224719101123595506,
1698 .55514150754050151093110798683483153581600, .5739910313901345291479820627802690582960,
1699 .55738115013400635344709144192165695130850, .5727069351230425055928411633109619686801,
1700 .55961578793542265941596269840374588966350, .5714285714285714285714285714285714285714,
1701 .56184544326269181269140062795486301183700, .5701559020044543429844097995545657015590,
1702 .56407013828480290218436721261241473257550, .5688888888888888888888888888888888888889,
1703 .56628989502311577464155334382667206227800, .5676274944567627494456762749445676274945,
1704 .56850473535266865532378233183408156037350, .5663716814159292035398230088495575221239,
1705 .57071468100347144680739575051120482385150, .5651214128035320088300220750551876379691,
1706 .57291975356178548306473885531886480748650, .5638766519823788546255506607929515418502,
1707 .57511997447138785144460371157038025558000, .5626373626373626373626373626373626373626,
1708 .57731536503482350219940144597785547375700, .5614035087719298245614035087719298245614,
1709 .57950594641464214795689713355386629700650, .5601750547045951859956236323851203501094,
1710 .58169173963462239562716149521293118596100, .5589519650655021834061135371179039301310,
1711 .58387276558098266665552955601015128195300, .5577342047930283224400871459694989106754,
1712 .58604904500357812846544902640744112432000, .5565217391304347826086956521739130434783,
1713 .58822059851708596855957011939608491957200, .5553145336225596529284164859002169197397,
1714 .59038744660217634674381770309992134571100, .5541125541125541125541125541125541125541,
1715 .59254960960667157898740242671919986605650, .5529157667386609071274298056155507559395,
1716 .59470710774669277576265358220553025603300, .5517241379310344827586206896551724137931,
1717 .59685996110779382384237123915227130055450, .5505376344086021505376344086021505376344,
1718 .59900818964608337768851242799428291618800, .5493562231759656652360515021459227467811,
1719 .60115181318933474940990890900138765573500, .5481798715203426124197002141327623126338,
1720 .60329085143808425240052883964381180703650, .5470085470085470085470085470085470085470,
1721 .60542532396671688843525771517306566238400, .5458422174840085287846481876332622601279,
1722 .60755525022454170969155029524699784815300, .5446808510638297872340425531914893617021,
1723 .60968064953685519036241657886421307921400, .5435244161358811040339702760084925690021,
1724 .61180154110599282990534675263916142284850, .5423728813559322033898305084745762711864,
1725 .61391794401237043121710712512140162289150, .5412262156448202959830866807610993657505,
1726 .61602987721551394351138242200249806046500, .5400843881856540084388185654008438818565,
1727 .61813735955507864705538167982012964785100, .5389473684210526315789473684210526315789,
1728 .62024040975185745772080281312810257077200, .5378151260504201680672268907563025210084,
1729 .62233904640877868441606324267922900617100, .5366876310272536687631027253668763102725,
1730 .62443328801189346144440150965237990021700, .5355648535564853556485355648535564853556,
1731 .62652315293135274476554741340805776417250, .5344467640918580375782881002087682672234,
1732 .62860865942237409420556559780379757285100, .5333333333333333333333333333333333333333,
1733 .63068982562619868570408243613201193511500, .5322245322245322245322245322245322245322,
1734 .63276666957103777644277897707070223987100, .5311203319502074688796680497925311203320,
1735 .63483920917301017716738442686619237065300, .5300207039337474120082815734989648033126,
1736 .63690746223706917739093569252872839570050, .5289256198347107438016528925619834710744,
1737 .63897144645792069983514238629140891134750, .5278350515463917525773195876288659793814,
1738 .64103117942093124081992527862894348800200, .5267489711934156378600823045267489711934,
1739 .64308667860302726193566513757104985415950, .5256673511293634496919917864476386036961,
1740 .64513796137358470073053240412264131009600, .5245901639344262295081967213114754098361,
1741 .64718504499530948859131740391603671014300, .5235173824130879345603271983640081799591,
1742 .64922794662510974195157587018911726772800, .5224489795918367346938775510204081632653,
1743 .65126668331495807251485530287027359008800, .5213849287169042769857433808553971486762,
1744 .65330127201274557080523663898929953575150, .5203252032520325203252032520325203252033,
1745 .65533172956312757406749369692988693714150, .5192697768762677484787018255578093306288,
1746 .65735807270835999727154330685152672231200, .5182186234817813765182186234817813765182,
1747 .65938031808912778153342060249997302889800, .5171717171717171717171717171717171717172,
1748 .66139848224536490484126716182800009846700, .5161290322580645161290322580645161290323,
1749 .66341258161706617713093692145776003599150, .5150905432595573440643863179074446680080,
1750 .66542263254509037562201001492212526500250, .5140562248995983935742971887550200803213,
1751 .66742865127195616370414654738851822912700, .5130260521042084168336673346693386773547,
1752 .66943065394262923906154583164607174694550, .5120000000000000000000000000000000000000,
1753 .67142865660530226534774556057527661323550, .5109780439121756487025948103792415169661,
1754 .67342267521216669923234121597488410770900, .5099601593625498007968127490039840637450,
1755 .67541272562017662384192817626171745359900, .5089463220675944333996023856858846918489,
1756 .67739882359180603188519853574689477682100, .5079365079365079365079365079365079365079,
1757 .67938098479579733801614338517538271844400, .5069306930693069306930693069306930693069,
1758 .68135922480790300781450241629499942064300, .5059288537549407114624505928853754940711,
1759 .68333355911162063645036823800182901322850, .5049309664694280078895463510848126232742,
1760 .68530400309891936760919861626462079584600, .5039370078740157480314960629921259842520,
1761 .68727057207096020619019327568821609020250, .5029469548133595284872298624754420432220,
1762 .68923328123880889251040571252815425395950, .5019607843137254901960784313725490196078,
1763 .69314718055994530941723212145818, 5.0e-01,
// Maps a mantissa onto its log-table bin: icvLogTab stores interleaved pairs
// (log value at entry h, reciprocal at entry h+1), so ((x) - 1)*icvLogTab[(h)+1]
// is the normalized remainder within the bin, later fed to LOGPOLY.
1768 #define LOGTAB_TRANSLATE(x,h) (((x) - 1.)*icvLogTab[(h)+1])
// ln(2); converts the IEEE-754 binary exponent into natural-log units.
1769 static const double ln_2 = 0.69314718055994530941723212145818;
// Natural logarithm of n floats: y[i] = ln(x[i]).
// Method (table + polynomial): reinterpret the float bits, peel off the
// exponent (contributes e*ln_2), look up the leading mantissa bits in
// icvLogTab, and correct the remainder with a cubic LOGPOLY in A0..A2.
// Has an SSE2 4-at-a-time path and a scalar fallback.
// NOTE(review): this listing is elided — intervening source lines (braces,
// some declarations such as A1/A2, h0..h3, buf, i) are not shown here.
1771 static void Log_32f( const float *_x, float *y, int n )
// shift[1] compensates the top table bin (index 510), where the mantissa
// wraps toward the next octave; selected via shift[h == 510] below.
1773 static const float shift[] = { 0, -1.f/512 };
1775 A0 = 0.3333333333333333333333333f,
1780 #define LOGPOLY(x) (((A0*(x) + A1)*(x) + A2)*(x))
// Treat the float input as raw 32-bit words for exponent/mantissa extraction.
1784 const int* x = (const int*)_x;
1789 static const __m128d ln2_2 = _mm_set1_pd(ln_2);
1790 static const __m128 _1_4 = _mm_set1_ps(1.f);
1791 static const __m128 shift4 = _mm_set1_ps(-1.f/512);
1793 static const __m128 mA0 = _mm_set1_ps(A0);
1794 static const __m128 mA1 = _mm_set1_ps(A1);
1795 static const __m128 mA2 = _mm_set1_ps(A2);
1797 int CV_DECL_ALIGNED(16) idx[4];
// SSE2 path: 4 floats per iteration.
1799 for( ; i <= n - 4; i += 4 )
1801 __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
// Unbiased exponent e = ((bits >> 23) & 255) - 127, times ln(2) in doubles.
1802 __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 23), _mm_set1_epi32(255)), _mm_set1_epi32(127));
1803 __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
1804 __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0,yi0)), ln2_2);
// Rebuild the mantissa with a forced exponent of 0 (bias 127) -> value in [1,2).
1806 __m128i xi0 = _mm_or_si128(_mm_and_si128(h0, _mm_set1_epi32(LOGTAB_MASK2_32F)), _mm_set1_epi32(127 << 23));
// Table index = top LOGTAB_SCALE mantissa bits, scaled by 2 (pairs in table).
1808 h0 = _mm_and_si128(_mm_srli_epi32(h0, 23 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK*2));
1809 _mm_store_si128((__m128i*)idx, h0);
// Mask of lanes that hit the last bin (510); used to apply shift4 below.
1810 h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
// Gather the four (log, reciprocal) pairs and transpose: t0/t2 = log values,
// t1/t3 = reciprocals.
1812 __m128d t0, t1, t2, t3, t4;
1813 t0 = _mm_load_pd(icvLogTab + idx[0]);
1814 t2 = _mm_load_pd(icvLogTab + idx[1]);
1815 t1 = _mm_unpackhi_pd(t0, t2);
1816 t0 = _mm_unpacklo_pd(t0, t2);
1817 t2 = _mm_load_pd(icvLogTab + idx[2]);
1818 t4 = _mm_load_pd(icvLogTab + idx[3]);
1819 t3 = _mm_unpackhi_pd(t2, t4);
1820 t2 = _mm_unpacklo_pd(t2, t4);
1822 yd0 = _mm_add_pd(yd0, t0);
1823 yd1 = _mm_add_pd(yd1, t2);
1825 __m128 yf0 = _mm_movelh_ps(_mm_cvtpd_ps(yd0), _mm_cvtpd_ps(yd1));
// Vector form of LOGTAB_TRANSLATE: (m - 1) * (1/x_k), plus last-bin shift.
1827 __m128 xf0 = _mm_sub_ps(_mm_castsi128_ps(xi0), _1_4);
1828 xf0 = _mm_mul_ps(xf0, _mm_movelh_ps(_mm_cvtpd_ps(t1), _mm_cvtpd_ps(t3)));
1829 xf0 = _mm_add_ps(xf0, _mm_and_ps(_mm_castsi128_ps(h0), shift4));
// Horner evaluation of the cubic correction polynomial.
1831 __m128 zf0 = _mm_mul_ps(xf0, mA0);
1832 zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA1), xf0);
1833 zf0 = _mm_mul_ps(_mm_add_ps(zf0, mA2), xf0);
1834 yf0 = _mm_add_ps(yf0, zf0);
1836 _mm_storeu_ps(y + i, yf0);
// Scalar path, unrolled 4x; same exponent/table/polynomial decomposition.
1841 for( ; i <= n - 4; i += 4 )
1843 double x0, x1, x2, x3;
1844 double y0, y1, y2, y3;
1849 buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
1850 buf[1].i = (h1 & LOGTAB_MASK2_32F) | (127 << 23);
1852 y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
1853 y1 = (((h1 >> 23) & 0xff) - 127) * ln_2;
1855 h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1856 h1 = (h1 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1858 y0 += icvLogTab[h0];
1859 y1 += icvLogTab[h1];
1864 x0 = LOGTAB_TRANSLATE( buf[0].f, h0 );
1865 x1 = LOGTAB_TRANSLATE( buf[1].f, h1 );
1867 buf[2].i = (h2 & LOGTAB_MASK2_32F) | (127 << 23);
1868 buf[3].i = (h3 & LOGTAB_MASK2_32F) | (127 << 23);
1870 y2 = (((h2 >> 23) & 0xff) - 127) * ln_2;
1871 y3 = (((h3 >> 23) & 0xff) - 127) * ln_2;
1873 h2 = (h2 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1874 h3 = (h3 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1876 y2 += icvLogTab[h2];
1877 y3 += icvLogTab[h3];
1879 x2 = LOGTAB_TRANSLATE( buf[2].f, h2 );
1880 x3 = LOGTAB_TRANSLATE( buf[3].f, h3 );
// Last-bin correction, then polynomial remainder; see shift[] above.
1882 x0 += shift[h0 == 510];
1883 x1 += shift[h1 == 510];
1884 y0 += LOGPOLY( x0 );
1885 y1 += LOGPOLY( x1 );
1888 y[i + 1] = (float) y1;
1890 x2 += shift[h2 == 510];
1891 x3 += shift[h3 == 510];
1892 y2 += LOGPOLY( x2 );
1893 y3 += LOGPOLY( x3 );
1895 y[i + 2] = (float) y2;
1896 y[i + 3] = (float) y3;
// Tail loop: one element at a time (same decomposition as above).
1905 y0 = (((h0 >> 23) & 0xff) - 127) * ln_2;
1907 buf[0].i = (h0 & LOGTAB_MASK2_32F) | (127 << 23);
1908 h0 = (h0 >> (23 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
1910 y0 += icvLogTab[h0];
1911 x0 = (float)LOGTAB_TRANSLATE( buf[0].f, h0 );
1912 x0 += shift[h0 == 510];
1913 y0 += LOGPOLY( x0 );
// Natural logarithm of n doubles: y[i] = ln(x[i]).
// Same exponent + table + polynomial scheme as Log_32f, but works on the
// 11-bit double exponent (bias 1023) and uses a degree-7 odd/even-split
// polynomial (A0..A7) for the extra precision. SSE2 path plus scalar path.
// NOTE(review): this listing is elided — intervening source lines (braces,
// A0/A3/A4/A6/A7 constants, h0..h3/X/buf declarations) are not shown here.
1920 static void Log_64f( const double *x, double *y, int n )
// Last-bin (index 510) compensation, chosen via shift[k] in LOGPOLY.
1922 static const double shift[] = { 0, -1./512 };
1926 A5 = 0.333333333333333314829616256247390992939472198486328125,
1929 A2 = -0.1666666666666666574148081281236954964697360992431640625,
1930 A1 = 0.1428571428571428769682682968777953647077083587646484375,
// Evaluates the remainder polynomial with even terms on xq = x*x and odd
// terms on x, after applying the optional last-bin shift.
1934 #define LOGPOLY(x,k) ((x)+=shift[k], xq = (x)*(x),\
1935 (((A0*xq + A2)*xq + A4)*xq + A6)*xq + \
1936 (((A1*xq + A3)*xq + A5)*xq + A7)*(x))
// View the doubles as (lo, hi) 32-bit halves for bit manipulation.
1940 DBLINT *X = (DBLINT *) x;
1945 static const __m128d ln2_2 = _mm_set1_pd(ln_2);
1946 static const __m128d _1_2 = _mm_set1_pd(1.);
1947 static const __m128d shift2 = _mm_set1_pd(-1./512);
// Keep full low word + masked high word; force exponent bits to 1023 (==0).
1949 static const __m128i log_and_mask2 = _mm_set_epi32(LOGTAB_MASK2, 0xffffffff, LOGTAB_MASK2, 0xffffffff);
1950 static const __m128i log_or_mask2 = _mm_set_epi32(1023 << 20, 0, 1023 << 20, 0);
1952 static const __m128d mA0 = _mm_set1_pd(A0);
1953 static const __m128d mA1 = _mm_set1_pd(A1);
1954 static const __m128d mA2 = _mm_set1_pd(A2);
1955 static const __m128d mA3 = _mm_set1_pd(A3);
1956 static const __m128d mA4 = _mm_set1_pd(A4);
1957 static const __m128d mA5 = _mm_set1_pd(A5);
1958 static const __m128d mA6 = _mm_set1_pd(A6);
1959 static const __m128d mA7 = _mm_set1_pd(A7);
1961 int CV_DECL_ALIGNED(16) idx[4];
// SSE2 path: 4 doubles (two registers) per iteration.
1963 for( ; i <= n - 4; i += 4 )
1965 __m128i h0 = _mm_loadu_si128((const __m128i*)(x + i));
1966 __m128i h1 = _mm_loadu_si128((const __m128i*)(x + i + 2));
// Mantissas normalized into [1,2) by forcing the biased exponent to 1023.
1968 __m128d xd0 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h0, log_and_mask2), log_or_mask2));
1969 __m128d xd1 = _mm_castsi128_pd(_mm_or_si128(_mm_and_si128(h1, log_and_mask2), log_or_mask2));
// Pack the four high 32-bit words (holding exponent bits) into one register.
1971 h0 = _mm_unpackhi_epi32(_mm_unpacklo_epi32(h0, h1), _mm_unpackhi_epi32(h0, h1));
1973 __m128i yi0 = _mm_sub_epi32(_mm_and_si128(_mm_srli_epi32(h0, 20),
1974 _mm_set1_epi32(2047)), _mm_set1_epi32(1023));
1975 __m128d yd0 = _mm_mul_pd(_mm_cvtepi32_pd(yi0), ln2_2);
1976 __m128d yd1 = _mm_mul_pd(_mm_cvtepi32_pd(_mm_unpackhi_epi64(yi0, yi0)), ln2_2);
// Table index from the top mantissa bits (pairs, hence * 2).
1978 h0 = _mm_and_si128(_mm_srli_epi32(h0, 20 - LOGTAB_SCALE - 1), _mm_set1_epi32(LOGTAB_MASK * 2));
1979 _mm_store_si128((__m128i*)idx, h0);
// Lanes that hit the last bin (510); used to blend in shift2 later.
1980 h0 = _mm_cmpeq_epi32(h0, _mm_set1_epi32(510));
// Gather and transpose table pairs: t0/t2 = log values, t1/t3 = reciprocals.
1982 __m128d t0, t1, t2, t3, t4;
1983 t0 = _mm_load_pd(icvLogTab + idx[0]);
1984 t2 = _mm_load_pd(icvLogTab + idx[1]);
1985 t1 = _mm_unpackhi_pd(t0, t2);
1986 t0 = _mm_unpacklo_pd(t0, t2);
1987 t2 = _mm_load_pd(icvLogTab + idx[2]);
1988 t4 = _mm_load_pd(icvLogTab + idx[3]);
1989 t3 = _mm_unpackhi_pd(t2, t4);
1990 t2 = _mm_unpacklo_pd(t2, t4);
1992 yd0 = _mm_add_pd(yd0, t0);
1993 yd1 = _mm_add_pd(yd1, t2);
// LOGTAB_TRANSLATE in vector form, then the last-bin shift via the cmp mask.
1995 xd0 = _mm_mul_pd(_mm_sub_pd(xd0, _1_2), t1);
1996 xd1 = _mm_mul_pd(_mm_sub_pd(xd1, _1_2), t3);
1998 xd0 = _mm_add_pd(xd0, _mm_and_pd(_mm_castsi128_pd(_mm_unpacklo_epi32(h0, h0)), shift2));
1999 xd1 = _mm_add_pd(xd1, _mm_and_pd(_mm_castsi128_pd(_mm_unpackhi_epi32(h0, h0)), shift2));
// Degree-7 Horner evaluation (vector equivalent of LOGPOLY).
2001 __m128d zd0 = _mm_mul_pd(xd0, mA0);
2002 __m128d zd1 = _mm_mul_pd(xd1, mA0);
2003 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA1), xd0);
2004 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA1), xd1);
2005 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA2), xd0);
2006 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA2), xd1);
2007 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA3), xd0);
2008 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA3), xd1);
2009 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA4), xd0);
2010 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA4), xd1);
2011 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA5), xd0);
2012 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA5), xd1);
2013 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA6), xd0);
2014 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA6), xd1);
2015 zd0 = _mm_mul_pd(_mm_add_pd(zd0, mA7), xd0);
2016 zd1 = _mm_mul_pd(_mm_add_pd(zd1, mA7), xd1);
2018 yd0 = _mm_add_pd(yd0, zd0);
2019 yd1 = _mm_add_pd(yd1, zd1);
2021 _mm_storeu_pd(y + i, yd0);
2022 _mm_storeu_pd(y + i + 2, yd1);
// Scalar path, unrolled 4x; mirrors Log_32f but on the high 32-bit word.
2027 for( ; i <= n - 4; i += 4 )
2030 double x0, x1, x2, x3;
2031 double y0, y1, y2, y3;
2041 buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
2042 buf[1].i.hi = (h1 & LOGTAB_MASK2) | (1023 << 20);
2044 y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
2045 y1 = (((h1 >> 20) & 0x7ff) - 1023) * ln_2;
2052 h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2053 h1 = (h1 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2055 y0 += icvLogTab[h0];
2056 y1 += icvLogTab[h1];
2061 x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
2062 x1 = LOGTAB_TRANSLATE( buf[1].d, h1 );
2064 buf[2].i.hi = (h2 & LOGTAB_MASK2) | (1023 << 20);
2065 buf[3].i.hi = (h3 & LOGTAB_MASK2) | (1023 << 20);
2067 y2 = (((h2 >> 20) & 0x7ff) - 1023) * ln_2;
2068 y3 = (((h3 >> 20) & 0x7ff) - 1023) * ln_2;
2070 h2 = (h2 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2071 h3 = (h3 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2073 y2 += icvLogTab[h2];
2074 y3 += icvLogTab[h3];
2076 x2 = LOGTAB_TRANSLATE( buf[2].d, h2 );
2077 x3 = LOGTAB_TRANSLATE( buf[3].d, h3 );
// LOGPOLY applies shift[h==510] internally (second macro argument).
2079 y0 += LOGPOLY( x0, h0 == 510 );
2080 y1 += LOGPOLY( x1, h1 == 510 );
2085 y2 += LOGPOLY( x2, h2 == 510 );
2086 y3 += LOGPOLY( x3, h3 == 510 );
// Tail loop: one element at a time.
2096 double x0, y0 = (((h0 >> 20) & 0x7ff) - 1023) * ln_2;
2098 buf[0].i.hi = (h0 & LOGTAB_MASK2) | (1023 << 20);
2099 buf[0].i.lo = X[i].i.lo;
2100 h0 = (h0 >> (20 - LOGTAB_SCALE - 1)) & LOGTAB_MASK * 2;
2102 y0 += icvLogTab[h0];
2103 x0 = LOGTAB_TRANSLATE( buf[0].d, h0 );
2104 y0 += LOGPOLY( x0, h0 == 510 );
// Intel IPP-accelerated variants of Log_32f/Log_64f. On IPP success
// (non-negative status) the result is recorded via CV_IMPL_ADD and the
// function returns; on failure setIppErrorStatus() is called and (per the
// elided lines, not shown here) the generic implementation is presumably
// used as fallback — TODO confirm against the full source.
2110 static void Log_32f_ipp(const float *x, float *y, int n)
2114 if (0 <= ippsLn_32f_A21(x, y, n))
2116 CV_IMPL_ADD(CV_IMPL_IPP);
2119 setIppErrorStatus();
2124 static void Log_64f_ipp(const double *x, double *y, int n)
2128 if (0 <= ippsLn_64f_A50(x, y, n))
2130 CV_IMPL_ADD(CV_IMPL_IPP);
2133 setIppErrorStatus();
// Route all internal Log_32f/Log_64f callers through the IPP wrappers.
2138 #define Log_32f Log_32f_ipp
2139 #define Log_64f Log_64f_ipp
// cv::log: elementwise natural logarithm. Accepts CV_32F or CV_64F input
// (any channel count); tries the OpenCL path for UMat dst with <=2 dims,
// otherwise iterates over planes with NAryMatIterator and dispatches to the
// scalar/SIMD Log_32f / Log_64f kernels.
2142 void log( InputArray _src, OutputArray _dst )
2144 int type = _src.type(), depth = _src.depth(), cn = _src.channels();
2145 CV_Assert( depth == CV_32F || depth == CV_64F );
// OpenCL fast path; falls through if it cannot run.
2147 CV_OCL_RUN( _dst.isUMat() && _src.dims() <= 2,
2148 ocl_math_op(_src, noArray(), _dst, OCL_OP_LOG))
2150 Mat src = _src.getMat();
2151 _dst.create( src.dims, src.size, type );
2152 Mat dst = _dst.getMat();
2154 const Mat* arrays[] = {&src, &dst, 0};
2156 NAryMatIterator it(arrays, ptrs);
// Elements per plane, channels folded in (log is applied per scalar).
2157 int len = (int)(it.size*cn);
2159 for( size_t i = 0; i < it.nplanes; i++, ++it )
2161 if( depth == CV_32F )
2162 Log_32f( (const float*)ptrs[0], (float*)ptrs[1], len );
2164 Log_64f( (const double*)ptrs[0], (double*)ptrs[1], len );
2168 /****************************************************************************************\
2170 \****************************************************************************************/
// Primary template for SIMD integer-power kernels: the generic version does
// no vector work and returns the count of processed elements (0 here, per
// the elided body) so the scalar loop in iPow_ handles everything.
// Specializations below (NEON) override this per element type.
2172 template <typename T, typename WT>
2175 int operator() ( const T *, T *, int, int)
// NEON specialization: x^power for uchar via square-and-multiply
// (binary exponentiation), 8 pixels per iteration, widened to u32 to avoid
// overflow, then saturating-narrowed back to u8 on store.
// NOTE(review): the exponent-bit loop structure is elided in this listing.
2184 struct iPow_SIMD<uchar, int>
2186 int operator() ( const uchar * src, uchar * dst, int len, int power)
2189 uint32x4_t v_1 = vdupq_n_u32(1u);
2191 for ( ; i <= len - 8; i += 8)
2193 uint32x4_t v_a1 = v_1, v_a2 = v_1;
// Widen 8 x u8 -> 2 x (4 x u32).
2194 uint16x8_t v_src = vmovl_u8(vld1_u8(src + i));
2195 uint32x4_t v_b1 = vmovl_u16(vget_low_u16(v_src)), v_b2 = vmovl_u16(vget_high_u16(v_src));
// Square-and-multiply step: accumulate into a, square b.
2202 v_a1 = vmulq_u32(v_a1, v_b1);
2203 v_a2 = vmulq_u32(v_a2, v_b2);
2205 v_b1 = vmulq_u32(v_b1, v_b1);
2206 v_b2 = vmulq_u32(v_b2, v_b2);
// Final multiply, then saturating narrow u32 -> u16 -> u8.
2210 v_a1 = vmulq_u32(v_a1, v_b1);
2211 v_a2 = vmulq_u32(v_a2, v_b2);
2212 vst1_u8(dst + i, vqmovn_u16(vcombine_u16(vqmovn_u32(v_a1), vqmovn_u32(v_a2))));
// NEON specialization: x^power for schar via square-and-multiply, 8 per
// iteration, widened to s32, saturating-narrowed back to s8 on store.
// NOTE(review): the exponent-bit loop structure is elided in this listing.
2220 struct iPow_SIMD<schar, int>
2222 int operator() ( const schar * src, schar * dst, int len, int power)
2225 int32x4_t v_1 = vdupq_n_s32(1);
2227 for ( ; i <= len - 8; i += 8)
2229 int32x4_t v_a1 = v_1, v_a2 = v_1;
// Widen 8 x s8 -> 2 x (4 x s32).
2230 int16x8_t v_src = vmovl_s8(vld1_s8(src + i));
2231 int32x4_t v_b1 = vmovl_s16(vget_low_s16(v_src)), v_b2 = vmovl_s16(vget_high_s16(v_src));
// Square-and-multiply step.
2238 v_a1 = vmulq_s32(v_a1, v_b1);
2239 v_a2 = vmulq_s32(v_a2, v_b2);
2241 v_b1 = vmulq_s32(v_b1, v_b1);
2242 v_b2 = vmulq_s32(v_b2, v_b2);
// Final multiply and saturating narrow s32 -> s16 -> s8.
2246 v_a1 = vmulq_s32(v_a1, v_b1);
2247 v_a2 = vmulq_s32(v_a2, v_b2);
2248 vst1_s8(dst + i, vqmovn_s16(vcombine_s16(vqmovn_s32(v_a1), vqmovn_s32(v_a2))));
// NEON specialization: x^power for ushort via square-and-multiply, 8 per
// iteration, widened to u32, saturating-narrowed back to u16 on store.
// NOTE(review): the exponent-bit loop structure is elided in this listing.
2256 struct iPow_SIMD<ushort, int>
2258 int operator() ( const ushort * src, ushort * dst, int len, int power)
2261 uint32x4_t v_1 = vdupq_n_u32(1u);
2263 for ( ; i <= len - 8; i += 8)
2265 uint32x4_t v_a1 = v_1, v_a2 = v_1;
2266 uint16x8_t v_src = vld1q_u16(src + i);
2267 uint32x4_t v_b1 = vmovl_u16(vget_low_u16(v_src)), v_b2 = vmovl_u16(vget_high_u16(v_src));
// Square-and-multiply step.
2274 v_a1 = vmulq_u32(v_a1, v_b1);
2275 v_a2 = vmulq_u32(v_a2, v_b2);
2277 v_b1 = vmulq_u32(v_b1, v_b1);
2278 v_b2 = vmulq_u32(v_b2, v_b2);
// Final multiply and saturating narrow u32 -> u16.
2282 v_a1 = vmulq_u32(v_a1, v_b1);
2283 v_a2 = vmulq_u32(v_a2, v_b2);
2284 vst1q_u16(dst + i, vcombine_u16(vqmovn_u32(v_a1), vqmovn_u32(v_a2)));
// NEON specialization: x^power for short via square-and-multiply, 8 per
// iteration, widened to s32, saturating-narrowed back to s16 on store.
// NOTE(review): the exponent-bit loop structure is elided in this listing.
2292 struct iPow_SIMD<short, int>
2294 int operator() ( const short * src, short * dst, int len, int power)
2297 int32x4_t v_1 = vdupq_n_s32(1);
2299 for ( ; i <= len - 8; i += 8)
2301 int32x4_t v_a1 = v_1, v_a2 = v_1;
2302 int16x8_t v_src = vld1q_s16(src + i);
2303 int32x4_t v_b1 = vmovl_s16(vget_low_s16(v_src)), v_b2 = vmovl_s16(vget_high_s16(v_src));
// Square-and-multiply step.
2310 v_a1 = vmulq_s32(v_a1, v_b1);
2311 v_a2 = vmulq_s32(v_a2, v_b2);
2313 v_b1 = vmulq_s32(v_b1, v_b1);
2314 v_b2 = vmulq_s32(v_b2, v_b2);
// Final multiply and saturating narrow s32 -> s16.
2318 v_a1 = vmulq_s32(v_a1, v_b1);
2319 v_a2 = vmulq_s32(v_a2, v_b2);
2320 vst1q_s16(dst + i, vcombine_s16(vqmovn_s32(v_a1), vqmovn_s32(v_a2)));
// NEON specialization: x^power for int via square-and-multiply, 4 per
// iteration; no widening (results wrap on overflow, matching the scalar
// int path in iPow_).
// NOTE(review): the exponent-bit loop structure is elided in this listing.
2329 struct iPow_SIMD<int, int>
2331 int operator() ( const int * src, int * dst, int len, int power)
2334 int32x4_t v_1 = vdupq_n_s32(1);
2336 for ( ; i <= len - 4; i += 4)
2338 int32x4_t v_b = vld1q_s32(src + i), v_a = v_1;
// Square-and-multiply step.
2344 v_a = vmulq_s32(v_a, v_b);
2345 v_b = vmulq_s32(v_b, v_b);
2349 v_a = vmulq_s32(v_a, v_b);
2350 vst1q_s32(dst + i, v_a);
// NEON specialization: x^power for float via square-and-multiply, 4 per
// iteration (used for integer exponents only — see iPow_/pow dispatch).
// NOTE(review): the exponent-bit loop structure is elided in this listing.
2358 struct iPow_SIMD<float, float>
2360 int operator() ( const float * src, float * dst, int len, int power)
2363 float32x4_t v_1 = vdupq_n_f32(1.0f);
2365 for ( ; i <= len - 4; i += 4)
2367 float32x4_t v_b = vld1q_f32(src + i), v_a = v_1;
// Square-and-multiply step.
2373 v_a = vmulq_f32(v_a, v_b);
2374 v_b = vmulq_f32(v_b, v_b);
2378 v_a = vmulq_f32(v_a, v_b);
2379 vst1q_f32(dst + i, v_a);
// Generic integer-power kernel: runs the SIMD functor first (which returns
// how many leading elements it handled), then finishes the tail with a
// scalar square-and-multiply in the wide type WT, saturating to T on store.
// NOTE(review): the scalar exponent loop between lines 2397 and 2408 is
// elided in this listing.
2388 template<typename T, typename WT>
2390 iPow_( const T* src, T* dst, int len, int power )
2392 iPow_SIMD<T, WT> vop;
2393 int i = vop(src, dst, len, power);
2395 for( ; i < len; i++ )
2397 WT a = 1, b = src[i];
2408 dst[i] = saturate_cast<T>(a);
// Non-template entry points for each depth, instantiating iPow_ with the
// appropriate (element, wide accumulator) type pair; collected in ipowTab.
2413 static void iPow8u(const uchar* src, uchar* dst, int len, int power)
2415 iPow_<uchar, int>(src, dst, len, power);
2418 static void iPow8s(const schar* src, schar* dst, int len, int power)
2420 iPow_<schar, int>(src, dst, len, power);
2423 static void iPow16u(const ushort* src, ushort* dst, int len, int power)
2425 iPow_<ushort, int>(src, dst, len, power);
2428 static void iPow16s(const short* src, short* dst, int len, int power)
2430 iPow_<short, int>(src, dst, len, power);
2433 static void iPow32s(const int* src, int* dst, int len, int power)
2435 iPow_<int, int>(src, dst, len, power);
2438 static void iPow32f(const float* src, float* dst, int len, int power)
2440 iPow_<float, float>(src, dst, len, power);
2443 static void iPow64f(const double* src, double* dst, int len, int power)
2445 iPow_<double, double>(src, dst, len, power);
// Dispatch table indexed by CV depth (CV_8U..CV_64F); the trailing 0 marks
// unsupported depths (e.g. CV_16F slot). Used by cv::pow for integer powers.
2449 typedef void (*IPowFunc)( const uchar* src, uchar* dst, int len, int power );
2451 static IPowFunc ipowTab[] =
2453 (IPowFunc)iPow8u, (IPowFunc)iPow8s, (IPowFunc)iPow16u, (IPowFunc)iPow16s,
2454 (IPowFunc)iPow32s, (IPowFunc)iPow32f, (IPowFunc)iPow64f, 0
// OpenCL implementation of cv::pow. Handles the trivial integer powers
// (0 -> all ones, 1 -> copy) without a kernel; otherwise selects an OP_SQRT /
// OP_POWN / OP_POW kernel variant and passes the exponent with the matching
// scalar type. Returns false when the device cannot run it (e.g. CV_64F
// without double support or kernel build failure, per the elided branches).
2459 static bool ocl_pow(InputArray _src, double power, OutputArray _dst,
2460 bool is_ipower, int ipower)
2462 const ocl::Device & d = ocl::Device::getDefault();
2463 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
2464 rowsPerWI = d.isIntel() ? 4 : 1;
2465 bool doubleSupport = d.doubleFPConfig() > 0;
2467 _dst.createSameSize(_src, type);
// x^0 == 1 and x^1 == x: no kernel needed.
2468 if (is_ipower && (ipower == 0 || ipower == 1))
2471 _dst.setTo(Scalar::all(1));
2472 else if (ipower == 1)
2478 if (depth == CV_64F && !doubleSupport)
// power == 0.5 maps to the dedicated sqrt kernel.
2481 bool issqrt = std::abs(power - 0.5) < DBL_EPSILON;
2482 const char * const op = issqrt ? "OP_SQRT" : is_ipower ? "OP_POWN" : "OP_POW";
2484 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
2485 format("-D dstT=%s -D depth=%d -D rowsPerWI=%d -D %s -D UNARY_OP%s",
2486 ocl::typeToStr(depth), depth, rowsPerWI, op,
2487 doubleSupport ? " -D DOUBLE_SUPPORT" : ""));
2491 UMat src = _src.getUMat();
2492 _dst.create(src.size(), type);
2493 UMat dst = _dst.getUMat();
2495 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
2496 dstarg = ocl::KernelArg::WriteOnly(dst, cn);
// Exponent argument: none for sqrt, int for POWN, float/double for POW.
2499 k.args(srcarg, dstarg);
2501 k.args(srcarg, dstarg, ipower);
2504 if (depth == CV_32F)
2505 k.args(srcarg, dstarg, (float)power);
2507 k.args(srcarg, dstarg, power);
2510 size_t globalsize[2] = { dst.cols * cn, (dst.rows + rowsPerWI - 1) / rowsPerWI };
2511 return k.run(2, globalsize, NULL, false);
// cv::pow: raises each element of _src to the given power.
// Dispatch, from most to least specialized:
//   * integer exponents (incl. 0, 1, -1, 2 — special-cased in elided
//     branches): ipowTab square-and-multiply kernels, any depth;
//   * |power| == 0.5: Sqrt/InvSqrt kernels (CV_32F/CV_64F only);
//   * general exponent: exp(power * log(x)) computed blockwise, with
//     optional IPP ippsPowx fast path.
// OpenCL and IPP paths are attempted first where applicable.
2516 void pow( InputArray _src, double power, OutputArray _dst )
2518 int type = _src.type(), depth = CV_MAT_DEPTH(type),
2519 cn = CV_MAT_CN(type), ipower = cvRound(power);
// is_ipower: the exponent is (numerically) an integer.
2520 bool is_ipower = fabs(ipower - power) < DBL_EPSILON, same = false,
2521 useOpenCL = _dst.isUMat() && _src.dims() <= 2;
2523 if( is_ipower && !(ocl::Device::getDefault().isIntel() && useOpenCL && depth != CV_64F))
// x^-1 == 1/x (branch for ipower == -1, per elided switch).
2527 divide( Scalar::all(1), _src, _dst );
// x^0 == 1 for every element.
2537 _dst.createSameSize(_src, type);
2538 _dst.setTo(Scalar::all(1));
// ipower == 2: squaring, with an IPP ippiSqr fast path for CV_32F.
2544 #if defined(HAVE_IPP)
2547 if (depth == CV_32F && !same && ( (_src.dims() <= 2 && !ocl::useOpenCL()) ||
2548 (_src.dims() > 2 && _src.isContinuous() && _dst.isContinuous()) ))
2550 Mat src = _src.getMat();
2551 _dst.create( src.dims, src.size, type );
2552 Mat dst = _dst.getMat();
2554 Size size = src.size();
2555 int srcstep = (int)src.step, dststep = (int)dst.step, esz = CV_ELEM_SIZE(type);
// Continuous data can be processed as one long row.
2556 if (src.isContinuous() && dst.isContinuous())
2558 size.width = (int)src.total();
2560 srcstep = dststep = (int)src.total() * esz;
2564 IppStatus status = ippiSqr_32f_C1R(src.ptr<Ipp32f>(), srcstep, dst.ptr<Ipp32f>(), dststep, ippiSize(size.width, size.height));
2568 CV_IMPL_ADD(CV_IMPL_IPP);
2571 setIppErrorStatus();
// Generic squaring fallback (in-place when src aliases dst).
2576 multiply(_dst, _dst, _dst);
2578 multiply(_src, _src, _dst);
// Non-trivial exponents require floating-point input.
2583 CV_Assert( depth == CV_32F || depth == CV_64F );
2585 CV_OCL_RUN(useOpenCL,
2586 ocl_pow(same ? _dst : _src, power, _dst, is_ipower, ipower))
// In-place case: operate directly on the destination.
2590 src = dst = _dst.getMat();
2593 src = _src.getMat();
2594 _dst.create( src.dims, src.size, type );
2595 dst = _dst.getMat();
2598 const Mat* arrays[] = {&src, &dst, 0};
2600 NAryMatIterator it(arrays, ptrs);
2601 int len = (int)(it.size*cn);
// Integer exponent: dispatch on depth to the iPow kernels.
2605 IPowFunc func = ipowTab[depth];
2606 CV_Assert( func != 0 );
2608 for( size_t i = 0; i < it.nplanes; i++, ++it )
2609 func( ptrs[0], ptrs[1], len, ipower );
// |power| == 0.5: sqrt or 1/sqrt, depending on sign.
2611 else if( fabs(fabs(power) - 0.5) < DBL_EPSILON )
2613 MathFunc func = power < 0 ?
2614 (depth == CV_32F ? (MathFunc)InvSqrt_32f : (MathFunc)InvSqrt_64f) :
2615 (depth == CV_32F ? (MathFunc)Sqrt_32f : (MathFunc)Sqrt_64f);
2617 for( size_t i = 0; i < it.nplanes; i++, ++it )
2618 func( ptrs[0], ptrs[1], len );
// General exponent: optional IPP ippsPowx fast path...
2622 #if defined(HAVE_IPP)
2625 if (src.isContinuous() && dst.isContinuous())
2627 IppStatus status = depth == CV_32F ?
2628 ippsPowx_32f_A21(src.ptr<Ipp32f>(), (Ipp32f)power, dst.ptr<Ipp32f>(), (Ipp32s)(src.total() * cn)) :
2629 ippsPowx_64f_A50(src.ptr<Ipp64f>(), power, dst.ptr<Ipp64f>(), (Ipp32s)(src.total() * cn));
2633 CV_IMPL_ADD(CV_IMPL_IPP);
2636 setIppErrorStatus();
// ...otherwise blockwise x^p = exp(p * log(x)) (log/exp calls are in the
// elided lines between 2652 and 2665).
2641 int j, k, blockSize = std::min(len, ((BLOCK_SIZE + cn-1)/cn)*cn);
2642 size_t esz1 = src.elemSize1();
2644 for( size_t i = 0; i < it.nplanes; i++, ++it )
2646 for( j = 0; j < len; j += blockSize )
2648 int bsz = std::min(len - j, blockSize);
2649 if( depth == CV_32F )
2651 const float* x = (const float*)ptrs[0];
2652 float* y = (float*)ptrs[1];
// Scale the logarithms by the exponent before the exp step.
2655 for( k = 0; k < bsz; k++ )
2656 y[k] = (float)(y[k]*power);
2661 const double* x = (const double*)ptrs[0];
2662 double* y = (double*)ptrs[1];
2665 for( k = 0; k < bsz; k++ )
// Advance the plane pointers past the processed block.
2669 ptrs[0] += bsz*esz1;
2670 ptrs[1] += bsz*esz1;
2676 void sqrt(InputArray a, OutputArray b)
2681 /************************** CheckArray for NaN's, Inf's *********************************/
// Per-depth traits used by checkIntegerRange: the C++ element type and its
// inclusive representable [min_allowable, max_allowable] range.
// (Note: "assotiations" is a historical misspelling kept for ABI/source
// compatibility — do not rename.)
2683 template<int cv_mat_type> struct mat_type_assotiations{};
2685 template<> struct mat_type_assotiations<CV_8U>
2687 typedef unsigned char type;
2688 static const type min_allowable = 0x0;
2689 static const type max_allowable = 0xFF;
2692 template<> struct mat_type_assotiations<CV_8S>
2694 typedef signed char type;
2695 static const type min_allowable = SCHAR_MIN;
2696 static const type max_allowable = SCHAR_MAX;
2699 template<> struct mat_type_assotiations<CV_16U>
2701 typedef unsigned short type;
2702 static const type min_allowable = 0x0;
2703 static const type max_allowable = USHRT_MAX;
2705 template<> struct mat_type_assotiations<CV_16S>
2707 typedef signed short type;
2708 static const type min_allowable = SHRT_MIN;
2709 static const type max_allowable = SHRT_MAX;
2712 template<> struct mat_type_assotiations<CV_32S>
2715 static const type min_allowable = (-INT_MAX - 1);
2716 static const type max_allowable = INT_MAX;
// Verifies that every element of an integer Mat lies in [minVal, maxVal]
// (maxVal is INCLUSIVE, unlike cv::checkRange's exclusive bound). On the
// first violation it records its position in bad_pt and value in bad_value.
// Fast exits: if the requested range covers the whole representable range of
// the depth the check trivially passes; if the range is empty or disjoint
// from the representable range it trivially fails at (0,0).
2719 // inclusive maxVal !!!
2721 bool checkIntegerRange(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value)
2723 typedef mat_type_assotiations<depth> type_ass;
2725 if (minVal < type_ass::min_allowable && maxVal > type_ass::max_allowable)
2729 else if (minVal > type_ass::max_allowable || maxVal < type_ass::min_allowable || maxVal < minVal)
2731 bad_pt = cv::Point(0,0);
// Flatten channels so each scalar is checked individually.
2734 cv::Mat as_one_channel = src.reshape(1,0);
2736 for (int j = 0; j < as_one_channel.rows; ++j)
2737 for (int i = 0; i < as_one_channel.cols; ++i)
2739 if (as_one_channel.at<typename type_ass::type>(j ,i) < minVal || as_one_channel.at<typename type_ass::type>(j ,i) > maxVal)
// NOTE(review): bad_pt.x is set to the channel index here (i % channels),
// not the column — looks questionable; verify against callers.
2742 bad_pt.x = i % src.channels();
2743 bad_value = as_one_channel.at<typename type_ass::type>(j ,i);
// Dispatch table of checkIntegerRange instantiations, indexed by CV depth
// (CV_8U..CV_32S); floating-point depths are handled separately in checkRange.
2752 typedef bool (*check_range_function)(cv::Mat src, Point& bad_pt, int minVal, int maxVal, double& bad_value);
2754 check_range_function check_range_functions[] =
2756 &checkIntegerRange<CV_8U>,
2757 &checkIntegerRange<CV_8S>,
2758 &checkIntegerRange<CV_16U>,
2759 &checkIntegerRange<CV_16S>,
2760 &checkIntegerRange<CV_32S>
// cv::checkRange: returns true when every element of _src lies in
// [minVal, maxVal) and contains no NaN/Inf (for float depths, out-of-range
// bit patterns — including NaN/Inf — fail the ordered comparison below).
// Multi-plane inputs recurse per plane; integer depths go through the
// check_range_functions table; float/double use a monotonic bit-pattern
// comparison (CV_TOGGLE_FLT/DBL maps IEEE-754 bits to an integer order, so
// a single integer compare covers range, NaN and Inf at once).
// On failure: records the offending point/value; unless `quiet`, raises
// CV_StsOutOfRange.
2763 bool checkRange(InputArray _src, bool quiet, Point* pt, double minVal, double maxVal)
2765 Mat src = _src.getMat();
// >2-dimensional input: validate each 2D plane recursively.
2769 const Mat* arrays[] = {&src, 0};
2771 NAryMatIterator it(arrays, planes);
2773 for ( size_t i = 0; i < it.nplanes; i++, ++it )
2775 if (!checkRange( it.planes[0], quiet, pt, minVal, maxVal ))
2777 // todo: set index properly
2784 int depth = src.depth();
// badPt.x stays negative while no violation has been found.
2785 Point badPt(-1, -1);
2786 double badValue = 0;
// Integer depths: clamp the requested bounds into int range and delegate.
2791 int minVali = minVal<(-INT_MAX - 1) ? (-INT_MAX - 1) : cvFloor(minVal);
2792 int maxVali = maxVal>INT_MAX ? INT_MAX : cvCeil(maxVal) - 1; // checkIntegerRange() uses an inclusive maxVal, hence the -1
2794 (check_range_functions[depth])(src, badPt, minVali, maxVali, badValue);
2799 Size size = getContinuousSize( src, src.channels() );
2801 if( depth == CV_32F )
// Float path: compare the raw 32-bit patterns after CV_TOGGLE_FLT, which
// orders them like the values (NaN patterns fall outside [ia, ib)).
2805 const int* isrc = src.ptr<int>();
2806 size_t step = src.step/sizeof(isrc[0]);
2808 a.f = (float)std::max(minVal, (double)-FLT_MAX);
2809 b.f = (float)std::min(maxVal, (double)FLT_MAX);
2811 ia = CV_TOGGLE_FLT(a.i);
2812 ib = CV_TOGGLE_FLT(b.i);
2814 for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
2816 for( i = 0; i < size.width; i++ )
2819 val = CV_TOGGLE_FLT(val);
// Half-open interval: val must satisfy ia <= val < ib.
2821 if( val < ia || val >= ib )
2823 badPt = Point((loc + i) % src.cols, (loc + i) / src.cols);
2824 badValue = ((const float*)isrc)[i];
// Double path: identical scheme on 64-bit patterns via CV_TOGGLE_DBL.
2834 const int64* isrc = src.ptr<int64>();
2835 size_t step = src.step/sizeof(isrc[0]);
2840 ia = CV_TOGGLE_DBL(a.i);
2841 ib = CV_TOGGLE_DBL(b.i);
2843 for( ; badPt.x < 0 && size.height--; loc += size.width, isrc += step )
2845 for( i = 0; i < size.width; i++ )
2847 int64 val = isrc[i];
2848 val = CV_TOGGLE_DBL(val);
2850 if( val < ia || val >= ib )
2852 badPt = Point((loc + i) % src.cols, (loc + i) / src.cols);
2853 badValue = ((const double*)isrc)[i];
// Violation found and quiet==false: raise with location and value.
2866 CV_Error_( CV_StsOutOfRange,
2867 ("the value at (%d, %d)=%g is out of range", badPt.x, badPt.y, badValue));
// OpenCL implementation of patchNaNs: builds the OP_PATCH_NANS unary kernel
// (which replaces NaNs in-place with `value`) and runs it over the UMat.
// Returns the kernel's run status (false on build/run failure).
2874 static bool ocl_patchNaNs( InputOutputArray _a, float value )
2876 int rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
2877 ocl::Kernel k("KF", ocl::core::arithm_oclsrc,
2878 format("-D UNARY_OP -D OP_PATCH_NANS -D dstT=float -D rowsPerWI=%d",
2883 UMat a = _a.getUMat();
2884 int cn = a.channels();
// Same UMat is both input and output (in-place patch).
2886 k.args(ocl::KernelArg::ReadOnlyNoSize(a),
2887 ocl::KernelArg::WriteOnly(a, cn), (float)value);
2889 size_t globalsize[2] = { a.cols * cn, (a.rows + rowsPerWI - 1) / rowsPerWI };
2890 return k.run(2, globalsize, NULL, false);
// cv::patchNaNs: replaces every NaN in a CV_32F array with _val, in place.
// NaN detection is done on the raw bits: (bits & 0x7fffffff) > 0x7f800000
// clears the sign and tests for an exponent of all-ones with a nonzero
// mantissa — exactly the IEEE-754 single-precision NaN encodings.
// Vectorized with SSE2 or NEON where available, scalar fallback otherwise.
2895 void patchNaNs( InputOutputArray _a, double _val )
2897 CV_Assert( _a.depth() == CV_32F );
2899 CV_OCL_RUN(_a.isUMat() && _a.dims() <= 2,
2900 ocl_patchNaNs(_a, (float)_val))
2902 Mat a = _a.getMat();
2903 const Mat* arrays[] = {&a, 0};
2905 NAryMatIterator it(arrays, (uchar**)ptrs);
2906 size_t len = it.size*a.channels();
// Replacement value, reinterpreted as int bits for the SIMD blends.
2908 val.f = (float)_val;
// SSE2 constants: abs-mask and Inf bit pattern.
2911 __m128i v_mask1 = _mm_set1_epi32(0x7fffffff), v_mask2 = _mm_set1_epi32(0x7f800000);
2912 __m128i v_val = _mm_set1_epi32(val.i);
// NEON equivalents of the same constants.
2914 int32x4_t v_mask1 = vdupq_n_s32(0x7fffffff), v_mask2 = vdupq_n_s32(0x7f800000),
2915 v_val = vdupq_n_s32(val.i);
2918 for( size_t i = 0; i < it.nplanes; i++, ++it )
2920 int* tptr = ptrs[0];
// SSE2: 4 floats per iteration; blend val into NaN lanes.
2926 for ( ; j + 4 <= len; j += 4)
2928 __m128i v_src = _mm_loadu_si128((__m128i const *)(tptr + j));
2929 __m128i v_cmp_mask = _mm_cmplt_epi32(v_mask2, _mm_and_si128(v_src, v_mask1));
2930 __m128i v_res = _mm_or_si128(_mm_andnot_si128(v_cmp_mask, v_src), _mm_and_si128(v_cmp_mask, v_val));
2931 _mm_storeu_si128((__m128i *)(tptr + j), v_res);
// NEON: same predicate, blended with vbslq.
2935 for ( ; j + 4 <= len; j += 4)
2937 int32x4_t v_src = vld1q_s32(tptr + j);
2938 uint32x4_t v_cmp_mask = vcltq_s32(v_mask2, vandq_s32(v_src, v_mask1));
2939 int32x4_t v_dst = vbslq_s32(v_cmp_mask, v_val, v_src);
2940 vst1q_s32(tptr + j, v_dst);
// Scalar tail: same bit test per element.
2944 for( ; j < len; j++ )
2945 if( (tptr[j] & 0x7fffffff) > 0x7f800000 )
// Thin pointer-based wrappers exposing the internal float kernels
// (Exp_32f, Log_32f, FastAtan2_32f, Magnitude_32f) under plain names,
// presumably for the cv::hal-style C-array API — confirm namespace in the
// elided surrounding lines.
2951 void exp(const float* src, float* dst, int n)
2953 Exp_32f(src, dst, n);
2956 void log(const float* src, float* dst, int n)
2958 Log_32f(src, dst, n);
2961 void fastAtan2(const float* y, const float* x, float* dst, int n, bool angleInDegrees)
2963 FastAtan2_32f(y, x, dst, n, angleInDegrees);
2966 void magnitude(const float* x, const float* y, float* dst, int n)
2968 Magnitude_32f(x, y, dst, n);
// Legacy C API: scalar cube root and fast atan2, forwarding to the C++ API.
2973 CV_IMPL float cvCbrt(float value) { return cv::cubeRoot(value); }
2974 CV_IMPL float cvFastArctan(float y, float x) { return cv::fastAtan2(y, x); }
// Legacy C API for cartToPolar. Both magnitude and angle outputs are
// optional (NULL): with both present it calls cartToPolar, with only one it
// calls magnitude or phase respectively. Output arrays must match the input
// size and type.
2977 cvCartToPolar( const CvArr* xarr, const CvArr* yarr,
2978 CvArr* magarr, CvArr* anglearr,
2979 int angle_in_degrees )
2981 cv::Mat X = cv::cvarrToMat(xarr), Y = cv::cvarrToMat(yarr), Mag, Angle;
2984 Mag = cv::cvarrToMat(magarr);
2985 CV_Assert( Mag.size() == X.size() && Mag.type() == X.type() );
2989 Angle = cv::cvarrToMat(anglearr);
2990 CV_Assert( Angle.size() == X.size() && Angle.type() == X.type() );
// Dispatch depending on which outputs were requested (branch conditions
// are in the elided lines).
2995 cv::cartToPolar( X, Y, Mag, Angle, angle_in_degrees != 0 );
2997 cv::magnitude( X, Y, Mag );
3000 cv::phase( X, Y, Angle, angle_in_degrees != 0 );
// Legacy C API for polarToCart. The magnitude input and each cartesian
// output are optional (NULL); all provided arrays must match the angle
// array's size and type.
3004 cvPolarToCart( const CvArr* magarr, const CvArr* anglearr,
3005 CvArr* xarr, CvArr* yarr, int angle_in_degrees )
3007 cv::Mat X, Y, Angle = cv::cvarrToMat(anglearr), Mag;
3010 Mag = cv::cvarrToMat(magarr);
3011 CV_Assert( Mag.size() == Angle.size() && Mag.type() == Angle.type() );
3015 X = cv::cvarrToMat(xarr);
3016 CV_Assert( X.size() == Angle.size() && X.type() == Angle.type() );
3020 Y = cv::cvarrToMat(yarr);
3021 CV_Assert( Y.size() == Angle.size() && Y.type() == Angle.type() );
3024 cv::polarToCart( Mag, Angle, X, Y, angle_in_degrees != 0 );
// Legacy C API wrappers for exp/log/pow: convert CvArr to Mat, require
// matching type and size between input and output, and forward to the
// corresponding cv:: function.
3027 CV_IMPL void cvExp( const CvArr* srcarr, CvArr* dstarr )
3029 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3030 CV_Assert( src.type() == dst.type() && src.size == dst.size );
3031 cv::exp( src, dst );
3034 CV_IMPL void cvLog( const CvArr* srcarr, CvArr* dstarr )
3036 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3037 CV_Assert( src.type() == dst.type() && src.size == dst.size );
3038 cv::log( src, dst );
3041 CV_IMPL void cvPow( const CvArr* srcarr, CvArr* dstarr, double power )
3043 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
3044 CV_Assert( src.type() == dst.type() && src.size == dst.size );
3045 cv::pow( src, power, dst );
// Legacy C API wrapper over cv::checkRange. Without CV_CHECK_RANGE the
// bounds widen to (-DBL_MAX, DBL_MAX) so only NaN/Inf are flagged;
// CV_CHECK_QUIET suppresses the exception (maps to quiet=true).
3048 CV_IMPL int cvCheckArr( const CvArr* arr, int flags,
3049 double minVal, double maxVal )
3051 if( (flags & CV_CHECK_RANGE) == 0 )
3052 minVal = -DBL_MAX, maxVal = DBL_MAX;
3053 return cv::checkRange(cv::cvarrToMat(arr), (flags & CV_CHECK_QUIET) != 0, 0, minVal, maxVal );
3058 Finds real roots of cubic, quadratic or linear equation.
3059 The original code has been taken from Ken Turkowski web page
3060 (http://www.worldserver.com/turk/opensource/) and adapted for OpenCV.
3061 Here is the copyright notice.
3063 -----------------------------------------------------------------------
3064 Copyright (C) 1978-1999 Ken Turkowski. <turk@computer.org>
3066 All rights reserved.
3068 Warranty Information
3069 Even though I have reviewed this software, I make no warranty
3070 or representation, either express or implied, with respect to this
3071 software, its quality, accuracy, merchantability, or fitness for a
3072 particular purpose. As a result, this software is provided "as is,"
3073 and you, its user, are assuming the entire risk as to its quality
3076 This code may be used and freely distributed as long as it includes
3077 this copyright notice and the above warranty information.
3078 -----------------------------------------------------------------------
// cv::solveCubic: real roots of a cubic (or quadratic/linear, when leading
// coefficients vanish). Accepts 3 or 4 coefficients in a row or column
// vector, CV_32F or CV_64F; 4 coefficients mean a0*x^3+a1*x^2+a2*x+a3,
// 3 mean a monic cubic. Writes up to 3 roots (unused slots stay 0) and
// returns the number of real roots found (per the elided return paths).
// Cubic case uses the classic trigonometric/Cardano split on the
// discriminant d = Q^3 - R^2.
3081 int cv::solveCubic( InputArray _coeffs, OutputArray _roots )
3084 Mat coeffs = _coeffs.getMat();
3085 int ctype = coeffs.type();
3087 CV_Assert( ctype == CV_32F || ctype == CV_64F );
// Coefficient vector must be 1 x n or n x 1 with n in {n0, n0+1}.
3088 CV_Assert( (coeffs.size() == Size(n0, 1) ||
3089 coeffs.size() == Size(n0+1, 1) ||
3090 coeffs.size() == Size(1, n0) ||
3091 coeffs.size() == Size(1, n0+1)) );
3093 _roots.create(n0, 1, ctype, -1, true, _OutputArray::DEPTH_MASK_FLT);
3094 Mat roots = _roots.getMat();
3097 double a0 = 1., a1, a2, a3;
3098 double x0 = 0., x1 = 0., x2 = 0.;
3099 int ncoeffs = coeffs.rows + coeffs.cols - 1;
// Read coefficients; with 4 of them the leading a0 is read first
// (++i skips to it) and the rest are normalized by it in elided lines.
3101 if( ctype == CV_32FC1 )
3104 a0 = coeffs.at<float>(++i);
3106 a1 = coeffs.at<float>(i+1);
3107 a2 = coeffs.at<float>(i+2);
3108 a3 = coeffs.at<float>(i+3);
3113 a0 = coeffs.at<double>(++i);
3115 a1 = coeffs.at<double>(i+1);
3116 a2 = coeffs.at<double>(i+2);
3117 a3 = coeffs.at<double>(i+3);
// Degenerate linear case: no root if the constant term is nonzero.
3125 n = a3 == 0 ? -1 : 0;
3135 // Degenerate quadratic case: standard discriminant, with the
// numerically stable root pairing (larger-magnitude root computed first).
3136 double d = a2*a2 - 4*a1*a3;
3140 double q1 = (-a2 + d) * 0.5;
3141 double q2 = (a2 + d) * -0.5;
3142 if( fabs(q1) > fabs(q2) )
// True cubic: depressed-cubic invariants Q and R.
3163 double Q = (a1 * a1 - 3 * a2) * (1./9);
3164 double R = (2 * a1 * a1 * a1 - 9 * a1 * a2 + 27 * a3) * (1./54);
3165 double Qcubed = Q * Q * Q;
3166 double d = Qcubed - R * R;
// d >= 0: three real roots via the trigonometric method.
3170 double theta = acos(R / std::sqrt(Qcubed));
3171 double sqrtQ = std::sqrt(Q);
3172 double t0 = -2 * sqrtQ;
3173 double t1 = theta * (1./3);
3174 double t2 = a1 * (1./3);
3175 x0 = t0 * cos(t1) - t2;
3176 x1 = t0 * cos(t1 + (2.*CV_PI/3)) - t2;
3177 x2 = t0 * cos(t1 + (4.*CV_PI/3)) - t2;
// d < 0: one real root via the cube-root (Cardano) branch.
3184 e = std::pow(d + fabs(R), 0.333333333333);
3187 x0 = (e + Q / e) - a1 * (1./3);
// Store results in the caller's depth.
3192 if( roots.type() == CV_32FC1 )
3194 roots.at<float>(0) = (float)x0;
3195 roots.at<float>(1) = (float)x1;
3196 roots.at<float>(2) = (float)x2;
3200 roots.at<double>(0) = x0;
3201 roots.at<double>(1) = x1;
3202 roots.at<double>(2) = x2;
/* finds complex roots of a polynomial using Durand-Kerner method:
   http://en.wikipedia.org/wiki/Durand%E2%80%93Kerner_method

   NOTE(review): this excerpt is missing several lines of the original
   function (the opening brace, declarations of i/j/iter/maxDiff and the
   point 'p', the initial root guesses, the per-iteration root update
   roots[i] = ..., the convergence break, and the final return of the
   residual). Only the visible fragments are annotated. */
double cv::solvePoly( InputArray _coeffs0, OutputArray _roots0, int maxIters )
typedef Complex<double> C;
Mat coeffs0 = _coeffs0.getMat();
int ctype = _coeffs0.type();
int cdepth = CV_MAT_DEPTH(ctype);
// Accept float/double input with 1 (real) or 2 (complex) channels.
CV_Assert( CV_MAT_DEPTH(ctype) >= CV_32F && CV_MAT_CN(ctype) <= 2 );
CV_Assert( coeffs0.rows == 1 || coeffs0.cols == 1 );
// Polynomial degree = number of coefficients - 1.
int n = coeffs0.cols + coeffs0.rows - 2;
// Roots are always produced as 2-channel (complex) values.
_roots0.create(n, 1, CV_MAKETYPE(cdepth, 2), -1, true, _OutputArray::DEPTH_MASK_FLT);
Mat roots0 = _roots0.getMat();
// One scratch buffer holds both the double-precision coefficients
// (n+1 entries) and the working roots (n entries, +1 slack).
AutoBuffer<C> buf(n*2+2);
C *coeffs = buf, *roots = coeffs + n + 1;
// Convert the input to double precision in place: complex input lands
// directly in 'coeffs'; real input is staged in 'roots' first.
Mat coeffs1(coeffs0.size(), CV_MAKETYPE(CV_64F, coeffs0.channels()), coeffs0.channels() == 2 ? coeffs : roots);
coeffs0.convertTo(coeffs1, coeffs1.type());
if( coeffs0.channels() == 1 )
// Promote the staged real coefficients to complex form.
const double* rcoeffs = (const double*)roots;
for( i = 0; i <= n; i++ )
coeffs[i] = C(rcoeffs[i], 0);
// Initial root guesses (the initializer is on a missing line).
for( i = 0; i < n; i++ )
maxIters = maxIters <= 0 ? 1000 : maxIters;
for( iter = 0; iter < maxIters; iter++ )
for( i = 0; i < n; i++ )
// Evaluate the polynomial at p (Horner, 'num') and the product of
// differences to the other current roots ('denom').
C num = coeffs[n], denom = coeffs[n];
for( j = 0; j < n; j++ )
num = num*p + coeffs[n-j-1];
if( j != i ) denom = denom * (p - roots[j]);
// Track the largest residual to decide convergence (the break on
// maxDiff is on a missing line).
maxDiff = std::max(maxDiff, cv::abs(num));
if( coeffs0.channels() == 1 )
// For real input, snap roots with negligible imaginary part onto the
// real axis (the assignment is on a missing line).
const double verySmallEps = 1e-100;
for( i = 0; i < n; i++ )
if( fabs(roots[i].im) < verySmallEps )
// Convert the double-complex working roots to the caller's depth.
Mat(roots0.size(), CV_64FC2, roots).convertTo(roots0, roots0.type());
// C-API wrapper around cv::solveCubic.
// NOTE(review): the return-type line (presumably CV_IMPL int) and the
// final 'return nroots;' appear to be on lines missing from this excerpt.
cvSolveCubic( const CvMat* coeffs, CvMat* roots )
cv::Mat _coeffs = cv::cvarrToMat(coeffs), _roots = cv::cvarrToMat(roots), _roots0 = _roots;
int nroots = cv::solveCubic(_coeffs, _roots);
CV_Assert( _roots.data == _roots0.data ); // check that the array of roots was not reallocated
// C-API wrapper around cv::solvePoly; the trailing int parameter is the
// unused legacy 'fig' argument.
// NOTE(review): the declaration of _r0 (presumably 'cv::Mat _r0 = _r;')
// is on a line missing from this excerpt, as is any CV_IMPL prefix line.
void cvSolvePoly(const CvMat* a, CvMat *r, int maxiter, int)
cv::Mat _a = cv::cvarrToMat(a);
cv::Mat _r = cv::cvarrToMat(r);
cv::solvePoly(_a, _r, maxiter);
CV_Assert( _r.data == _r0.data ); // check that the array of roots was not reallocated