1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
5 namespace cv { namespace hal {
7 CV_CPU_OPTIMIZATION_NAMESPACE_BEGIN
9 // forward declarations
10 void fastAtan32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees);
11 void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool angleInDegrees);
12 void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees);
13 void magnitude32f(const float* x, const float* y, float* mag, int len);
14 void magnitude64f(const double* x, const double* y, double* mag, int len);
15 void invSqrt32f(const float* src, float* dst, int len);
16 void invSqrt64f(const double* src, double* dst, int len);
17 void sqrt32f(const float* src, float* dst, int len);
18 void sqrt64f(const double* src, double* dst, int len);
19 void exp32f(const float *src, float *dst, int n);
20 void exp64f(const double *src, double *dst, int n);
21 void log32f(const float *src, float *dst, int n);
22 void log64f(const double *src, double *dst, int n);
23 float fastAtan2(float y, float x);
25 #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
31 static const float atan2_p1 = 0.9997878412794807f*(float)(180/CV_PI);
32 static const float atan2_p3 = -0.3258083974640975f*(float)(180/CV_PI);
33 static const float atan2_p5 = 0.1555786518463281f*(float)(180/CV_PI);
34 static const float atan2_p7 = -0.04432655554792128f*(float)(180/CV_PI);
38 static inline float atan_f32(float y, float x)
40 float ax = std::abs(x), ay = std::abs(y);
44 c = ay/(ax + (float)DBL_EPSILON);
46 a = (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
50 c = ax/(ay + (float)DBL_EPSILON);
52 a = 90.f - (((atan2_p7*c2 + atan2_p5)*c2 + atan2_p3)*c2 + atan2_p1)*c;
65 explicit v_atan_f32(const float& scale)
67 eps = vx_setall_f32((float)DBL_EPSILON);
69 p7 = vx_setall_f32(atan2_p7);
70 p5 = vx_setall_f32(atan2_p5);
71 p3 = vx_setall_f32(atan2_p3);
72 p1 = vx_setall_f32(atan2_p1);
73 val90 = vx_setall_f32(90.f);
74 val180 = vx_setall_f32(180.f);
75 val360 = vx_setall_f32(360.f);
76 s = vx_setall_f32(scale);
79 v_float32 compute(const v_float32& y, const v_float32& x)
81 v_float32 ax = v_abs(x);
82 v_float32 ay = v_abs(y);
83 v_float32 c = v_min(ax, ay) / (v_max(ax, ay) + eps);
85 v_float32 a = v_fma(v_fma(v_fma(cc, p7, p5), cc, p3), cc, p1)*c;
86 a = v_select(ax >= ay, a, val90 - a);
87 a = v_select(x < z, val180 - a, a);
88 a = v_select(y < z, val360 - a, a);
108 ///////////////////////////////////// ATAN2 ////////////////////////////////////
110 static void fastAtan32f_(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
112 float scale = angleInDegrees ? 1.f : (float)(CV_PI/180);
115 const int VECSZ = v_float32::nlanes;
118 for( ; i < len; i += VECSZ*2 )
120 if( i + VECSZ*2 > len )
122 // if it's inplace operation, we cannot repeatedly process
123 // the tail for the second time, so we have to use the
125 if( i == 0 || angle == X || angle == Y )
130 v_float32 y0 = vx_load(Y + i);
131 v_float32 x0 = vx_load(X + i);
132 v_float32 y1 = vx_load(Y + i + VECSZ);
133 v_float32 x1 = vx_load(X + i + VECSZ);
135 v_float32 r0 = v.compute(y0, x0);
136 v_float32 r1 = v.compute(y1, x1);
138 v_store(angle + i, r0);
139 v_store(angle + i + VECSZ, r1);
144 for( ; i < len; i++ )
145 angle[i] = atan_f32(Y[i], X[i])*scale;
148 void fastAtan32f(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
150 CV_INSTRUMENT_REGION()
151 fastAtan32f_(Y, X, angle, len, angleInDegrees );
154 void fastAtan64f(const double *Y, const double *X, double *angle, int len, bool angleInDegrees)
156 CV_INSTRUMENT_REGION()
158 const int BLKSZ = 128;
159 float ybuf[BLKSZ], xbuf[BLKSZ], abuf[BLKSZ];
160 for( int i = 0; i < len; i += BLKSZ )
162 int j, blksz = std::min(BLKSZ, len - i);
163 for( j = 0; j < blksz; j++ )
165 ybuf[j] = (float)Y[i + j];
166 xbuf[j] = (float)X[i + j];
168 fastAtan32f_(ybuf, xbuf, abuf, blksz, angleInDegrees);
169 for( j = 0; j < blksz; j++ )
170 angle[i + j] = abuf[j];
175 void fastAtan2(const float *Y, const float *X, float *angle, int len, bool angleInDegrees )
177 CV_INSTRUMENT_REGION()
178 fastAtan32f(Y, X, angle, len, angleInDegrees);
181 void magnitude32f(const float* x, const float* y, float* mag, int len)
183 CV_INSTRUMENT_REGION()
188 const int VECSZ = v_float32::nlanes;
189 for( ; i < len; i += VECSZ*2 )
191 if( i + VECSZ*2 > len )
193 if( i == 0 || mag == x || mag == y )
197 v_float32 x0 = vx_load(x + i), x1 = vx_load(x + i + VECSZ);
198 v_float32 y0 = vx_load(y + i), y1 = vx_load(y + i + VECSZ);
199 x0 = v_sqrt(v_muladd(x0, x0, y0*y0));
200 x1 = v_sqrt(v_muladd(x1, x1, y1*y1));
201 v_store(mag + i, x0);
202 v_store(mag + i + VECSZ, x1);
207 for( ; i < len; i++ )
209 float x0 = x[i], y0 = y[i];
210 mag[i] = std::sqrt(x0*x0 + y0*y0);
214 void magnitude64f(const double* x, const double* y, double* mag, int len)
216 CV_INSTRUMENT_REGION()
221 const int VECSZ = v_float64::nlanes;
222 for( ; i < len; i += VECSZ*2 )
224 if( i + VECSZ*2 > len )
226 if( i == 0 || mag == x || mag == y )
230 v_float64 x0 = vx_load(x + i), x1 = vx_load(x + i + VECSZ);
231 v_float64 y0 = vx_load(y + i), y1 = vx_load(y + i + VECSZ);
232 x0 = v_sqrt(v_muladd(x0, x0, y0*y0));
233 x1 = v_sqrt(v_muladd(x1, x1, y1*y1));
234 v_store(mag + i, x0);
235 v_store(mag + i + VECSZ, x1);
240 for( ; i < len; i++ )
242 double x0 = x[i], y0 = y[i];
243 mag[i] = std::sqrt(x0*x0 + y0*y0);
248 void invSqrt32f(const float* src, float* dst, int len)
250 CV_INSTRUMENT_REGION()
255 const int VECSZ = v_float32::nlanes;
256 for( ; i < len; i += VECSZ*2 )
258 if( i + VECSZ*2 > len )
260 if( i == 0 || src == dst )
264 v_float32 t0 = vx_load(src + i), t1 = vx_load(src + i + VECSZ);
267 v_store(dst + i, t0); v_store(dst + i + VECSZ, t1);
272 for( ; i < len; i++ )
273 dst[i] = 1/std::sqrt(src[i]);
277 void invSqrt64f(const double* src, double* dst, int len)
279 CV_INSTRUMENT_REGION()
283 const int VECSZ = v_float64::nlanes;
284 for ( ; i < len; i += VECSZ*2)
286 if( i + VECSZ*2 > len )
288 if( i == 0 || src == dst )
292 v_float64 t0 = vx_load(src + i), t1 = vx_load(src + i + VECSZ);
295 v_store(dst + i, t0); v_store(dst + i + VECSZ, t1);
299 for( ; i < len; i++ )
300 dst[i] = 1/std::sqrt(src[i]);
304 void sqrt32f(const float* src, float* dst, int len)
306 CV_INSTRUMENT_REGION()
311 const int VECSZ = v_float32::nlanes;
312 for( ; i < len; i += VECSZ*2 )
314 if( i + VECSZ*2 > len )
316 if( i == 0 || src == dst )
320 v_float32 t0 = vx_load(src + i), t1 = vx_load(src + i + VECSZ);
323 v_store(dst + i, t0); v_store(dst + i + VECSZ, t1);
328 for( ; i < len; i++ )
329 dst[i] = std::sqrt(src[i]);
333 void sqrt64f(const double* src, double* dst, int len)
335 CV_INSTRUMENT_REGION()
340 const int VECSZ = v_float64::nlanes;
341 for( ; i < len; i += VECSZ*2 )
343 if( i + VECSZ*2 > len )
345 if( i == 0 || src == dst )
349 v_float64 t0 = vx_load(src + i), t1 = vx_load(src + i + VECSZ);
352 v_store(dst + i, t0); v_store(dst + i + VECSZ, t1);
357 for( ; i < len; i++ )
358 dst[i] = std::sqrt(src[i]);
361 // Workaround for ICE in MSVS 2015 update 3 (issue #7795)
362 // CV_AVX is not used here, because generated code is faster in non-AVX mode.
363 // (tested with disabled IPP on i5-6300U)
364 #if (defined _MSC_VER && _MSC_VER >= 1900)
365 void exp32f(const float *src, float *dst, int n)
367 CV_INSTRUMENT_REGION()
369 for (int i = 0; i < n; i++)
371 dst[i] = std::exp(src[i]);
375 void exp64f(const double *src, double *dst, int n)
377 CV_INSTRUMENT_REGION()
379 for (int i = 0; i < n; i++)
381 dst[i] = std::exp(src[i]);
385 void log32f(const float *src, float *dst, int n)
387 CV_INSTRUMENT_REGION()
389 for (int i = 0; i < n; i++)
391 dst[i] = std::log(src[i]);
394 void log64f(const double *src, double *dst, int n)
396 CV_INSTRUMENT_REGION()
398 for (int i = 0; i < n; i++)
400 dst[i] = std::log(src[i]);
405 ////////////////////////////////////// EXP /////////////////////////////////////
407 #define EXPTAB_SCALE 6
408 #define EXPTAB_MASK ((1 << EXPTAB_SCALE) - 1)
410 #define EXPPOLY_32F_A0 .9670371139572337719125840413672004409288e-2
412 static const double expTab[] = {
413 1.0 * EXPPOLY_32F_A0,
414 1.0108892860517004600204097905619 * EXPPOLY_32F_A0,
415 1.0218971486541166782344801347833 * EXPPOLY_32F_A0,
416 1.0330248790212284225001082839705 * EXPPOLY_32F_A0,
417 1.0442737824274138403219664787399 * EXPPOLY_32F_A0,
418 1.0556451783605571588083413251529 * EXPPOLY_32F_A0,
419 1.0671404006768236181695211209928 * EXPPOLY_32F_A0,
420 1.0787607977571197937406800374385 * EXPPOLY_32F_A0,
421 1.0905077326652576592070106557607 * EXPPOLY_32F_A0,
422 1.1023825833078409435564142094256 * EXPPOLY_32F_A0,
423 1.1143867425958925363088129569196 * EXPPOLY_32F_A0,
424 1.126521618608241899794798643787 * EXPPOLY_32F_A0,
425 1.1387886347566916537038302838415 * EXPPOLY_32F_A0,
426 1.151189229952982705817759635202 * EXPPOLY_32F_A0,
427 1.1637248587775775138135735990922 * EXPPOLY_32F_A0,
428 1.1763969916502812762846457284838 * EXPPOLY_32F_A0,
429 1.1892071150027210667174999705605 * EXPPOLY_32F_A0,
430 1.2021567314527031420963969574978 * EXPPOLY_32F_A0,
431 1.2152473599804688781165202513388 * EXPPOLY_32F_A0,
432 1.2284805361068700056940089577928 * EXPPOLY_32F_A0,
433 1.2418578120734840485936774687266 * EXPPOLY_32F_A0,
434 1.2553807570246910895793906574423 * EXPPOLY_32F_A0,
435 1.2690509571917332225544190810323 * EXPPOLY_32F_A0,
436 1.2828700160787782807266697810215 * EXPPOLY_32F_A0,
437 1.2968395546510096659337541177925 * EXPPOLY_32F_A0,
438 1.3109612115247643419229917863308 * EXPPOLY_32F_A0,
439 1.3252366431597412946295370954987 * EXPPOLY_32F_A0,
440 1.3396675240533030053600306697244 * EXPPOLY_32F_A0,
441 1.3542555469368927282980147401407 * EXPPOLY_32F_A0,
442 1.3690024229745906119296011329822 * EXPPOLY_32F_A0,
443 1.3839098819638319548726595272652 * EXPPOLY_32F_A0,
444 1.3989796725383111402095281367152 * EXPPOLY_32F_A0,
445 1.4142135623730950488016887242097 * EXPPOLY_32F_A0,
446 1.4296133383919700112350657782751 * EXPPOLY_32F_A0,
447 1.4451808069770466200370062414717 * EXPPOLY_32F_A0,
448 1.4609177941806469886513028903106 * EXPPOLY_32F_A0,
449 1.476826145939499311386907480374 * EXPPOLY_32F_A0,
450 1.4929077282912648492006435314867 * EXPPOLY_32F_A0,
451 1.5091644275934227397660195510332 * EXPPOLY_32F_A0,
452 1.5255981507445383068512536895169 * EXPPOLY_32F_A0,
453 1.5422108254079408236122918620907 * EXPPOLY_32F_A0,
454 1.5590044002378369670337280894749 * EXPPOLY_32F_A0,
455 1.5759808451078864864552701601819 * EXPPOLY_32F_A0,
456 1.5931421513422668979372486431191 * EXPPOLY_32F_A0,
457 1.6104903319492543081795206673574 * EXPPOLY_32F_A0,
458 1.628027421857347766848218522014 * EXPPOLY_32F_A0,
459 1.6457554781539648445187567247258 * EXPPOLY_32F_A0,
460 1.6636765803267364350463364569764 * EXPPOLY_32F_A0,
461 1.6817928305074290860622509524664 * EXPPOLY_32F_A0,
462 1.7001063537185234695013625734975 * EXPPOLY_32F_A0,
463 1.7186192981224779156293443764563 * EXPPOLY_32F_A0,
464 1.7373338352737062489942020818722 * EXPPOLY_32F_A0,
465 1.7562521603732994831121606193753 * EXPPOLY_32F_A0,
466 1.7753764925265212525505592001993 * EXPPOLY_32F_A0,
467 1.7947090750031071864277032421278 * EXPPOLY_32F_A0,
468 1.8142521755003987562498346003623 * EXPPOLY_32F_A0,
469 1.8340080864093424634870831895883 * EXPPOLY_32F_A0,
470 1.8539791250833855683924530703377 * EXPPOLY_32F_A0,
471 1.8741676341102999013299989499544 * EXPPOLY_32F_A0,
472 1.8945759815869656413402186534269 * EXPPOLY_32F_A0,
473 1.9152065613971472938726112702958 * EXPPOLY_32F_A0,
474 1.9360617934922944505980559045667 * EXPPOLY_32F_A0,
475 1.9571441241754002690183222516269 * EXPPOLY_32F_A0,
476 1.9784560263879509682582499181312 * EXPPOLY_32F_A0,
479 static float expTab_f[EXPTAB_MASK+1];
480 static volatile bool extTab_f_initialized = false;
482 // the code below uses _mm_cast* intrinsics, which are not avialable on VS2005
483 #if (defined _MSC_VER && _MSC_VER < 1500) || \
484 (!defined __APPLE__ && defined __GNUC__ && __GNUC__*100 + __GNUC_MINOR__ < 402)
489 static const double exp_prescale = 1.4426950408889634073599246810019 * (1 << EXPTAB_SCALE);
490 static const double exp_postscale = 1./(1 << EXPTAB_SCALE);
491 static const double exp_max_val = 3000.*(1 << EXPTAB_SCALE); // log10(DBL_MAX) < 3000
493 void exp32f( const float *_x, float *y, int n )
495 CV_INSTRUMENT_REGION()
497 if( !extTab_f_initialized )
499 for( int j = 0; j <= EXPTAB_MASK; j++ )
500 expTab_f[j] = (float)expTab[j];
501 extTab_f_initialized = true;
505 A4 = (float)(1.000000000000002438532970795181890933776 / EXPPOLY_32F_A0),
506 A3 = (float)(.6931471805521448196800669615864773144641 / EXPPOLY_32F_A0),
507 A2 = (float)(.2402265109513301490103372422686535526573 / EXPPOLY_32F_A0),
508 A1 = (float)(.5550339366753125211915322047004666939128e-1 / EXPPOLY_32F_A0);
511 const Cv32suf* x = (const Cv32suf*)_x;
512 float minval = (float)(-exp_max_val/exp_prescale);
513 float maxval = (float)(exp_max_val/exp_prescale);
514 float postscale = (float)exp_postscale;
517 const int VECSZ = v_float32::nlanes;
518 const v_float32 vprescale = vx_setall_f32((float)exp_prescale);
519 const v_float32 vpostscale = vx_setall_f32((float)exp_postscale);
520 const v_float32 vminval = vx_setall_f32(minval);
521 const v_float32 vmaxval = vx_setall_f32(maxval);
523 const v_float32 vA1 = vx_setall_f32((float)A1);
524 const v_float32 vA2 = vx_setall_f32((float)A2);
525 const v_float32 vA3 = vx_setall_f32((float)A3);
526 const v_float32 vA4 = vx_setall_f32((float)A4);
528 const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
529 bool y_aligned = (size_t)(void*)y % 32 == 0;
531 for( ; i < n; i += VECSZ*2 )
533 if( i + VECSZ*2 > n )
535 if( i == 0 || _x == y )
541 v_float32 xf0 = vx_load(&x[i].f), xf1 = vx_load(&x[i + VECSZ].f);
543 xf0 = v_min(v_max(xf0, vminval), vmaxval);
544 xf1 = v_min(v_max(xf1, vminval), vmaxval);
549 v_int32 xi0 = v_round(xf0);
550 v_int32 xi1 = v_round(xf1);
551 xf0 = (xf0 - v_cvt_f32(xi0))*vpostscale;
552 xf1 = (xf1 - v_cvt_f32(xi1))*vpostscale;
554 v_float32 yf0 = v_lut(expTab_f, xi0 & vidxmask);
555 v_float32 yf1 = v_lut(expTab_f, xi1 & vidxmask);
557 v_int32 v0 = vx_setzero_s32(), v127 = vx_setall_s32(127), v255 = vx_setall_s32(255);
558 xi0 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi0) + v127, v0), v255);
559 xi1 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi1) + v127, v0), v255);
561 yf0 *= v_reinterpret_as_f32(v_shl<23>(xi0));
562 yf1 *= v_reinterpret_as_f32(v_shl<23>(xi1));
564 v_float32 zf0 = xf0 + vA1;
565 v_float32 zf1 = xf1 + vA1;
567 zf0 = v_fma(zf0, xf0, vA2);
568 zf1 = v_fma(zf1, xf1, vA2);
570 zf0 = v_fma(zf0, xf0, vA3);
571 zf1 = v_fma(zf1, xf1, vA3);
573 zf0 = v_fma(zf0, xf0, vA4);
574 zf1 = v_fma(zf1, xf1, vA4);
581 v_store_aligned(y + i, zf0);
582 v_store_aligned(y + i + VECSZ, zf1);
587 v_store(y + i + VECSZ, zf1);
596 x0 = std::min(std::max(x0, minval), maxval);
597 x0 *= (float)exp_prescale;
600 int xi = saturate_cast<int>(x0);
601 x0 = (x0 - xi)*postscale;
603 int t = (xi >> EXPTAB_SCALE) + 127;
604 t = !(t & ~255) ? t : t < 0 ? 0 : 255;
607 y[i] = buf.f * expTab_f[xi & EXPTAB_MASK] * ((((x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4);
611 void exp64f( const double *_x, double *y, int n )
613 CV_INSTRUMENT_REGION()
616 A5 = .99999999999999999998285227504999 / EXPPOLY_32F_A0,
617 A4 = .69314718055994546743029643825322 / EXPPOLY_32F_A0,
618 A3 = .24022650695886477918181338054308 / EXPPOLY_32F_A0,
619 A2 = .55504108793649567998466049042729e-1 / EXPPOLY_32F_A0,
620 A1 = .96180973140732918010002372686186e-2 / EXPPOLY_32F_A0,
621 A0 = .13369713757180123244806654839424e-2 / EXPPOLY_32F_A0;
624 const Cv64suf* x = (const Cv64suf*)_x;
625 double minval = (-exp_max_val/exp_prescale);
626 double maxval = (exp_max_val/exp_prescale);
629 const int VECSZ = v_float64::nlanes;
630 const v_float64 vprescale = vx_setall_f64(exp_prescale);
631 const v_float64 vpostscale = vx_setall_f64(exp_postscale);
632 const v_float64 vminval = vx_setall_f64(minval);
633 const v_float64 vmaxval = vx_setall_f64(maxval);
635 const v_float64 vA1 = vx_setall_f64(A1);
636 const v_float64 vA2 = vx_setall_f64(A2);
637 const v_float64 vA3 = vx_setall_f64(A3);
638 const v_float64 vA4 = vx_setall_f64(A4);
639 const v_float64 vA5 = vx_setall_f64(A5);
641 const v_int32 vidxmask = vx_setall_s32(EXPTAB_MASK);
642 bool y_aligned = (size_t)(void*)y % 32 == 0;
644 for( ; i < n; i += VECSZ*2 )
646 if( i + VECSZ*2 > n )
648 if( i == 0 || _x == y )
654 v_float64 xf0 = vx_load(&x[i].f), xf1 = vx_load(&x[i + VECSZ].f);
656 xf0 = v_min(v_max(xf0, vminval), vmaxval);
657 xf1 = v_min(v_max(xf1, vminval), vmaxval);
662 v_int32 xi0 = v_round(xf0);
663 v_int32 xi1 = v_round(xf1);
664 xf0 = (xf0 - v_cvt_f64(xi0))*vpostscale;
665 xf1 = (xf1 - v_cvt_f64(xi1))*vpostscale;
667 v_float64 yf0 = v_lut(expTab, xi0 & vidxmask);
668 v_float64 yf1 = v_lut(expTab, xi1 & vidxmask);
670 v_int32 v0 = vx_setzero_s32(), v1023 = vx_setall_s32(1023), v2047 = vx_setall_s32(2047);
671 xi0 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi0) + v1023, v0), v2047);
672 xi1 = v_min(v_max(v_shr<EXPTAB_SCALE>(xi1) + v1023, v0), v2047);
674 v_int64 xq0, xq1, dummy;
675 v_expand(xi0, xq0, dummy);
676 v_expand(xi1, xq1, dummy);
678 yf0 *= v_reinterpret_as_f64(v_shl<52>(xq0));
679 yf1 *= v_reinterpret_as_f64(v_shl<52>(xq1));
681 v_float64 zf0 = xf0 + vA1;
682 v_float64 zf1 = xf1 + vA1;
684 zf0 = v_fma(zf0, xf0, vA2);
685 zf1 = v_fma(zf1, xf1, vA2);
687 zf0 = v_fma(zf0, xf0, vA3);
688 zf1 = v_fma(zf1, xf1, vA3);
690 zf0 = v_fma(zf0, xf0, vA4);
691 zf1 = v_fma(zf1, xf1, vA4);
693 zf0 = v_fma(zf0, xf0, vA5);
694 zf1 = v_fma(zf1, xf1, vA5);
701 v_store_aligned(y + i, zf0);
702 v_store_aligned(y + i + VECSZ, zf1);
707 v_store(y + i + VECSZ, zf1);
716 x0 = std::min(std::max(x0, minval), maxval);
720 int xi = saturate_cast<int>(x0);
721 x0 = (x0 - xi)*exp_postscale;
723 int t = (xi >> EXPTAB_SCALE) + 1023;
724 t = !(t & ~2047) ? t : t < 0 ? 0 : 2047;
725 buf.i = (int64)t << 52;
727 y[i] = buf.f * expTab[xi & EXPTAB_MASK] * (((((A0*x0 + A1)*x0 + A2)*x0 + A3)*x0 + A4)*x0 + A5);
733 #undef EXPPOLY_32F_A0
735 /////////////////////////////////////////// LOG ///////////////////////////////////////
737 #define LOGTAB_SCALE 8
738 #define LOGTAB_MASK ((1 << LOGTAB_SCALE) - 1)
740 static const double CV_DECL_ALIGNED(16) logTab[] = {
741 0.0000000000000000000000000000000000000000, 1.000000000000000000000000000000000000000,
742 .00389864041565732288852075271279318258166, .9961089494163424124513618677042801556420,
743 .00778214044205494809292034119607706088573, .9922480620155038759689922480620155038760,
744 .01165061721997527263705585198749759001657, .9884169884169884169884169884169884169884,
745 .01550418653596525274396267235488267033361, .9846153846153846153846153846153846153846,
746 .01934296284313093139406447562578250654042, .9808429118773946360153256704980842911877,
747 .02316705928153437593630670221500622574241, .9770992366412213740458015267175572519084,
748 .02697658769820207233514075539915211265906, .9733840304182509505703422053231939163498,
749 .03077165866675368732785500469617545604706, .9696969696969696969696969696969696969697,
750 .03455238150665972812758397481047722976656, .9660377358490566037735849056603773584906,
751 .03831886430213659461285757856785494368522, .9624060150375939849624060150375939849624,
752 .04207121392068705056921373852674150839447, .9588014981273408239700374531835205992509,
753 .04580953603129420126371940114040626212953, .9552238805970149253731343283582089552239,
754 .04953393512227662748292900118940451648088, .9516728624535315985130111524163568773234,
755 .05324451451881227759255210685296333394944, .9481481481481481481481481481481481481481,
756 .05694137640013842427411105973078520037234, .9446494464944649446494464944649446494465,
757 .06062462181643483993820353816772694699466, .9411764705882352941176470588235294117647,
758 .06429435070539725460836422143984236754475, .9377289377289377289377289377289377289377,
759 .06795066190850773679699159401934593915938, .9343065693430656934306569343065693430657,
760 .07159365318700880442825962290953611955044, .9309090909090909090909090909090909090909,
761 .07522342123758751775142172846244648098944, .9275362318840579710144927536231884057971,
762 .07884006170777602129362549021607264876369, .9241877256317689530685920577617328519856,
763 .08244366921107458556772229485432035289706, .9208633093525179856115107913669064748201,
764 .08603433734180314373940490213499288074675, .9175627240143369175627240143369175627240,
765 .08961215868968712416897659522874164395031, .9142857142857142857142857142857142857143,
766 .09317722485418328259854092721070628613231, .9110320284697508896797153024911032028470,
767 .09672962645855109897752299730200320482256, .9078014184397163120567375886524822695035,
768 .10026945316367513738597949668474029749630, .9045936395759717314487632508833922261484,
769 .10379679368164355934833764649738441221420, .9014084507042253521126760563380281690141,
770 .10731173578908805021914218968959175981580, .8982456140350877192982456140350877192982,
771 .11081436634029011301105782649756292812530, .8951048951048951048951048951048951048951,
772 .11430477128005862852422325204315711744130, .8919860627177700348432055749128919860627,
773 .11778303565638344185817487641543266363440, .8888888888888888888888888888888888888889,
774 .12124924363286967987640707633545389398930, .8858131487889273356401384083044982698962,
775 .12470347850095722663787967121606925502420, .8827586206896551724137931034482758620690,
776 .12814582269193003360996385708858724683530, .8797250859106529209621993127147766323024,
777 .13157635778871926146571524895989568904040, .8767123287671232876712328767123287671233,
778 .13499516453750481925766280255629681050780, .8737201365187713310580204778156996587031,
779 .13840232285911913123754857224412262439730, .8707482993197278911564625850340136054422,
780 .14179791186025733629172407290752744302150, .8677966101694915254237288135593220338983,
781 .14518200984449788903951628071808954700830, .8648648648648648648648648648648648648649,
782 .14855469432313711530824207329715136438610, .8619528619528619528619528619528619528620,
783 .15191604202584196858794030049466527998450, .8590604026845637583892617449664429530201,
784 .15526612891112392955683674244937719777230, .8561872909698996655518394648829431438127,
785 .15860503017663857283636730244325008243330, .8533333333333333333333333333333333333333,
786 .16193282026931324346641360989451641216880, .8504983388704318936877076411960132890365,
787 .16524957289530714521497145597095368430010, .8476821192052980132450331125827814569536,
788 .16855536102980664403538924034364754334090, .8448844884488448844884488448844884488449,
789 .17185025692665920060697715143760433420540, .8421052631578947368421052631578947368421,
790 .17513433212784912385018287750426679849630, .8393442622950819672131147540983606557377,
791 .17840765747281828179637841458315961062910, .8366013071895424836601307189542483660131,
792 .18167030310763465639212199675966985523700, .8338762214983713355048859934853420195440,
793 .18492233849401198964024217730184318497780, .8311688311688311688311688311688311688312,
794 .18816383241818296356839823602058459073300, .8284789644012944983818770226537216828479,
795 .19139485299962943898322009772527962923050, .8258064516129032258064516129032258064516,
796 .19461546769967164038916962454095482826240, .8231511254019292604501607717041800643087,
797 .19782574332991986754137769821682013571260, .8205128205128205128205128205128205128205,
798 .20102574606059073203390141770796617493040, .8178913738019169329073482428115015974441,
799 .20421554142869088876999228432396193966280, .8152866242038216560509554140127388535032,
800 .20739519434607056602715147164417430758480, .8126984126984126984126984126984126984127,
801 .21056476910734961416338251183333341032260, .8101265822784810126582278481012658227848,
802 .21372432939771812687723695489694364368910, .8075709779179810725552050473186119873817,
803 .21687393830061435506806333251006435602900, .8050314465408805031446540880503144654088,
804 .22001365830528207823135744547471404075630, .8025078369905956112852664576802507836991,
805 .22314355131420973710199007200571941211830, .8000000000000000000000000000000000000000,
806 .22626367865045338145790765338460914790630, .7975077881619937694704049844236760124611,
807 .22937410106484582006380890106811420992010, .7950310559006211180124223602484472049689,
808 .23247487874309405442296849741978803649550, .7925696594427244582043343653250773993808,
809 .23556607131276688371634975283086532726890, .7901234567901234567901234567901234567901,
810 .23864773785017498464178231643018079921600, .7876923076923076923076923076923076923077,
811 .24171993688714515924331749374687206000090, .7852760736196319018404907975460122699387,
812 .24478272641769091566565919038112042471760, .7828746177370030581039755351681957186544,
813 .24783616390458124145723672882013488560910, .7804878048780487804878048780487804878049,
814 .25088030628580937353433455427875742316250, .7781155015197568389057750759878419452888,
815 .25391520998096339667426946107298135757450, .7757575757575757575757575757575757575758,
816 .25694093089750041913887912414793390780680, .7734138972809667673716012084592145015106,
817 .25995752443692604627401010475296061486000, .7710843373493975903614457831325301204819,
818 .26296504550088134477547896494797896593800, .7687687687687687687687687687687687687688,
819 .26596354849713793599974565040611196309330, .7664670658682634730538922155688622754491,
820 .26895308734550393836570947314612567424780, .7641791044776119402985074626865671641791,
821 .27193371548364175804834985683555714786050, .7619047619047619047619047619047619047619,
822 .27490548587279922676529508862586226314300, .7596439169139465875370919881305637982196,
823 .27786845100345625159121709657483734190480, .7573964497041420118343195266272189349112,
824 .28082266290088775395616949026589281857030, .7551622418879056047197640117994100294985,
825 .28376817313064456316240580235898960381750, .7529411764705882352941176470588235294118,
826 .28670503280395426282112225635501090437180, .7507331378299120234604105571847507331378,
827 .28963329258304265634293983566749375313530, .7485380116959064327485380116959064327485,
828 .29255300268637740579436012922087684273730, .7463556851311953352769679300291545189504,
829 .29546421289383584252163927885703742504130, .7441860465116279069767441860465116279070,
830 .29836697255179722709783618483925238251680, .7420289855072463768115942028985507246377,
831 .30126133057816173455023545102449133992200, .7398843930635838150289017341040462427746,
832 .30414733546729666446850615102448500692850, .7377521613832853025936599423631123919308,
833 .30702503529491181888388950937951449304830, .7356321839080459770114942528735632183908,
834 .30989447772286465854207904158101882785550, .7335243553008595988538681948424068767908,
835 .31275571000389684739317885942000430077330, .7314285714285714285714285714285714285714,
836 .31560877898630329552176476681779604405180, .7293447293447293447293447293447293447293,
837 .31845373111853458869546784626436419785030, .7272727272727272727272727272727272727273,
838 .32129061245373424782201254856772720813750, .7252124645892351274787535410764872521246,
839 .32411946865421192853773391107097268104550, .7231638418079096045197740112994350282486,
840 .32694034499585328257253991068864706903700, .7211267605633802816901408450704225352113,
841 .32975328637246797969240219572384376078850, .7191011235955056179775280898876404494382,
842 .33255833730007655635318997155991382896900, .7170868347338935574229691876750700280112,
843 .33535554192113781191153520921943709254280, .7150837988826815642458100558659217877095,
844 .33814494400871636381467055798566434532400, .7130919220055710306406685236768802228412,
845 .34092658697059319283795275623560883104800, .7111111111111111111111111111111111111111,
846 .34370051385331840121395430287520866841080, .7091412742382271468144044321329639889197,
847 .34646676734620857063262633346312213689100, .7071823204419889502762430939226519337017,
848 .34922538978528827602332285096053965389730, .7052341597796143250688705234159779614325,
849 .35197642315717814209818925519357435405250, .7032967032967032967032967032967032967033,
850 .35471990910292899856770532096561510115850, .7013698630136986301369863013698630136986,
851 .35745588892180374385176833129662554711100, .6994535519125683060109289617486338797814,
852 .36018440357500774995358483465679455548530, .6975476839237057220708446866485013623978,
853 .36290549368936841911903457003063522279280, .6956521739130434782608695652173913043478,
854 .36561919956096466943762379742111079394830, .6937669376693766937669376693766937669377,
855 .36832556115870762614150635272380895912650, .6918918918918918918918918918918918918919,
856 .37102461812787262962487488948681857436900, .6900269541778975741239892183288409703504,
857 .37371640979358405898480555151763837784530, .6881720430107526881720430107526881720430,
858 .37640097516425302659470730759494472295050, .6863270777479892761394101876675603217158,
859 .37907835293496944251145919224654790014030, .6844919786096256684491978609625668449198,
860 .38174858149084833769393299007788300514230, .6826666666666666666666666666666666666667,
861 .38441169891033200034513583887019194662580, .6808510638297872340425531914893617021277,
862 .38706774296844825844488013899535872042180, .6790450928381962864721485411140583554377,
863 .38971675114002518602873692543653305619950, .6772486772486772486772486772486772486772,
864 .39235876060286384303665840889152605086580, .6754617414248021108179419525065963060686,
865 .39499380824086893770896722344332374632350, .6736842105263157894736842105263157894737,
866 .39762193064713846624158577469643205404280, .6719160104986876640419947506561679790026,
867 .40024316412701266276741307592601515352730, .6701570680628272251308900523560209424084,
868 .40285754470108348090917615991202183067800, .6684073107049608355091383812010443864230,
869 .40546510810816432934799991016916465014230, .6666666666666666666666666666666666666667,
870 .40806588980822172674223224930756259709600, .6649350649350649350649350649350649350649,
871 .41065992498526837639616360320360399782650, .6632124352331606217616580310880829015544,
872 .41324724855021932601317757871584035456180, .6614987080103359173126614987080103359173,
873 .41582789514371093497757669865677598863850, .6597938144329896907216494845360824742268,
874 .41840189913888381489925905043492093682300, .6580976863753213367609254498714652956298,
875 .42096929464412963239894338585145305842150, .6564102564102564102564102564102564102564,
876 .42353011550580327293502591601281892508280, .6547314578005115089514066496163682864450,
877 .42608439531090003260516141381231136620050, .6530612244897959183673469387755102040816,
878 .42863216738969872610098832410585600882780, .6513994910941475826972010178117048346056,
879 .43117346481837132143866142541810404509300, .6497461928934010152284263959390862944162,
880 .43370832042155937902094819946796633303180, .6481012658227848101265822784810126582278,
881 .43623676677491801667585491486534010618930, .6464646464646464646464646464646464646465,
882 .43875883620762790027214350629947148263450, .6448362720403022670025188916876574307305,
883 .44127456080487520440058801796112675219780, .6432160804020100502512562814070351758794,
884 .44378397241030093089975139264424797147500, .6416040100250626566416040100250626566416,
885 .44628710262841947420398014401143882423650, .6400000000000000000000000000000000000000,
886 .44878398282700665555822183705458883196130, .6384039900249376558603491271820448877805,
887 .45127464413945855836729492693848442286250, .6368159203980099502487562189054726368159,
888 .45375911746712049854579618113348260521900, .6352357320099255583126550868486352357320,
889 .45623743348158757315857769754074979573500, .6336633663366336633663366336633663366337,
890 .45870962262697662081833982483658473938700, .6320987654320987654320987654320987654321,
891 .46117571512217014895185229761409573256980, .6305418719211822660098522167487684729064,
892 .46363574096303250549055974261136725544930, .6289926289926289926289926289926289926290,
893 .46608972992459918316399125615134835243230, .6274509803921568627450980392156862745098,
894 .46853771156323925639597405279346276074650, .6259168704156479217603911980440097799511,
895 .47097971521879100631480241645476780831830, .6243902439024390243902439024390243902439,
896 .47341577001667212165614273544633761048330, .6228710462287104622871046228710462287105,
897 .47584590486996386493601107758877333253630, .6213592233009708737864077669902912621359,
898 .47827014848147025860569669930555392056700, .6198547215496368038740920096852300242131,
899 .48068852934575190261057286988943815231330, .6183574879227053140096618357487922705314,
900 .48310107575113581113157579238759353756900, .6168674698795180722891566265060240963855,
901 .48550781578170076890899053978500887751580, .6153846153846153846153846153846153846154,
902 .48790877731923892879351001283794175833480, .6139088729016786570743405275779376498801,
903 .49030398804519381705802061333088204264650, .6124401913875598086124401913875598086124,
904 .49269347544257524607047571407747454941280, .6109785202863961813842482100238663484487,
905 .49507726679785146739476431321236304938800, .6095238095238095238095238095238095238095,
906 .49745538920281889838648226032091770321130, .6080760095011876484560570071258907363420,
907 .49982786955644931126130359189119189977650, .6066350710900473933649289099526066350711,
908 .50219473456671548383667413872899487614650, .6052009456264775413711583924349881796690,
909 .50455601075239520092452494282042607665050, .6037735849056603773584905660377358490566,
910 .50691172444485432801997148999362252652650, .6023529411764705882352941176470588235294,
911 .50926190178980790257412536448100581765150, .6009389671361502347417840375586854460094,
912 .51160656874906207391973111953120678663250, .5995316159250585480093676814988290398126,
913 .51394575110223428282552049495279788970950, .5981308411214953271028037383177570093458,
914 .51627947444845445623684554448118433356300, .5967365967365967365967365967365967365967,
915 .51860776420804555186805373523384332656850, .5953488372093023255813953488372093023256,
916 .52093064562418522900344441950437612831600, .5939675174013921113689095127610208816705,
917 .52324814376454775732838697877014055848100, .5925925925925925925925925925925925925926,
918 .52556028352292727401362526507000438869000, .5912240184757505773672055427251732101617,
919 .52786708962084227803046587723656557500350, .5898617511520737327188940092165898617512,
920 .53016858660912158374145519701414741575700, .5885057471264367816091954022988505747126,
921 .53246479886947173376654518506256863474850, .5871559633027522935779816513761467889908,
922 .53475575061602764748158733709715306758900, .5858123569794050343249427917620137299771,
923 .53704146589688361856929077475797384977350, .5844748858447488584474885844748858447489,
924 .53932196859560876944783558428753167390800, .5831435079726651480637813211845102505695,
925 .54159728243274429804188230264117009937750, .5818181818181818181818181818181818181818,
926 .54386743096728351609669971367111429572100, .5804988662131519274376417233560090702948,
927 .54613243759813556721383065450936555862450, .5791855203619909502262443438914027149321,
928 .54839232556557315767520321969641372561450, .5778781038374717832957110609480812641084,
929 .55064711795266219063194057525834068655950, .5765765765765765765765765765765765765766,
930 .55289683768667763352766542084282264113450, .5752808988764044943820224719101123595506,
931 .55514150754050151093110798683483153581600, .5739910313901345291479820627802690582960,
932 .55738115013400635344709144192165695130850, .5727069351230425055928411633109619686801,
933 .55961578793542265941596269840374588966350, .5714285714285714285714285714285714285714,
934 .56184544326269181269140062795486301183700, .5701559020044543429844097995545657015590,
935 .56407013828480290218436721261241473257550, .5688888888888888888888888888888888888889,
936 .56628989502311577464155334382667206227800, .5676274944567627494456762749445676274945,
937 .56850473535266865532378233183408156037350, .5663716814159292035398230088495575221239,
938 .57071468100347144680739575051120482385150, .5651214128035320088300220750551876379691,
939 .57291975356178548306473885531886480748650, .5638766519823788546255506607929515418502,
940 .57511997447138785144460371157038025558000, .5626373626373626373626373626373626373626,
941 .57731536503482350219940144597785547375700, .5614035087719298245614035087719298245614,
942 .57950594641464214795689713355386629700650, .5601750547045951859956236323851203501094,
943 .58169173963462239562716149521293118596100, .5589519650655021834061135371179039301310,
944 .58387276558098266665552955601015128195300, .5577342047930283224400871459694989106754,
945 .58604904500357812846544902640744112432000, .5565217391304347826086956521739130434783,
946 .58822059851708596855957011939608491957200, .5553145336225596529284164859002169197397,
947 .59038744660217634674381770309992134571100, .5541125541125541125541125541125541125541,
948 .59254960960667157898740242671919986605650, .5529157667386609071274298056155507559395,
949 .59470710774669277576265358220553025603300, .5517241379310344827586206896551724137931,
950 .59685996110779382384237123915227130055450, .5505376344086021505376344086021505376344,
951 .59900818964608337768851242799428291618800, .5493562231759656652360515021459227467811,
952 .60115181318933474940990890900138765573500, .5481798715203426124197002141327623126338,
953 .60329085143808425240052883964381180703650, .5470085470085470085470085470085470085470,
954 .60542532396671688843525771517306566238400, .5458422174840085287846481876332622601279,
955 .60755525022454170969155029524699784815300, .5446808510638297872340425531914893617021,
956 .60968064953685519036241657886421307921400, .5435244161358811040339702760084925690021,
957 .61180154110599282990534675263916142284850, .5423728813559322033898305084745762711864,
958 .61391794401237043121710712512140162289150, .5412262156448202959830866807610993657505,
959 .61602987721551394351138242200249806046500, .5400843881856540084388185654008438818565,
960 .61813735955507864705538167982012964785100, .5389473684210526315789473684210526315789,
961 .62024040975185745772080281312810257077200, .5378151260504201680672268907563025210084,
962 .62233904640877868441606324267922900617100, .5366876310272536687631027253668763102725,
963 .62443328801189346144440150965237990021700, .5355648535564853556485355648535564853556,
964 .62652315293135274476554741340805776417250, .5344467640918580375782881002087682672234,
965 .62860865942237409420556559780379757285100, .5333333333333333333333333333333333333333,
966 .63068982562619868570408243613201193511500, .5322245322245322245322245322245322245322,
967 .63276666957103777644277897707070223987100, .5311203319502074688796680497925311203320,
968 .63483920917301017716738442686619237065300, .5300207039337474120082815734989648033126,
969 .63690746223706917739093569252872839570050, .5289256198347107438016528925619834710744,
970 .63897144645792069983514238629140891134750, .5278350515463917525773195876288659793814,
971 .64103117942093124081992527862894348800200, .5267489711934156378600823045267489711934,
972 .64308667860302726193566513757104985415950, .5256673511293634496919917864476386036961,
973 .64513796137358470073053240412264131009600, .5245901639344262295081967213114754098361,
974 .64718504499530948859131740391603671014300, .5235173824130879345603271983640081799591,
975 .64922794662510974195157587018911726772800, .5224489795918367346938775510204081632653,
976 .65126668331495807251485530287027359008800, .5213849287169042769857433808553971486762,
977 .65330127201274557080523663898929953575150, .5203252032520325203252032520325203252033,
978 .65533172956312757406749369692988693714150, .5192697768762677484787018255578093306288,
979 .65735807270835999727154330685152672231200, .5182186234817813765182186234817813765182,
980 .65938031808912778153342060249997302889800, .5171717171717171717171717171717171717172,
981 .66139848224536490484126716182800009846700, .5161290322580645161290322580645161290323,
982 .66341258161706617713093692145776003599150, .5150905432595573440643863179074446680080,
983 .66542263254509037562201001492212526500250, .5140562248995983935742971887550200803213,
984 .66742865127195616370414654738851822912700, .5130260521042084168336673346693386773547,
985 .66943065394262923906154583164607174694550, .5120000000000000000000000000000000000000,
986 .67142865660530226534774556057527661323550, .5109780439121756487025948103792415169661,
987 .67342267521216669923234121597488410770900, .5099601593625498007968127490039840637450,
988 .67541272562017662384192817626171745359900, .5089463220675944333996023856858846918489,
989 .67739882359180603188519853574689477682100, .5079365079365079365079365079365079365079,
990 .67938098479579733801614338517538271844400, .5069306930693069306930693069306930693069,
991 .68135922480790300781450241629499942064300, .5059288537549407114624505928853754940711,
992 .68333355911162063645036823800182901322850, .5049309664694280078895463510848126232742,
993 .68530400309891936760919861626462079584600, .5039370078740157480314960629921259842520,
994 .68727057207096020619019327568821609020250, .5029469548133595284872298624754420432220,
995 .68923328123880889251040571252815425395950, .5019607843137254901960784313725490196078,
996 .69314718055994530941723212145818, 5.0e-01,
999 static float logTab_f[(LOGTAB_MASK+1)*2];
1000 static volatile bool logTab_f_initialized = false;
1002 #define LOGTAB_TRANSLATE(tab, x, h) (((x) - 1.f)*tab[(h)+1])
1003 static const double ln_2 = 0.69314718055994530941723212145818;
1005 void log32f( const float *_x, float *y, int n )
1007 CV_INSTRUMENT_REGION()
1009 if( !logTab_f_initialized )
1011 for( int j = 0; j < (LOGTAB_MASK+1)*2; j++ )
1012 logTab_f[j] = (float)logTab[j];
1013 logTab_f_initialized = true;
1016 static const int LOGTAB_MASK2_32F = (1 << (23 - LOGTAB_SCALE)) - 1;
1018 A0 = 0.3333333333333333333333333f,
1023 const int* x = (const int*)_x;
1026 const int VECSZ = v_float32::nlanes;
1027 const v_float32 vln2 = vx_setall_f32((float)ln_2);
1028 const v_float32 v1 = vx_setall_f32(1.f);
1029 const v_float32 vshift = vx_setall_f32(-1.f/512);
1031 const v_float32 vA0 = vx_setall_f32(A0);
1032 const v_float32 vA1 = vx_setall_f32(A1);
1033 const v_float32 vA2 = vx_setall_f32(A2);
1035 for( ; i < n; i += VECSZ )
1039 if( i == 0 || _x == y )
1044 v_int32 h0 = vx_load(x + i);
1045 v_int32 yi0 = (v_shr<23>(h0) & vx_setall_s32(255)) - vx_setall_s32(127);
1046 v_int32 xi0 = (h0 & vx_setall_s32(LOGTAB_MASK2_32F)) | vx_setall_s32(127 << 23);
1048 h0 = v_shr<23 - LOGTAB_SCALE - 1>(h0) & vx_setall_s32(LOGTAB_MASK*2);
1051 v_lut_deinterleave(logTab_f, h0, yf0, xf0);
1053 yf0 = v_fma(v_cvt_f32(yi0), vln2, yf0);
1055 v_float32 delta = v_reinterpret_as_f32(h0 == vx_setall_s32(510)) & vshift;
1056 xf0 = v_fma((v_reinterpret_as_f32(xi0) - v1), xf0, delta);
1058 v_float32 zf0 = v_fma(xf0, vA0, vA1);
1059 zf0 = v_fma(zf0, xf0, vA2);
1060 zf0 = v_fma(zf0, xf0, yf0);
1062 v_store(y + i, zf0);
1072 buf.i = (i0 & LOGTAB_MASK2_32F) | (127 << 23);
1073 int idx = (i0 >> (23 - LOGTAB_SCALE - 1)) & (LOGTAB_MASK*2);
1075 float y0 = (((i0 >> 23) & 0xff) - 127) * (float)ln_2 + logTab_f[idx];
1076 float x0 = (buf.f - 1.f)*logTab_f[idx + 1] + (idx == 510 ? -1.f/512 : 0.f);
1077 y[i] = ((A0*x0 + A1)*x0 + A2)*x0 + y0;
1081 void log64f( const double *x, double *y, int n )
1083 CV_INSTRUMENT_REGION()
1085 static const int64 LOGTAB_MASK2_64F = ((int64)1 << (52 - LOGTAB_SCALE)) - 1;
1089 A5 = 0.333333333333333314829616256247390992939472198486328125,
1092 A2 = -0.1666666666666666574148081281236954964697360992431640625,
1093 A1 = 0.1428571428571428769682682968777953647077083587646484375,
1099 const int VECSZ = v_float64::nlanes;
1100 const v_float64 vln2 = vx_setall_f64(ln_2);
1103 vA0 = vx_setall_f64(A0), vA1 = vx_setall_f64(A1),
1104 vA2 = vx_setall_f64(A2), vA3 = vx_setall_f64(A3),
1105 vA4 = vx_setall_f64(A4), vA5 = vx_setall_f64(A5),
1106 vA6 = vx_setall_f64(A6), vA7 = vx_setall_f64(A7);
1108 for( ; i < n; i += VECSZ )
1112 if( i == 0 || x == y )
1117 v_int64 h0 = vx_load((const int64*)x + i);
1118 v_int32 yi0 = v_pack(v_shr<52>(h0), vx_setzero_s64());
1119 yi0 = (yi0 & vx_setall_s32(0x7ff)) - vx_setall_s32(1023);
1121 v_int64 xi0 = (h0 & vx_setall_s64(LOGTAB_MASK2_64F)) | vx_setall_s64((int64)1023 << 52);
1122 h0 = v_shr<52 - LOGTAB_SCALE - 1>(h0);
1123 v_int32 idx = v_pack(h0, h0) & vx_setall_s32(LOGTAB_MASK*2);
1126 v_lut_deinterleave(logTab, idx, yf0, xf0);
1128 yf0 = v_fma(v_cvt_f64(yi0), vln2, yf0);
1129 v_float64 delta = v_cvt_f64(idx == vx_setall_s32(510))*vx_setall_f64(1./512);
1130 xf0 = v_fma(v_reinterpret_as_f64(xi0) - vx_setall_f64(1.), xf0, delta);
1132 v_float64 xq = xf0*xf0;
1133 v_float64 zf0 = v_fma(xq, vA0, vA2);
1134 v_float64 zf1 = v_fma(xq, vA1, vA3);
1135 zf0 = v_fma(zf0, xq, vA4);
1136 zf1 = v_fma(zf1, xq, vA5);
1137 zf0 = v_fma(zf0, xq, vA6);
1138 zf1 = v_fma(zf1, xq, vA7);
1139 zf1 = v_fma(zf1, xf0, yf0);
1140 zf0 = v_fma(zf0, xq, zf1);
1142 v_store(y + i, zf0);
1149 int64 i0 = ((const int64*)x)[i];
1151 buf.i = (i0 & LOGTAB_MASK2_64F) | ((int64)1023 << 52);
1152 int idx = (int)(i0 >> (52 - LOGTAB_SCALE - 1)) & (LOGTAB_MASK*2);
1154 double y0 = (((int)(i0 >> 52) & 0x7ff) - 1023) * ln_2 + logTab[idx];
1155 double x0 = (buf.f - 1.)*logTab[idx + 1] + (idx == 510 ? -1./512 : 0.);
1158 y[i] = (((A0*xq + A2)*xq + A4)*xq + A6)*xq + (((A1*xq + A3)*xq + A5)*xq + A7)*x0 + y0;
1162 #endif // issue 7795
1164 float fastAtan2( float y, float x )
1166 return atan_f32(y, x);
1169 #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY
1171 CV_CPU_OPTIMIZATION_NAMESPACE_END
1173 }} // namespace cv::hal