X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=boost%2Fmath%2Fspecial_functions%2Fnext.hpp;h=9602bc7697f109cd393f0727ee059725540f0f6e;hb=08c1e93fa36a49f49325a07fe91ff92c964c2b6c;hp=6c91cd1e383ca1e75217439d6372c71c0b958277;hpb=bb4dd8289b351fae6b55e303f189127a394a1edd;p=platform%2Fupstream%2Fboost.git diff --git a/boost/math/special_functions/next.hpp b/boost/math/special_functions/next.hpp index 6c91cd1..9602bc7 100644 --- a/boost/math/special_functions/next.hpp +++ b/boost/math/special_functions/next.hpp @@ -10,13 +10,19 @@ #pragma once #endif +#include #include #include #include #include -#ifdef BOOST_MSVC #include + +#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3))) +#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__) +#include "xmmintrin.h" +#define BOOST_MATH_CHECK_SSE2 +#endif #endif namespace boost{ namespace math{ @@ -26,7 +32,17 @@ namespace detail{ template inline T get_smallest_value(mpl::true_ const&) { - return std::numeric_limits::denorm_min(); + // + // numeric_limits lies about denorms being present - particularly + // when this can be turned on or off at runtime, as is the case + // when using the SSE2 registers in DAZ or FTZ mode. + // + static const T m = std::numeric_limits::denorm_min(); +#ifdef BOOST_MATH_CHECK_SSE2 + return (_mm_getcsr() & (_MM_FLUSH_ZERO_ON | 0x40)) ? tools::min_value() : m;; +#else + return ((tools::min_value() / 2) == 0) ? tools::min_value() : m; +#endif } template @@ -45,16 +61,59 @@ inline T get_smallest_value() #endif } +// +// Returns the smallest value that won't generate denorms when +// we calculate the value of the least-significant-bit: +// +template +T get_min_shift_value(); + +template +struct min_shift_initializer +{ + struct init + { + init() + { + do_init(); + } + static void do_init() + { + get_min_shift_value(); + } + void force_instantiate()const{} + }; + static const init initializer; + static void force_instantiate() + { + initializer.force_instantiate(); + } +}; + +template +const typename min_shift_initializer::init min_shift_initializer::initializer; + + +template +inline T get_min_shift_value() +{ + BOOST_MATH_STD_USING + static const T val = ldexp(tools::min_value(), tools::digits() + 1); + min_shift_initializer::force_instantiate(); + + return val; } template -T float_next(const T& val, const Policy& pol) +T float_next_imp(const T& val, const Policy& pol) { BOOST_MATH_STD_USING int expon; static const char* function = "float_next<%1%>(%1%)"; - if(!(boost::math::isfinite)(val)) + int fpclass = (boost::math::fpclassify)(val); + + if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) { if(val < 0) return -tools::max_value(); @@ -69,6 +128,16 @@ T float_next(const T& val, const Policy& pol) if(val == 0) return detail::get_smallest_value(); + if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value()) && (val != -tools::min_value())) + { + // + // Special case: if the value of the least significant bit is a denorm, and the result + // would not be a denorm, then shift the input, increment, and shift back. + // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. + // + return ldexp(float_next(T(ldexp(val, 2 * tools::digits())), pol), -2 * tools::digits()); + } + if(-0.5f == frexp(val, &expon)) --expon; // reduce exponent when val is a power of two, and negative. T diff = ldexp(T(1), expon - tools::digits()); @@ -77,7 +146,21 @@ T float_next(const T& val, const Policy& pol) return val + diff; } -#ifdef BOOST_MSVC +} + +template +inline typename tools::promote_args::type float_next(const T& val, const Policy& pol) +{ + typedef typename tools::promote_args::type result_type; + return detail::float_next_imp(static_cast(val), pol); +} + +#if 0 //def BOOST_MSVC +// +// We used to use ::_nextafter here, but doing so fails when using +// the SSE2 registers if the FTZ or DAZ flags are set, so use our own +// - albeit slower - code instead as at least that gives the correct answer. +// template inline double float_next(const double& val, const Policy& pol) { @@ -96,19 +179,23 @@ inline double float_next(const double& val, const Policy& pol) #endif template -inline T float_next(const T& val) +inline typename tools::promote_args::type float_next(const T& val) { return float_next(val, policies::policy<>()); } +namespace detail{ + template -T float_prior(const T& val, const Policy& pol) +T float_prior_imp(const T& val, const Policy& pol) { BOOST_MATH_STD_USING int expon; static const char* function = "float_prior<%1%>(%1%)"; - if(!(boost::math::isfinite)(val)) + int fpclass = (boost::math::fpclassify)(val); + + if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) { if(val > 0) return tools::max_value(); @@ -123,6 +210,16 @@ T float_prior(const T& val, const Policy& pol) if(val == 0) return -detail::get_smallest_value(); + if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value()) && (val != tools::min_value())) + { + // + // Special case: if the value of the least significant bit is a denorm, and the result + // would not be a denorm, then shift the input, increment, and shift back. + // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. + // + return ldexp(float_prior(T(ldexp(val, 2 * tools::digits())), pol), -2 * tools::digits()); + } + T remain = frexp(val, &expon); if(remain == 0.5) --expon; // when val is a power of two we must reduce the exponent @@ -132,7 +229,21 @@ T float_prior(const T& val, const Policy& pol) return val - diff; } -#ifdef BOOST_MSVC +} + +template +inline typename tools::promote_args::type float_prior(const T& val, const Policy& pol) +{ + typedef typename tools::promote_args::type result_type; + return detail::float_prior_imp(static_cast(val), pol); +} + +#if 0 //def BOOST_MSVC +// +// We used to use ::_nextafter here, but doing so fails when using +// the SSE2 registers if the FTZ or DAZ flags are set, so use our own +// - albeit slower - code instead as at least that gives the correct answer. +// template inline double float_prior(const double& val, const Policy& pol) { @@ -151,25 +262,28 @@ inline double float_prior(const double& val, const Policy& pol) #endif template -inline T float_prior(const T& val) +inline typename tools::promote_args::type float_prior(const T& val) { return float_prior(val, policies::policy<>()); } -template -inline T nextafter(const T& val, const T& direction, const Policy& pol) +template +inline typename tools::promote_args::type nextafter(const T& val, const U& direction, const Policy& pol) { - return val < direction ? boost::math::float_next(val, pol) : val == direction ? val : boost::math::float_prior(val, pol); + typedef typename tools::promote_args::type result_type; + return val < direction ? boost::math::float_next(val, pol) : val == direction ? val : boost::math::float_prior(val, pol); } -template -inline T nextafter(const T& val, const T& direction) +template +inline typename tools::promote_args::type nextafter(const T& val, const U& direction) { return nextafter(val, direction, policies::policy<>()); } +namespace detail{ + template -T float_distance(const T& a, const T& b, const Policy& pol) +T float_distance_imp(const T& a, const T& b, const Policy& pol) { BOOST_MATH_STD_USING // @@ -188,22 +302,22 @@ T float_distance(const T& a, const T& b, const Policy& pol) // Special cases: // if(a > b) - return -float_distance(b, a); + return -float_distance(b, a, pol); if(a == b) return 0; if(a == 0) - return 1 + fabs(float_distance(static_cast(boost::math::sign(b) * detail::get_smallest_value()), b, pol)); + return 1 + fabs(float_distance(static_cast((b < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), b, pol)); if(b == 0) - return 1 + fabs(float_distance(static_cast(boost::math::sign(a) * detail::get_smallest_value()), a, pol)); + return 1 + fabs(float_distance(static_cast((a < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), a, pol)); if(boost::math::sign(a) != boost::math::sign(b)) - return 2 + fabs(float_distance(static_cast(boost::math::sign(b) * detail::get_smallest_value()), b, pol)) - + fabs(float_distance(static_cast(boost::math::sign(a) * detail::get_smallest_value()), a, pol)); + return 2 + fabs(float_distance(static_cast((b < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), b, pol)) + + fabs(float_distance(static_cast((a < 0) ? T(-detail::get_smallest_value()) : detail::get_smallest_value()), a, pol)); // // By the time we get here, both a and b must have the same sign, we want // b > a and both postive for the following logic: // if(a < 0) - return float_distance(static_cast(-b), static_cast(-a)); + return float_distance(static_cast(-b), static_cast(-a), pol); BOOST_ASSERT(a >= 0); BOOST_ASSERT(b >= a); @@ -214,7 +328,7 @@ T float_distance(const T& a, const T& b, const Policy& pol) // because we actually have fewer than tools::digits() // significant bits in the representation: // - frexp(((boost::math::fpclassify)(a) == FP_SUBNORMAL) ? tools::min_value() : a, &expon); + frexp(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) ? tools::min_value() : a, &expon); T upper = ldexp(T(1), expon); T result = 0; expon = tools::digits() - expon; @@ -227,13 +341,33 @@ T float_distance(const T& a, const T& b, const Policy& pol) result = float_distance(upper, b); } // - // Use compensated double-double addition to avoid rounding + // Use compensated double-double addition to avoid rounding // errors in the subtraction: // - T mb = -(std::min)(upper, b); - T x = a + mb; - T z = x - a; - T y = (a - (x - z)) + (mb - z); + T mb, x, y, z; + if(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) || (b - a < tools::min_value())) + { + // + // Special case - either one end of the range is a denormal, or else the difference is. + // The regular code will fail if we're using the SSE2 registers on Intel and either + // the FTZ or DAZ flags are set. + // + T a2 = ldexp(a, tools::digits()); + T b2 = ldexp(b, tools::digits()); + mb = -(std::min)(T(ldexp(upper, tools::digits())), b2); + x = a2 + mb; + z = x - a2; + y = (a2 - (x - z)) + (mb - z); + + expon -= tools::digits(); + } + else + { + mb = -(std::min)(upper, b); + x = a + mb; + z = x - a; + y = (a - (x - z)) + (mb - z); + } if(x < 0) { x = -x; @@ -247,20 +381,35 @@ T float_distance(const T& a, const T& b, const Policy& pol) return result; } -template -T float_distance(const T& a, const T& b) +} + +template +inline typename tools::promote_args::type float_distance(const T& a, const U& b, const Policy& pol) +{ + typedef typename tools::promote_args::type result_type; + return detail::float_distance_imp(static_cast(a), static_cast(b), pol); +} + +template +typename tools::promote_args::type float_distance(const T& a, const U& b) { return boost::math::float_distance(a, b, policies::policy<>()); } +namespace detail{ + template -T float_advance(T val, int distance, const Policy& pol) +T float_advance_imp(T val, int distance, const Policy& pol) { + BOOST_MATH_STD_USING // // Error handling: // static const char* function = "float_advance<%1%>(%1%, int)"; - if(!(boost::math::isfinite)(val)) + + int fpclass = (boost::math::fpclassify)(val); + + if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE)) return policies::raise_domain_error( function, "Argument val must be finite, but got %1%", val, pol); @@ -273,7 +422,25 @@ T float_advance(T val, int distance, const Policy& pol) return float_next(val, pol); if(distance == -1) return float_prior(val, pol); - BOOST_MATH_STD_USING + + if(fabs(val) < detail::get_min_shift_value()) + { + // + // Special case: if the value of the least significant bit is a denorm, + // implement in terms of float_next/float_prior. + // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set. + // + if(distance > 0) + { + do{ val = float_next(val, pol); } while(--distance); + } + else + { + do{ val = float_prior(val, pol); } while(++distance); + } + return val; + } + int expon; frexp(val, &expon); T limit = ldexp((distance < 0 ? T(0.5f) : T(1)), expon); @@ -286,7 +453,7 @@ T float_advance(T val, int distance, const Policy& pol) { distance -= itrunc(limit_distance); val = limit; - if(distance < 0) + if(distance < 0) { limit /= 2; expon--; @@ -297,6 +464,10 @@ T float_advance(T val, int distance, const Policy& pol) expon++; } limit_distance = float_distance(val, limit); + if(distance && (limit_distance == 0)) + { + return policies::raise_evaluation_error(function, "Internal logic failed while trying to increment floating point value %1%: most likely your FPU is in non-IEEE conforming mode.", val, pol); + } } if((0.5f == frexp(val, &expon)) && (distance < 0)) --expon; @@ -308,8 +479,17 @@ T float_advance(T val, int distance, const Policy& pol) return val += diff; } +} + +template +inline typename tools::promote_args::type float_advance(T val, int distance, const Policy& pol) +{ + typedef typename tools::promote_args::type result_type; + return detail::float_advance_imp(static_cast(val), distance, pol); +} + template -inline T float_advance(const T& val, int distance) +inline typename tools::promote_args::type float_advance(const T& val, int distance) { return boost::math::float_advance(val, distance, policies::policy<>()); }