Imported Upstream version 1.57.0

[platform/upstream/boost.git] / boost / math / special_functions / next.hpp
diff --git a/boost/math/special_functions/next.hpp b/boost/math/special_functions/next.hpp

index 6c91cd1..9602bc7 100644 (file)
--- a/boost/math/special_functions/next.hpp
+++ b/boost/math/special_functions/next.hpp
@@ -10,13 +10,19 @@
  #pragma once
  #endif
  
+#include <boost/math/special_functions/math_fwd.hpp>
  #include <boost/math/policies/error_handling.hpp>
  #include <boost/math/special_functions/fpclassify.hpp>
  #include <boost/math/special_functions/sign.hpp>
  #include <boost/math/special_functions/trunc.hpp>
  
-#ifdef BOOST_MSVC
  #include <float.h>
+
+#if !defined(_CRAYC) && !defined(__CUDACC__) && (!defined(__GNUC__) || (__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ > 3)))
+#if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__SSE2__)
+#include "xmmintrin.h"
+#define BOOST_MATH_CHECK_SSE2
+#endif
  #endif
  
  namespace boost{ namespace math{
@@ -26,7 +32,17 @@ namespace detail{
  template <class T>
  inline T get_smallest_value(mpl::true_ const&)
  {
-   return std::numeric_limits<T>::denorm_min();
+   //
+   // numeric_limits lies about denorms being present - particularly
+   // when this can be turned on or off at runtime, as is the case
+   // when using the SSE2 registers in DAZ or FTZ mode.
+   //
+   static const T m = std::numeric_limits<T>::denorm_min();
+#ifdef BOOST_MATH_CHECK_SSE2
+   return (_mm_getcsr() & (_MM_FLUSH_ZERO_ON | 0x40)) ? tools::min_value<T>() : m;;
+#else
+   return ((tools::min_value<T>() / 2) == 0) ? tools::min_value<T>() : m;
+#endif
  }
  
  template <class T>
@@ -45,16 +61,59 @@ inline T get_smallest_value()
  #endif
  }
  
+//
+// Returns the smallest value that won't generate denorms when
+// we calculate the value of the least-significant-bit:
+//
+template <class T>
+T get_min_shift_value();
+
+template <class T>
+struct min_shift_initializer
+{
+   struct init
+   {
+      init()
+      {
+         do_init();
+      }
+      static void do_init()
+      {
+         get_min_shift_value<T>();
+      }
+      void force_instantiate()const{}
+   };
+   static const init initializer;
+   static void force_instantiate()
+   {
+      initializer.force_instantiate();
+   }
+};
+
+template <class T>
+const typename min_shift_initializer<T>::init min_shift_initializer<T>::initializer;
+
+
+template <class T>
+inline T get_min_shift_value()
+{
+   BOOST_MATH_STD_USING
+   static const T val = ldexp(tools::min_value<T>(), tools::digits<T>() + 1);
+   min_shift_initializer<T>::force_instantiate();
+
+   return val;
  }
  
  template <class T, class Policy>
-T float_next(const T& val, const Policy& pol)
+T float_next_imp(const T& val, const Policy& pol)
  {
     BOOST_MATH_STD_USING
     int expon;
     static const char* function = "float_next<%1%>(%1%)";
  
-   if(!(boost::math::isfinite)(val))
+   int fpclass = (boost::math::fpclassify)(val);
+
+   if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE))
     {
        if(val < 0)
           return -tools::max_value<T>();
@@ -69,6 +128,16 @@ T float_next(const T& val, const Policy& pol)
     if(val == 0)
        return detail::get_smallest_value<T>();
  
+   if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value<T>()) && (val != -tools::min_value<T>()))
+   {
+      //
+      // Special case: if the value of the least significant bit is a denorm, and the result
+      // would not be a denorm, then shift the input, increment, and shift back.
+      // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set.
+      //
+      return ldexp(float_next(T(ldexp(val, 2 * tools::digits<T>())), pol), -2 * tools::digits<T>());
+   }
+
     if(-0.5f == frexp(val, &expon))
        --expon; // reduce exponent when val is a power of two, and negative.
     T diff = ldexp(T(1), expon - tools::digits<T>());
@@ -77,7 +146,21 @@ T float_next(const T& val, const Policy& pol)
     return val + diff;
  }
  
-#ifdef BOOST_MSVC
+}
+
+template <class T, class Policy>
+inline typename tools::promote_args<T>::type float_next(const T& val, const Policy& pol)
+{
+   typedef typename tools::promote_args<T>::type result_type;
+   return detail::float_next_imp(static_cast<result_type>(val), pol);
+}
+
+#if 0 //def BOOST_MSVC
+//
+// We used to use ::_nextafter here, but doing so fails when using
+// the SSE2 registers if the FTZ or DAZ flags are set, so use our own
+// - albeit slower - code instead as at least that gives the correct answer.
+//
  template <class Policy>
  inline double float_next(const double& val, const Policy& pol)
  {
@@ -96,19 +179,23 @@ inline double float_next(const double& val, const Policy& pol)
  #endif
  
  template <class T>
-inline T float_next(const T& val)
+inline typename tools::promote_args<T>::type float_next(const T& val)
  {
     return float_next(val, policies::policy<>());
  }
  
+namespace detail{
+
  template <class T, class Policy>
-T float_prior(const T& val, const Policy& pol)
+T float_prior_imp(const T& val, const Policy& pol)
  {
     BOOST_MATH_STD_USING
     int expon;
     static const char* function = "float_prior<%1%>(%1%)";
  
-   if(!(boost::math::isfinite)(val))
+   int fpclass = (boost::math::fpclassify)(val);
+
+   if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE))
     {
        if(val > 0)
           return tools::max_value<T>();
@@ -123,6 +210,16 @@ T float_prior(const T& val, const Policy& pol)
     if(val == 0)
        return -detail::get_smallest_value<T>();
  
+   if((fpclass != (int)FP_SUBNORMAL) && (fpclass != (int)FP_ZERO) && (fabs(val) < detail::get_min_shift_value<T>()) && (val != tools::min_value<T>()))
+   {
+      //
+      // Special case: if the value of the least significant bit is a denorm, and the result
+      // would not be a denorm, then shift the input, decrement, and shift back.
+      // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set.
+      //
+      return ldexp(float_prior(T(ldexp(val, 2 * tools::digits<T>())), pol), -2 * tools::digits<T>());
+   }
+
     T remain = frexp(val, &expon);
     if(remain == 0.5)
        --expon; // when val is a power of two we must reduce the exponent
@@ -132,7 +229,21 @@ T float_prior(const T& val, const Policy& pol)
     return val - diff;
  }
  
-#ifdef BOOST_MSVC
+}
+
+template <class T, class Policy>
+inline typename tools::promote_args<T>::type float_prior(const T& val, const Policy& pol)
+{
+   typedef typename tools::promote_args<T>::type result_type;
+   return detail::float_prior_imp(static_cast<result_type>(val), pol);
+}
+
+#if 0 //def BOOST_MSVC
+//
+// We used to use ::_nextafter here, but doing so fails when using
+// the SSE2 registers if the FTZ or DAZ flags are set, so use our own
+// - albeit slower - code instead as at least that gives the correct answer.
+//
  template <class Policy>
  inline double float_prior(const double& val, const Policy& pol)
  {
@@ -151,25 +262,28 @@ inline double float_prior(const double& val, const Policy& pol)
  #endif
  
  template <class T>
-inline T float_prior(const T& val)
+inline typename tools::promote_args<T>::type float_prior(const T& val)
  {
     return float_prior(val, policies::policy<>());
  }
  
-template <class T, class Policy>
-inline T nextafter(const T& val, const T& direction, const Policy& pol)
+template <class T, class U, class Policy>
+inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction, const Policy& pol)
  {
-   return val < direction ? boost::math::float_next(val, pol) : val == direction ? val : boost::math::float_prior(val, pol);
+   typedef typename tools::promote_args<T, U>::type result_type;
+   return val < direction ? boost::math::float_next<result_type>(val, pol) : val == direction ? val : boost::math::float_prior<result_type>(val, pol);
  }
  
-template <class T>
-inline T nextafter(const T& val, const T& direction)
+template <class T, class U>
+inline typename tools::promote_args<T, U>::type nextafter(const T& val, const U& direction)
  {
     return nextafter(val, direction, policies::policy<>());
  }
  
+namespace detail{
+
  template <class T, class Policy>
-T float_distance(const T& a, const T& b, const Policy& pol)
+T float_distance_imp(const T& a, const T& b, const Policy& pol)
  {
     BOOST_MATH_STD_USING
     //
@@ -188,22 +302,22 @@ T float_distance(const T& a, const T& b, const Policy& pol)
     // Special cases:
     //
     if(a > b)
-      return -float_distance(b, a);
+      return -float_distance(b, a, pol);
     if(a == b)
        return 0;
     if(a == 0)
-      return 1 + fabs(float_distance(static_cast<T>(boost::math::sign(b) * detail::get_smallest_value<T>()), b, pol));
+      return 1 + fabs(float_distance(static_cast<T>((b < 0) ? T(-detail::get_smallest_value<T>()) : detail::get_smallest_value<T>()), b, pol));
     if(b == 0)
-      return 1 + fabs(float_distance(static_cast<T>(boost::math::sign(a) * detail::get_smallest_value<T>()), a, pol));
+      return 1 + fabs(float_distance(static_cast<T>((a < 0) ? T(-detail::get_smallest_value<T>()) : detail::get_smallest_value<T>()), a, pol));
     if(boost::math::sign(a) != boost::math::sign(b))
-      return 2 + fabs(float_distance(static_cast<T>(boost::math::sign(b) * detail::get_smallest_value<T>()), b, pol))
-         + fabs(float_distance(static_cast<T>(boost::math::sign(a) * detail::get_smallest_value<T>()), a, pol));
+      return 2 + fabs(float_distance(static_cast<T>((b < 0) ? T(-detail::get_smallest_value<T>()) : detail::get_smallest_value<T>()), b, pol))
+         + fabs(float_distance(static_cast<T>((a < 0) ? T(-detail::get_smallest_value<T>()) : detail::get_smallest_value<T>()), a, pol));
     //
     // By the time we get here, both a and b must have the same sign, we want
     // b > a and both positive for the following logic:
     //
     if(a < 0)
-      return float_distance(static_cast<T>(-b), static_cast<T>(-a));
+      return float_distance(static_cast<T>(-b), static_cast<T>(-a), pol);
  
     BOOST_ASSERT(a >= 0);
     BOOST_ASSERT(b >= a);
@@ -214,7 +328,7 @@ T float_distance(const T& a, const T& b, const Policy& pol)
     // because we actually have fewer than tools::digits<T>()
     // significant bits in the representation:
     //
-   frexp(((boost::math::fpclassify)(a) == FP_SUBNORMAL) ? tools::min_value<T>() : a, &expon);
+   frexp(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) ? tools::min_value<T>() : a, &expon);
     T upper = ldexp(T(1), expon);
     T result = 0;
     expon = tools::digits<T>() - expon;
@@ -227,13 +341,33 @@ T float_distance(const T& a, const T& b, const Policy& pol)
        result = float_distance(upper, b);
     }
     //
-   // Use compensated double-double addition to avoid rounding 
+   // Use compensated double-double addition to avoid rounding
     // errors in the subtraction:
     //
-   T mb = -(std::min)(upper, b);
-   T x = a + mb;
-   T z = x - a;
-   T y = (a - (x - z)) + (mb - z);
+   T mb, x, y, z;
+   if(((boost::math::fpclassify)(a) == (int)FP_SUBNORMAL) || (b - a < tools::min_value<T>()))
+   {
+      //
+      // Special case - either one end of the range is a denormal, or else the difference is.
+      // The regular code will fail if we're using the SSE2 registers on Intel and either
+      // the FTZ or DAZ flags are set.
+      //
+      T a2 = ldexp(a, tools::digits<T>());
+      T b2 = ldexp(b, tools::digits<T>());
+      mb = -(std::min)(T(ldexp(upper, tools::digits<T>())), b2);
+      x = a2 + mb;
+      z = x - a2;
+      y = (a2 - (x - z)) + (mb - z);
+
+      expon -= tools::digits<T>();
+   }
+   else
+   {
+      mb = -(std::min)(upper, b);
+      x = a + mb;
+      z = x - a;
+      y = (a - (x - z)) + (mb - z);
+   }
     if(x < 0)
     {
        x = -x;
@@ -247,20 +381,35 @@ T float_distance(const T& a, const T& b, const Policy& pol)
     return result;
  }
  
-template <class T>
-T float_distance(const T& a, const T& b)
+}
+
+template <class T, class U, class Policy>
+inline typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b, const Policy& pol)
+{
+   typedef typename tools::promote_args<T, U>::type result_type;
+   return detail::float_distance_imp(static_cast<result_type>(a), static_cast<result_type>(b), pol);
+}
+
+template <class T, class U>
+typename tools::promote_args<T, U>::type float_distance(const T& a, const U& b)
  {
     return boost::math::float_distance(a, b, policies::policy<>());
  }
  
+namespace detail{
+
  template <class T, class Policy>
-T float_advance(T val, int distance, const Policy& pol)
+T float_advance_imp(T val, int distance, const Policy& pol)
  {
+   BOOST_MATH_STD_USING
     //
     // Error handling:
     //
     static const char* function = "float_advance<%1%>(%1%, int)";
-   if(!(boost::math::isfinite)(val))
+
+   int fpclass = (boost::math::fpclassify)(val);
+
+   if((fpclass == (int)FP_NAN) || (fpclass == (int)FP_INFINITE))
        return policies::raise_domain_error<T>(
           function,
           "Argument val must be finite, but got %1%", val, pol);
@@ -273,7 +422,25 @@ T float_advance(T val, int distance, const Policy& pol)
        return float_next(val, pol);
     if(distance == -1)
        return float_prior(val, pol);
-   BOOST_MATH_STD_USING
+
+   if(fabs(val) < detail::get_min_shift_value<T>())
+   {
+      //
+      // Special case: if the value of the least significant bit is a denorm,
+      // implement in terms of float_next/float_prior.
+      // This avoids issues with the Intel SSE2 registers when the FTZ or DAZ flags are set.
+      //
+      if(distance > 0)
+      {
+         do{ val = float_next(val, pol); } while(--distance);
+      }
+      else
+      {
+         do{ val = float_prior(val, pol); } while(++distance);
+      }
+      return val;
+   }
+
     int expon;
     frexp(val, &expon);
     T limit = ldexp((distance < 0 ? T(0.5f) : T(1)), expon);
@@ -286,7 +453,7 @@ T float_advance(T val, int distance, const Policy& pol)
     {
        distance -= itrunc(limit_distance);
        val = limit;
-      if(distance < 0) 
+      if(distance < 0)
        {
           limit /= 2;
           expon--;
@@ -297,6 +464,10 @@ T float_advance(T val, int distance, const Policy& pol)
           expon++;
        }
        limit_distance = float_distance(val, limit);
+      if(distance && (limit_distance == 0))
+      {
+         return policies::raise_evaluation_error<T>(function, "Internal logic failed while trying to increment floating point value %1%: most likely your FPU is in non-IEEE conforming mode.", val, pol);
+      }
     }
     if((0.5f == frexp(val, &expon)) && (distance < 0))
        --expon;
@@ -308,8 +479,17 @@ T float_advance(T val, int distance, const Policy& pol)
     return val += diff;
  }
  
+}
+
+template <class T, class Policy>
+inline typename tools::promote_args<T>::type float_advance(T val, int distance, const Policy& pol)
+{
+   typedef typename tools::promote_args<T>::type result_type;
+   return detail::float_advance_imp(static_cast<result_type>(val), distance, pol);
+}
+
  template <class T>
-inline T float_advance(const T& val, int distance)
+inline typename tools::promote_args<T>::type float_advance(const T& val, int distance)
  {
     return boost::math::float_advance(val, distance, policies::policy<>());
  }